node-llama-cpp 3.0.0-beta.8 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (685)
  1. package/README.md +42 -27
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-cuda.moved.txt +1 -0
  5. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  6. package/bins/_linux-x64.moved.txt +1 -0
  7. package/bins/_mac-arm64-metal.moved.txt +1 -0
  8. package/bins/_mac-x64.moved.txt +1 -0
  9. package/bins/_win-arm64.moved.txt +1 -0
  10. package/bins/_win-x64-cuda.moved.txt +1 -0
  11. package/bins/_win-x64-vulkan.moved.txt +1 -0
  12. package/bins/_win-x64.moved.txt +1 -0
  13. package/dist/ChatWrapper.d.ts +19 -39
  14. package/dist/ChatWrapper.js +129 -72
  15. package/dist/ChatWrapper.js.map +1 -1
  16. package/dist/apiDocsIndex.d.ts +1 -0
  17. package/dist/apiDocsIndex.js +7 -0
  18. package/dist/apiDocsIndex.js.map +1 -0
  19. package/dist/bindings/AddonTypes.d.ts +88 -20
  20. package/dist/bindings/Llama.d.ts +43 -6
  21. package/dist/bindings/Llama.js +214 -40
  22. package/dist/bindings/Llama.js.map +1 -1
  23. package/dist/bindings/consts.d.ts +2 -0
  24. package/dist/bindings/consts.js +13 -0
  25. package/dist/bindings/consts.js.map +1 -0
  26. package/dist/bindings/getLlama.d.ts +123 -18
  27. package/dist/bindings/getLlama.js +288 -90
  28. package/dist/bindings/getLlama.js.map +1 -1
  29. package/dist/bindings/types.d.ts +29 -5
  30. package/dist/bindings/types.js +51 -2
  31. package/dist/bindings/types.js.map +1 -1
  32. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  33. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  34. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  35. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  36. package/dist/bindings/utils/asyncEvery.js +15 -0
  37. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  38. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  39. package/dist/bindings/utils/asyncSome.js +27 -0
  40. package/dist/bindings/utils/asyncSome.js.map +1 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
  46. package/dist/bindings/utils/compileLLamaCpp.js +250 -81
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  52. package/dist/bindings/utils/detectGlibc.js +46 -0
  53. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  55. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  56. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
  58. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  59. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  60. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  61. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  62. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  63. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  64. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  65. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  66. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  67. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  68. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  69. package/dist/bindings/utils/getPlatform.js.map +1 -1
  70. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  71. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  72. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  73. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  74. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  75. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  76. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  77. package/dist/bindings/utils/hasFileInPath.js +34 -0
  78. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  79. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  80. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  81. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  82. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  83. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  84. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  85. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  86. package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
  87. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  88. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  89. package/dist/bindings/utils/testBindingBinary.js +100 -0
  90. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  91. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  92. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  93. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  94. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  95. package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
  96. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  97. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  98. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  99. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  100. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  101. package/dist/chatWrappers/FalconChatWrapper.js +39 -21
  102. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  103. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  104. package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
  105. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  107. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  108. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  109. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  110. package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
  111. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  113. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
  114. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  115. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  116. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  117. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  118. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
  119. package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
  120. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  121. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  122. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  123. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  124. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
  125. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  127. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
  128. package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  130. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  131. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  133. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  134. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  136. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  137. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  138. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
  141. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  142. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  143. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  144. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  145. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
  146. package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
  147. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  148. package/dist/cli/cli.js +19 -11
  149. package/dist/cli/cli.js.map +1 -1
  150. package/dist/cli/commands/ChatCommand.d.ts +16 -7
  151. package/dist/cli/commands/ChatCommand.js +323 -191
  152. package/dist/cli/commands/ChatCommand.js.map +1 -1
  153. package/dist/cli/commands/CompleteCommand.d.ts +31 -0
  154. package/dist/cli/commands/CompleteCommand.js +402 -0
  155. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  156. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  157. package/dist/cli/commands/DebugCommand.js +54 -0
  158. package/dist/cli/commands/DebugCommand.js.map +1 -0
  159. package/dist/cli/commands/InfillCommand.d.ts +33 -0
  160. package/dist/cli/commands/InfillCommand.js +438 -0
  161. package/dist/cli/commands/InfillCommand.js.map +1 -0
  162. package/dist/cli/commands/InitCommand.d.ts +11 -0
  163. package/dist/cli/commands/InitCommand.js +195 -0
  164. package/dist/cli/commands/InitCommand.js.map +1 -0
  165. package/dist/cli/commands/OnPostInstallCommand.js +6 -2
  166. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  167. package/dist/cli/commands/PullCommand.d.ts +13 -0
  168. package/dist/cli/commands/PullCommand.js +158 -0
  169. package/dist/cli/commands/PullCommand.js.map +1 -0
  170. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  171. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  172. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  173. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  174. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  175. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  176. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  177. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  179. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  180. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  182. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  183. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  185. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  186. package/dist/cli/commands/source/SourceCommand.js +19 -0
  187. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  188. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  189. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  190. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  191. package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
  192. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
  193. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  194. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
  195. package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
  196. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  197. package/dist/cli/projectTemplates.d.ts +7 -0
  198. package/dist/cli/projectTemplates.js +10 -0
  199. package/dist/cli/projectTemplates.js.map +1 -0
  200. package/dist/cli/recommendedModels.d.ts +2 -0
  201. package/dist/cli/recommendedModels.js +585 -0
  202. package/dist/cli/recommendedModels.js.map +1 -0
  203. package/dist/cli/startCreateCli.d.ts +2 -0
  204. package/dist/cli/startCreateCli.js +26 -0
  205. package/dist/cli/startCreateCli.js.map +1 -0
  206. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  207. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  208. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  209. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  210. package/dist/cli/utils/ConsoleTable.js +86 -0
  211. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  212. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  213. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  214. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  215. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  216. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  217. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  218. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  219. package/dist/cli/utils/getReadablePath.js +14 -0
  220. package/dist/cli/utils/getReadablePath.js.map +1 -0
  221. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  222. package/dist/cli/utils/interactivelyAskForModel.js +450 -0
  223. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  224. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  225. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  226. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  227. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  228. package/dist/cli/utils/printCommonInfoLines.js +82 -0
  229. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  230. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  231. package/dist/cli/utils/printInfoLine.js +54 -0
  232. package/dist/cli/utils/printInfoLine.js.map +1 -0
  233. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  234. package/dist/cli/utils/projectTemplates.js +47 -0
  235. package/dist/cli/utils/projectTemplates.js.map +1 -0
  236. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  237. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  238. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  239. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  240. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  241. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  242. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  243. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  244. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  245. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  246. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  247. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  248. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  249. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  250. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  251. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  252. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  253. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  254. package/dist/commands.d.ts +4 -3
  255. package/dist/commands.js +6 -3
  256. package/dist/commands.js.map +1 -1
  257. package/dist/config.d.ts +35 -4
  258. package/dist/config.js +58 -17
  259. package/dist/config.js.map +1 -1
  260. package/dist/consts.d.ts +4 -0
  261. package/dist/consts.js +11 -0
  262. package/dist/consts.js.map +1 -0
  263. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
  264. package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
  265. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  266. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  267. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  268. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  269. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  270. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  271. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  272. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  273. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  274. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  275. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
  276. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  277. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
  278. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
  279. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  280. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
  281. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  282. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  283. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
  284. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  285. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  286. package/dist/evaluator/LlamaCompletion.d.ts +168 -0
  287. package/dist/evaluator/LlamaCompletion.js +470 -0
  288. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  289. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +63 -22
  290. package/dist/evaluator/LlamaContext/LlamaContext.js +503 -121
  291. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  292. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  293. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  294. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  295. package/dist/evaluator/LlamaContext/types.d.ts +177 -16
  296. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  297. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  298. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  299. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  301. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  302. package/dist/evaluator/LlamaEmbedding.js +53 -0
  303. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  304. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
  305. package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
  306. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  307. package/dist/evaluator/LlamaGrammar.d.ts +16 -13
  308. package/dist/evaluator/LlamaGrammar.js +17 -10
  309. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  310. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  311. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  312. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  313. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  314. package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
  315. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  316. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
  317. package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
  318. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  319. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  320. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  321. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  322. package/dist/evaluator/TokenBias.d.ts +34 -0
  323. package/dist/evaluator/TokenBias.js +65 -0
  324. package/dist/evaluator/TokenBias.js.map +1 -0
  325. package/dist/evaluator/TokenMeter.d.ts +45 -0
  326. package/dist/evaluator/TokenMeter.js +74 -0
  327. package/dist/evaluator/TokenMeter.js.map +1 -0
  328. package/dist/gguf/consts.d.ts +4 -0
  329. package/dist/gguf/consts.js +12 -0
  330. package/dist/gguf/consts.js.map +1 -0
  331. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  332. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  333. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  334. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  335. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  336. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  337. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  338. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  339. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  340. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  341. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  342. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  343. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
  344. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
  345. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  346. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  347. package/dist/gguf/insights/GgufInsights.js +401 -0
  348. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  349. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
  350. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
  351. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  352. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  353. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  354. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  355. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  356. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  357. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  358. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  359. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  360. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  361. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  362. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  363. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  364. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  365. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  366. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  367. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  368. package/dist/gguf/parser/parseGguf.js +61 -0
  369. package/dist/gguf/parser/parseGguf.js.map +1 -0
  370. package/dist/gguf/readGgufFileInfo.d.ts +45 -0
  371. package/dist/gguf/readGgufFileInfo.js +71 -0
  372. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  373. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  374. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  375. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  376. package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
  377. package/dist/gguf/types/GgufMetadataTypes.js +114 -0
  378. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  379. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  380. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  381. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  382. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  383. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  384. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  385. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  386. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  387. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  388. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  389. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  390. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  391. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  392. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  393. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  394. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  395. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  396. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  397. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  398. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  399. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  400. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  401. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  402. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  403. package/dist/index.d.ts +39 -14
  404. package/dist/index.js +29 -8
  405. package/dist/index.js.map +1 -1
  406. package/dist/state.d.ts +2 -0
  407. package/dist/state.js +7 -0
  408. package/dist/state.js.map +1 -1
  409. package/dist/tsconfig.tsbuildinfo +1 -0
  410. package/dist/types.d.ts +131 -5
  411. package/dist/types.js.map +1 -1
  412. package/dist/utils/DisposeGuard.d.ts +13 -0
  413. package/dist/utils/DisposeGuard.js +120 -0
  414. package/dist/utils/DisposeGuard.js.map +1 -0
  415. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  416. package/dist/utils/InsufficientMemoryError.js +6 -0
  417. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  418. package/dist/utils/LlamaText.d.ts +73 -26
  419. package/dist/utils/LlamaText.js +475 -157
  420. package/dist/utils/LlamaText.js.map +1 -1
  421. package/dist/utils/LruCache.d.ts +12 -0
  422. package/dist/utils/LruCache.js +44 -0
  423. package/dist/utils/LruCache.js.map +1 -0
  424. package/dist/utils/OverridesObject.d.ts +7 -0
  425. package/dist/utils/OverridesObject.js +2 -0
  426. package/dist/utils/OverridesObject.js.map +1 -0
  427. package/dist/utils/ReplHistory.js +5 -1
  428. package/dist/utils/ReplHistory.js.map +1 -1
  429. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  430. package/dist/utils/StopGenerationDetector.js +108 -22
  431. package/dist/utils/StopGenerationDetector.js.map +1 -1
  432. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  433. package/dist/utils/ThreadsSplitter.js +177 -0
  434. package/dist/utils/ThreadsSplitter.js.map +1 -0
  435. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  436. package/dist/utils/TokenStreamRegulator.js +102 -10
  437. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  438. package/dist/utils/UnsupportedError.d.ts +2 -0
  439. package/dist/utils/UnsupportedError.js +7 -0
  440. package/dist/utils/UnsupportedError.js.map +1 -0
  441. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  442. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  443. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  444. package/dist/utils/clearTempFolder.js.map +1 -1
  445. package/dist/utils/cmake.js +23 -10
  446. package/dist/utils/cmake.js.map +1 -1
  447. package/dist/utils/compareTokens.d.ts +1 -1
  448. package/dist/utils/compareTokens.js.map +1 -1
  449. package/dist/utils/createModelDownloader.d.ts +199 -0
  450. package/dist/utils/createModelDownloader.js +405 -0
  451. package/dist/utils/createModelDownloader.js.map +1 -0
  452. package/dist/utils/findBestOption.d.ts +4 -0
  453. package/dist/utils/findBestOption.js +15 -0
  454. package/dist/utils/findBestOption.js.map +1 -0
  455. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  458. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  459. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  462. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  465. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  477. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  482. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  484. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  489. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  492. package/dist/utils/gbnfJson/types.d.ts +3 -0
  493. package/dist/utils/gbnfJson/types.js.map +1 -1
  494. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  497. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  500. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  502. package/dist/utils/getBuildDefaults.d.ts +1 -2
  503. package/dist/utils/getBuildDefaults.js +2 -3
  504. package/dist/utils/getBuildDefaults.js.map +1 -1
  505. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  506. package/dist/utils/getConsoleLogPrefix.js +5 -4
  507. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  508. package/dist/utils/getGrammarsFolder.js +1 -1
  509. package/dist/utils/getGrammarsFolder.js.map +1 -1
  510. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  511. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  512. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  513. package/dist/utils/getReadableContextSize.d.ts +1 -0
  514. package/dist/utils/getReadableContextSize.js +7 -0
  515. package/dist/utils/getReadableContextSize.js.map +1 -0
  516. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  517. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  518. package/dist/utils/gitReleaseBundles.js +68 -1
  519. package/dist/utils/gitReleaseBundles.js.map +1 -1
  520. package/dist/utils/isToken.d.ts +2 -0
  521. package/dist/utils/isToken.js +4 -0
  522. package/dist/utils/isToken.js.map +1 -0
  523. package/dist/utils/isUrl.d.ts +1 -0
  524. package/dist/utils/isUrl.js +15 -0
  525. package/dist/utils/isUrl.js.map +1 -0
  526. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  527. package/dist/utils/mergeUnionTypes.js +2 -0
  528. package/dist/utils/mergeUnionTypes.js.map +1 -0
  529. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  530. package/dist/utils/modelFileAccesTokens.js +40 -0
  531. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  532. package/dist/utils/parseModelFileName.d.ts +1 -0
  533. package/dist/utils/parseModelFileName.js +6 -1
  534. package/dist/utils/parseModelFileName.js.map +1 -1
  535. package/dist/utils/parseTextTemplate.d.ts +66 -0
  536. package/dist/utils/parseTextTemplate.js +116 -0
  537. package/dist/utils/parseTextTemplate.js.map +1 -0
  538. package/dist/utils/prettyPrintObject.d.ts +10 -1
  539. package/dist/utils/prettyPrintObject.js +61 -15
  540. package/dist/utils/prettyPrintObject.js.map +1 -1
  541. package/dist/utils/pushAll.d.ts +6 -0
  542. package/dist/utils/pushAll.js +11 -0
  543. package/dist/utils/pushAll.js.map +1 -0
  544. package/dist/utils/removeNullFields.d.ts +2 -2
  545. package/dist/utils/removeNullFields.js.map +1 -1
  546. package/dist/utils/resolveGithubRelease.d.ts +2 -2
  547. package/dist/utils/resolveGithubRelease.js.map +1 -1
  548. package/dist/utils/resolveLastTokens.d.ts +2 -0
  549. package/dist/utils/resolveLastTokens.js +12 -0
  550. package/dist/utils/resolveLastTokens.js.map +1 -0
  551. package/dist/utils/runtime.d.ts +4 -0
  552. package/dist/utils/runtime.js +8 -0
  553. package/dist/utils/runtime.js.map +1 -0
  554. package/dist/utils/safeEventCallback.d.ts +6 -0
  555. package/dist/utils/safeEventCallback.js +29 -0
  556. package/dist/utils/safeEventCallback.js.map +1 -0
  557. package/dist/utils/spawnCommand.d.ts +11 -2
  558. package/dist/utils/spawnCommand.js +55 -7
  559. package/dist/utils/spawnCommand.js.map +1 -1
  560. package/dist/utils/tokenizeInput.d.ts +1 -1
  561. package/dist/utils/tokenizeInput.js +6 -3
  562. package/dist/utils/tokenizeInput.js.map +1 -1
  563. package/dist/utils/transformPromisable.d.ts +40 -0
  564. package/dist/utils/transformPromisable.js +64 -0
  565. package/dist/utils/transformPromisable.js.map +1 -0
  566. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  567. package/dist/utils/truncateTextAndRoundToWords.js +32 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  569. package/dist/utils/utilTypes.d.ts +3 -0
  570. package/dist/utils/utilTypes.js +2 -0
  571. package/dist/utils/utilTypes.js.map +1 -0
  572. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  573. package/dist/utils/withLockfile.js.map +1 -1
  574. package/dist/utils/withOra.d.ts +2 -0
  575. package/dist/utils/withOra.js +22 -6
  576. package/dist/utils/withOra.js.map +1 -1
  577. package/dist/utils/withProgressLog.d.ts +22 -0
  578. package/dist/utils/withProgressLog.js +211 -0
  579. package/dist/utils/withProgressLog.js.map +1 -0
  580. package/dist/utils/withStatusLogs.js +1 -1
  581. package/dist/utils/withStatusLogs.js.map +1 -1
  582. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  583. package/dist/utils/wrapAbortSignal.js +9 -0
  584. package/dist/utils/wrapAbortSignal.js.map +1 -0
  585. package/llama/.clang-format +1 -2
  586. package/llama/CMakeLists.txt +134 -5
  587. package/llama/addon/AddonContext.cpp +629 -0
  588. package/llama/addon/AddonContext.h +52 -0
  589. package/llama/addon/AddonGrammar.cpp +39 -0
  590. package/llama/addon/AddonGrammar.h +19 -0
  591. package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
  592. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  593. package/llama/addon/AddonModel.cpp +672 -0
  594. package/llama/addon/AddonModel.h +61 -0
  595. package/llama/addon/AddonModelData.cpp +25 -0
  596. package/llama/addon/AddonModelData.h +15 -0
  597. package/llama/addon/AddonModelLora.cpp +105 -0
  598. package/llama/addon/AddonModelLora.h +28 -0
  599. package/llama/addon/AddonSampler.cpp +513 -0
  600. package/llama/addon/AddonSampler.h +65 -0
  601. package/llama/addon/RingBuffer.h +109 -0
  602. package/llama/addon/addon.cpp +223 -0
  603. package/llama/addon/addonGlobals.cpp +22 -0
  604. package/llama/addon/addonGlobals.h +12 -0
  605. package/llama/addon/globals/addonLog.cpp +136 -0
  606. package/llama/addon/globals/addonLog.h +21 -0
  607. package/llama/addon/globals/addonProgress.cpp +15 -0
  608. package/llama/addon/globals/addonProgress.h +15 -0
  609. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  610. package/llama/addon/globals/getGpuInfo.h +6 -0
  611. package/llama/binariesGithubRelease.json +1 -1
  612. package/llama/gitRelease.bundle +0 -0
  613. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  614. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  615. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  616. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  617. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  618. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  619. package/llama/grammars/README.md +297 -6
  620. package/llama/grammars/json.gbnf +4 -4
  621. package/llama/grammars/json_arr.gbnf +4 -4
  622. package/llama/llama.cpp.info.json +1 -1
  623. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  624. package/package.json +109 -59
  625. package/templates/packed/electron-typescript-react.json +1 -0
  626. package/templates/packed/node-typescript.json +1 -0
  627. package/dist/AbortError.d.ts +0 -2
  628. package/dist/AbortError.js +0 -7
  629. package/dist/AbortError.js.map +0 -1
  630. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  631. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  632. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  633. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  634. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  635. package/dist/cli/commands/BuildCommand.d.ts +0 -11
  636. package/dist/cli/commands/BuildCommand.js +0 -106
  637. package/dist/cli/commands/BuildCommand.js.map +0 -1
  638. package/dist/cli/commands/ClearCommand.js.map +0 -1
  639. package/dist/cli/commands/DownloadCommand.js +0 -169
  640. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  641. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  642. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
  643. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  644. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  645. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  646. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  647. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  648. package/dist/evaluator/LlamaModel.d.ts +0 -120
  649. package/dist/evaluator/LlamaModel.js +0 -320
  650. package/dist/evaluator/LlamaModel.js.map +0 -1
  651. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  652. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  653. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  654. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  655. package/dist/utils/parseModelTypeDescription.js +0 -9
  656. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  657. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  658. package/dist/utils/resolveChatWrapper.js +0 -16
  659. package/dist/utils/resolveChatWrapper.js.map +0 -1
  660. package/llama/addon.cpp +0 -916
  661. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  662. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  663. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  664. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  665. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  666. package/llamaBins/linux-x64/llama-addon.node +0 -0
  667. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  668. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  669. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  670. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
  671. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  672. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  673. package/llamaBins/mac-x64/llama-addon.node +0 -0
  674. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  675. package/llamaBins/win-x64/llama-addon.exp +0 -0
  676. package/llamaBins/win-x64/llama-addon.lib +0 -0
  677. package/llamaBins/win-x64/llama-addon.node +0 -0
  678. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  679. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  680. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  681. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  682. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  683. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  684. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  685. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -1,66 +1,113 @@
1
- import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
1
+ import { AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
2
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
3
3
  import { compareTokens } from "../../utils/compareTokens.js";
4
- import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
4
+ import { DisposeGuard } from "../../utils/DisposeGuard.js";
5
+ import { TokenMeter } from "../TokenMeter.js";
6
+ import { UnsupportedError } from "../../utils/UnsupportedError.js";
7
+ import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
8
+ import { LlamaSampler } from "./LlamaSampler.js";
9
+ const defaultLoraScale = 1;
10
+ const shrinkRetriesMinContextSize = 4096;
11
+ const defaultMaxPunishTokens = 64;
12
+ const defaultFailedCreationRemedy = {
13
+ retries: 6,
14
+ autoContextSizeShrink: 0.16
15
+ };
5
16
  export class LlamaContext {
6
17
  /** @internal */ _llama;
7
18
  /** @internal */ _ctx;
8
19
  /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
20
+ /** @internal */ _backendContextDisposeGuard;
9
21
  /** @internal */ _model;
10
22
  /** @internal */ _contextSize;
11
23
  /** @internal */ _batchSize;
24
+ /** @internal */ _flashAttention;
25
+ /** @internal */ _idealThreads;
26
+ /** @internal */ _minThreads;
27
+ /** @internal */ _performanceTracking;
12
28
  /** @internal */ _totalSequences;
13
29
  /** @internal */ _unusedSequenceIds = [];
14
30
  /** @internal */ _batchingOptions;
15
31
  /** @internal */ _queuedDecodeSequenceIds = new Set();
16
32
  /** @internal */ _queuedDecodes = [];
17
- /** @internal */ _disposeAggregator = new DisposeAggregator();
33
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
34
+ /** @internal */ _modelPreventDisposalHandle;
35
+ /** @internal */ _loraAdapters = new Set();
36
+ /** @internal */ _gcRegistry;
18
37
  /** @internal */ _nextGeneratedSequenceId = 0;
19
38
  /** @internal */ _dispatchDecodeScheduled = false;
20
39
  /** @internal */ _batchDispatchPending = false;
40
+ /** @internal */ _threadSplitterConsumer;
41
+ /** @internal */ _freeReservedThreadsTimeout;
21
42
  /** @internal */ _currentDispatchBatchHandle = {};
22
43
  /** @internal */ _allocatedContextSize;
23
44
  /** @internal */ _disposed = false;
24
45
  onDispose = new EventRelay();
25
- /**
26
- * @param options
27
- */
28
- constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
29
- if (model.disposed)
46
+ constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, performanceTracking = false, _embeddings }) {
47
+ if (_model.disposed)
30
48
  throw new DisposedError();
31
- this._llama = model._llama;
32
- this._model = model;
49
+ this._llama = _model._llama;
50
+ this._model = _model;
51
+ this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
52
+ this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
33
53
  this._totalSequences = Math.max(1, Math.floor(sequences));
34
54
  this._contextSize = Math.max(2, contextSize);
35
55
  this._batchSize = Math.max(batchSize, this._totalSequences);
56
+ this._flashAttention = flashAttention;
57
+ this._idealThreads = typeof threads === "number"
58
+ ? this._llama._threadsSplitter.normalizeThreadsValue(threads)
59
+ : this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? (this._llama.maxThreads === 0
60
+ ? this._llama.cpuMathCores
61
+ : this._llama.maxThreads));
62
+ this._minThreads = Math.max(1, typeof threads === "number"
63
+ ? 1
64
+ : this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1));
65
+ this._performanceTracking = !!performanceTracking;
36
66
  this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
37
- seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
38
- contextSize: contextSize * this._totalSequences,
67
+ contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
39
68
  batchSize: this._batchSize,
40
- threads: Math.max(0, Math.floor(threads)),
41
- embedding: _embedding,
42
- noSeed: _noSeed
69
+ sequences: this._totalSequences,
70
+ flashAttention: this._flashAttention,
71
+ threads: this._idealThreads,
72
+ embeddings: _embeddings,
73
+ performanceTracking: this._performanceTracking
43
74
  }));
44
75
  this._batchingOptions = {
45
76
  dispatchSchedule: batchingDispatchSchedule,
46
- itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
77
+ itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
47
78
  };
79
+ this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
80
+ this._gcRegistry.register(this, this._loraAdapters);
48
81
  this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
82
+ this._freeReservedThreads = this._freeReservedThreads.bind(this);
83
+ this._disposeAggregator.add(() => {
84
+ this._disposed = true;
85
+ });
86
+ this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
49
87
  this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
50
88
  this._disposeAggregator.add(this.onDispose.dispatchEvent);
89
+ this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
51
90
  this._disposeAggregator.add(() => {
52
- this._ctx.dispose();
91
+ if (this._loraAdapters.size > 0) {
92
+ const loraAdapters = new Set(this._loraAdapters);
93
+ this._loraAdapters.clear();
94
+ return this._model._removeLoraUsage(loraAdapters);
95
+ }
96
+ });
97
+ this._disposeAggregator.add(async () => {
98
+ await this._backendContextDisposeGuard.acquireDisposeLock();
99
+ await this._ctx.dispose();
100
+ this._modelPreventDisposalHandle.dispose();
53
101
  });
54
- this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
55
102
  }
56
- dispose() {
103
+ async dispose() {
57
104
  if (this._disposed)
58
105
  return;
59
106
  this._disposed = true;
60
- this._disposeAggregator.dispose();
107
+ await this._disposeAggregator.dispose();
61
108
  }
62
109
  /** @hidden */
63
- [Symbol.dispose]() {
110
+ [Symbol.asyncDispose]() {
64
111
  return this.dispose();
65
112
  }
66
113
  get disposed() {
@@ -75,6 +122,30 @@ export class LlamaContext {
75
122
  get batchSize() {
76
123
  return this._batchSize;
77
124
  }
125
+ get flashAttention() {
126
+ return this._flashAttention;
127
+ }
128
+ /**
129
+ * The actual size of the state in the memory in bytes.
130
+ * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
131
+ */
132
+ get stateSize() {
133
+ this._ensureNotDisposed();
134
+ return this._ctx.getStateSize();
135
+ }
136
+ /** The number of threads currently used to evaluate tokens */
137
+ get currentThreads() {
138
+ this._ensureNotDisposed();
139
+ return this._ctx.getThreads();
140
+ }
141
+ /**
142
+ * The number of threads that are preferred to be used to evaluate tokens.
143
+ *
144
+ * The actual number of threads used may be lower when other evaluations are running in parallel.
145
+ */
146
+ get idealThreads() {
147
+ return this._idealThreads;
148
+ }
78
149
  getAllocatedContextSize() {
79
150
  this._ensureNotDisposed();
80
151
  if (this._allocatedContextSize == null)
@@ -90,9 +161,9 @@ export class LlamaContext {
90
161
  /**
91
162
  * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
92
163
  * When there are no sequences left, this method will throw an error.
93
- * @param [options]
94
164
  */
95
- getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
165
+ getSequence(options = {}) {
166
+ const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
96
167
  this._ensureNotDisposed();
97
168
  const nextSequenceId = this._popSequenceId();
98
169
  if (nextSequenceId == null)
@@ -100,6 +171,7 @@ export class LlamaContext {
100
171
  return LlamaContextSequence._create({
101
172
  sequenceId: nextSequenceId,
102
173
  context: this,
174
+ tokenMeter: _tokenMeter,
103
175
  contextShift: {
104
176
  size: contextShiftSize,
105
177
  strategy: contextShiftStrategy
@@ -116,17 +188,18 @@ export class LlamaContext {
116
188
  this._currentDispatchBatchHandle = {};
117
189
  this._dispatchDecodeScheduled = false;
118
190
  this._batchDispatchPending = false;
119
- let prioritizeStrategy;
120
- try {
121
- this._ensureNotDisposed();
122
- prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
123
- }
124
- catch (err) {
125
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
126
- return;
127
- }
128
- let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
129
- while (shouldHaveAnotherBatch) {
191
+ let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
192
+ const resolvePrioritizationStrategy = () => {
193
+ try {
194
+ this._ensureNotDisposed();
195
+ return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
196
+ }
197
+ catch (err) {
198
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
199
+ }
200
+ return null;
201
+ };
202
+ const getOrderedQueuedDecodes = (prioritizationStrategy) => {
130
203
  const batchItemToQueuedDecodeMap = new Map();
131
204
  const batchItemsList = [];
132
205
  for (const queuedDecode of this._queuedDecodes) {
@@ -139,42 +212,65 @@ export class LlamaContext {
139
212
  }
140
213
  let prioritizedItems;
141
214
  try {
142
- prioritizedItems = prioritizeStrategy({
215
+ prioritizedItems = prioritizationStrategy({
143
216
  items: batchItemsList,
144
217
  size: this._batchSize
145
218
  });
146
219
  }
147
220
  catch (err) {
148
221
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
149
- return;
222
+ return null;
150
223
  }
151
- let batchTokenSlotsLeft = this._batchSize;
152
- const afterDecodeActions = [];
153
- const queuedDecodesToDelete = new Set();
154
- const currentQueuedDecodeItems = new Set();
155
- const currentBatchItems = [];
156
- let currentBatchSize = 0;
157
- for (const prioritizedItem of prioritizedItems) {
224
+ return prioritizedItems.map((prioritizedItem) => {
158
225
  const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
159
226
  if (queuedDecode == null)
160
227
  throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
161
228
  "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
162
- const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
163
- if (processAmount <= 0)
229
+ return {
230
+ queuedDecode,
231
+ processAmount: prioritizedItem.processAmount
232
+ };
233
+ });
234
+ };
235
+ const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
236
+ const currentBatchItems = [];
237
+ let currentBatchSize = 0;
238
+ let batchTokenSlotsLeft = batchSize;
239
+ for (const { queuedDecode, processAmount } of queuedDecodes) {
240
+ const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
241
+ if (resolvedProcessAmount <= 0) {
242
+ if (batchTokenSlotsLeft === 0)
243
+ break;
164
244
  continue;
165
- batchTokenSlotsLeft -= processAmount;
245
+ }
246
+ batchTokenSlotsLeft -= resolvedProcessAmount;
247
+ currentBatchSize += resolvedProcessAmount;
166
248
  currentBatchItems.push({
167
249
  queuedDecode,
168
- processAmount
250
+ processAmount: resolvedProcessAmount
169
251
  });
170
- currentBatchSize += processAmount;
171
252
  }
253
+ return {
254
+ currentBatchItems,
255
+ currentBatchSize
256
+ };
257
+ };
258
+ const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
259
+ const afterDecodeActions = [];
260
+ const queuedDecodesToDelete = new Set();
261
+ const currentQueuedDecodeItems = new Set();
172
262
  if (currentBatchSize !== 0)
173
263
  this._ctx.initBatch(currentBatchSize);
174
- for (const { queuedDecode, processAmount } of currentBatchItems) {
264
+ for (const { queuedDecode, processAmount } of batchItems) {
175
265
  let batchLogitIndex;
176
266
  try {
177
- batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
267
+ const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
268
+ processAmount === queuedDecode.tokens.length;
269
+ const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
270
+ const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
271
+ TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
272
+ TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
273
+ batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
178
274
  }
179
275
  catch (err) {
180
276
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
@@ -193,8 +289,6 @@ export class LlamaContext {
193
289
  queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
194
290
  queuedDecode.firstTokenSequenceIndex += processAmount;
195
291
  }
196
- if (batchTokenSlotsLeft === 0)
197
- break;
198
292
  }
199
293
  for (let i = 0; i < this._queuedDecodes.length; i++) {
200
294
  const queuedDecode = this._queuedDecodes[i];
@@ -204,14 +298,22 @@ export class LlamaContext {
204
298
  i--;
205
299
  }
206
300
  }
207
- shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
208
- try {
209
- if (currentBatchSize !== 0)
301
+ if (currentBatchSize !== 0) {
302
+ const allocationResult = this._threadSplitterConsumer?.getAllocationToConsume();
303
+ const [threadsToUse, consumerHandle] = allocationResult instanceof Promise
304
+ ? await allocationResult ?? []
305
+ : allocationResult ?? [];
306
+ try {
307
+ if (threadsToUse != null)
308
+ this._ctx.setThreads(threadsToUse);
210
309
  await this._ctx.decodeBatch();
211
- }
212
- catch (err) {
213
- this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
214
- return;
310
+ consumerHandle?.dispose();
311
+ }
312
+ catch (err) {
313
+ consumerHandle?.dispose();
314
+ this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
315
+ return;
316
+ }
215
317
  }
216
318
  for (const action of afterDecodeActions) {
217
319
  const [accept, reject] = action.response;
@@ -225,14 +327,56 @@ export class LlamaContext {
225
327
  }
226
328
  accept(undefined);
227
329
  }
330
+ };
331
+ const prioritizationStrategy = resolvePrioritizationStrategy();
332
+ if (prioritizationStrategy == null)
333
+ return; // all queued items are rejected and dequeued when we get here
334
+ this._reserveThreads();
335
+ try {
336
+ while (shouldHaveAnotherLoop) {
337
+ const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
338
+ if (orderedQueuedDecodes == null)
339
+ return; // all queued items are rejected and dequeued when we get here
340
+ const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
341
+ let preventDisposalHandle;
342
+ try {
343
+ preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
344
+ }
345
+ catch (err) {
346
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
347
+ return;
348
+ }
349
+ try {
350
+ await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
351
+ shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
352
+ }
353
+ finally {
354
+ preventDisposalHandle.dispose();
355
+ }
356
+ }
357
+ }
358
+ finally {
359
+ this._scheduleToFreeReservedThreads();
228
360
  }
229
361
  });
230
362
  }
231
- printTimings() {
363
+ /**
364
+ * Print the timings of token evaluation since that last print for this context.
365
+ *
366
+ * Requires the `performanceTracking` option to be enabled.
367
+ *
368
+ * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
369
+ * it won't print anything.
370
+ */
371
+ async printTimings() {
372
+ this._ensureNotDisposed();
373
+ if (!this._performanceTracking)
374
+ throw new UnsupportedError("Performance tracking is not enabled");
232
375
  this._ctx.printTimings();
376
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
233
377
  }
234
378
  /** @internal */
235
- async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
379
+ async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
236
380
  return await new Promise((accept, reject) => {
237
381
  this._queuedDecodes.push({
238
382
  sequenceId,
@@ -240,6 +384,7 @@ export class LlamaContext {
240
384
  firstTokenSequenceIndex,
241
385
  generateLogitAtTheEnd,
242
386
  evaluationPriority,
387
+ tokenMeter,
243
388
  response: [accept, reject],
244
389
  onDone
245
390
  });
@@ -252,16 +397,14 @@ export class LlamaContext {
252
397
  if (this._disposed)
253
398
  return;
254
399
  void withLock(this, "context", async () => {
400
+ if (this._disposed)
401
+ return;
255
402
  this._ctx.disposeSequence(sequenceId);
256
403
  this._unusedSequenceIds.push(sequenceId);
257
404
  this._onReclaimUnusedSequenceId.dispatchEvent();
258
405
  });
259
406
  }
260
407
  /** @internal */
261
- _acceptTokenOnGrammarEvaluationState(grammarEvaluationState, token) {
262
- this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
263
- }
264
- /** @internal */
265
408
  _popSequenceId() {
266
409
  if (this._unusedSequenceIds.length > 0)
267
410
  return this._unusedSequenceIds.shift();
@@ -311,20 +454,177 @@ export class LlamaContext {
311
454
  if (this._disposed)
312
455
  throw new DisposedError();
313
456
  }
457
+ /** @internal */
458
+ async _setLora({ filePath, scale }) {
459
+ const lora = await this._model._getOrLoadLora(filePath);
460
+ this._ctx.setLora(lora, scale ?? defaultLoraScale);
461
+ if (!this._loraAdapters.has(lora)) {
462
+ this._loraAdapters.add(lora);
463
+ lora.usages++;
464
+ }
465
+ }
466
+ /** @internal */
467
+ _reserveThreads() {
468
+ clearTimeout(this._freeReservedThreadsTimeout);
469
+ delete this._freeReservedThreadsTimeout;
470
+ if (this._threadSplitterConsumer != null)
471
+ return;
472
+ this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
473
+ }
474
+ /** @internal */
475
+ _freeReservedThreads() {
476
+ clearTimeout(this._freeReservedThreadsTimeout);
477
+ delete this._freeReservedThreadsTimeout;
478
+ if (this._threadSplitterConsumer == null)
479
+ return;
480
+ this._threadSplitterConsumer.dispose();
481
+ delete this._threadSplitterConsumer;
482
+ }
483
+ /** @internal */
484
+ _scheduleToFreeReservedThreads() {
485
+ if (this._threadSplitterConsumer == null)
486
+ return;
487
+ clearTimeout(this._freeReservedThreadsTimeout);
488
+ this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
489
+ }
490
+ /** @internal */
491
+ static async _create(options, { _model }) {
492
+ const sequences = options.sequences ?? getDefaultContextSequences();
493
+ const flashAttention = _model.flashAttentionSupported
494
+ ? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
495
+ : false;
496
+ const loraOptions = typeof options.lora === "string"
497
+ ? { adapters: [{ filePath: options.lora }] }
498
+ : options.lora;
499
+ let failedCreationRetries = options.failedCreationRemedy === false
500
+ ? 0
501
+ : Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
502
+ const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
503
+ ? 0
504
+ : options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
505
+ let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
506
+ batchSize: options.batchSize,
507
+ sequences: sequences,
508
+ modelGpuLayers: _model.gpuLayers,
509
+ modelTrainContextSize: _model.trainContextSize,
510
+ flashAttention,
511
+ getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
512
+ llamaGpu: _model._llama.gpu,
513
+ ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
514
+ isEmbeddingContext: options._embeddings
515
+ });
516
+ const minContextSize = options.contextSize === "auto"
517
+ ? shrinkRetriesMinContextSize
518
+ : (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
519
+ ? options.contextSize.min
520
+ : typeof options.contextSize === "number"
521
+ ? options.contextSize
522
+ : shrinkRetriesMinContextSize;
523
+ const { createSignal } = options;
524
+ async function createContext(contextSize) {
525
+ const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
526
+ const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
527
+ contextSize,
528
+ sequences,
529
+ isEmbeddingContext: options._embeddings,
530
+ modelGpuLayers: _model.gpuLayers,
531
+ batchSize,
532
+ flashAttention
533
+ }).gpuVram;
534
+ const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
535
+ const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
536
+ ? null
537
+ : _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
538
+ try {
539
+ if (createSignal?.aborted)
540
+ throw createSignal.reason;
541
+ const contextLoaded = await context._ctx.init();
542
+ if (createSignal?.aborted) {
543
+ if (contextLoaded)
544
+ await context._ctx.dispose();
545
+ throw createSignal.reason;
546
+ }
547
+ else if (!contextLoaded)
548
+ throw new Error("Failed to create context");
549
+ contextCreationMemoryReservation?.dispose?.();
550
+ if (loraOptions != null && loraOptions.adapters.length > 0) {
551
+ let loadedAdapters = 0;
552
+ for (const adapter of loraOptions.adapters) {
553
+ try {
554
+ await context._setLora({
555
+ filePath: adapter.filePath,
556
+ scale: adapter.scale
557
+ });
558
+ loadedAdapters++;
559
+ try {
560
+ loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
561
+ }
562
+ catch (err) {
563
+ console.error(err);
564
+ }
565
+ }
566
+ catch (err) {
567
+ await context.dispose();
568
+ throw err;
569
+ }
570
+ if (createSignal?.aborted) {
571
+ await context.dispose();
572
+ throw createSignal.reason;
573
+ }
574
+ }
575
+ }
576
+ else if (loraOptions?.onLoadProgress != null) {
577
+ try {
578
+ loraOptions.onLoadProgress(1);
579
+ }
580
+ catch (err) {
581
+ console.error(err);
582
+ }
583
+ }
584
+ return context;
585
+ }
586
+ finally {
587
+ contextCreationMemoryReservation?.dispose?.();
588
+ }
589
+ }
590
+ while (failedCreationRetries >= 0) {
591
+ try {
592
+ return await createContext(contextSize);
593
+ }
594
+ catch (err) {
595
+ if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
596
+ throw err;
597
+ failedCreationRetries--;
598
+ let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
599
+ ? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
600
+ : Math.floor(failedCreationAutoContextSizeShrink(contextSize));
601
+ if (!Number.isFinite(newContextSize))
602
+ throw err;
603
+ if (newContextSize < minContextSize)
604
+ newContextSize = minContextSize;
605
+ if (newContextSize >= contextSize)
606
+ throw err;
607
+ contextSize = newContextSize;
608
+ }
609
+ }
610
+ throw new Error("Failed to create context");
611
+ }
314
612
  }
315
613
  export class LlamaContextSequence {
316
614
  /** @internal */ _sequenceId;
317
615
  /** @internal */ _gcRegistry;
318
616
  /** @internal */ _context;
319
617
  /** @internal */ _contextShift;
618
+ /** @internal */ _tokenMeter;
320
619
  /** @internal */ _disposeAggregator = new DisposeAggregator();
321
620
  /** @internal */ _contextTokens = [];
322
621
  /** @internal */ _nextTokenIndex = 0;
323
622
  /** @internal */ _disposed = false;
324
623
  onDispose = new EventRelay();
325
- constructor({ sequenceId, context, contextShift }) {
624
+ constructor({ sequenceId, context, tokenMeter, contextShift }) {
326
625
  this._sequenceId = sequenceId;
327
626
  this._context = context;
627
+ this._tokenMeter = tokenMeter ?? new TokenMeter();
328
628
  this._contextShift = contextShift;
329
629
  this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
330
630
  this._gcRegistry.register(this, sequenceId);
@@ -361,6 +661,9 @@ export class LlamaContextSequence {
361
661
  get contextTokens() {
362
662
  return this._contextTokens.slice();
363
663
  }
664
+ get tokenMeter() {
665
+ return this._tokenMeter;
666
+ }
364
667
  get isLoadedToMemory() {
365
668
  return !this._disposed;
366
669
  }
@@ -386,7 +689,7 @@ export class LlamaContextSequence {
386
689
  }
387
690
  /**
388
691
  * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
389
- * the start and end of each range are exclusive.
692
+ * The start of each range is inclusive, and the end of each range is exclusive.
390
693
  * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
391
694
  */
392
695
  async eraseContextTokenRanges(ranges) {
@@ -395,6 +698,8 @@ export class LlamaContextSequence {
395
698
  this._ensureNotDisposed();
396
699
  if (ranges.length === 0)
397
700
  return;
701
+ // if the deletion fails, we'll have to dispose the sequence and fill it up again
702
+ let deletionSuccessful = true;
398
703
  const resolvedRanges = ranges
399
704
  .map(({ start, end }) => {
400
705
  if (start === end)
@@ -424,34 +729,42 @@ export class LlamaContextSequence {
424
729
  let lastDeleteRangeEndPos = null;
425
730
  for (const range of resolvedRanges) {
426
731
  this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
427
- this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
428
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
732
+ if (deletionSuccessful)
733
+ deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
734
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
429
735
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
430
736
  removedTokens += range.end - range.start;
431
737
  lastDeleteRangeEndPos = range.end;
432
738
  }
433
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
739
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
434
740
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
435
741
  this._nextTokenIndex -= removedTokens;
742
+ if (deletionSuccessful)
743
+ return;
744
+ const newSequenceTokens = this._contextTokens.slice();
745
+ this._nextTokenIndex = 0;
746
+ this._context._ctx.disposeSequence(this._sequenceId);
747
+ await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
436
748
  });
437
749
  }
438
- /**
439
- * @param tokens
440
- * @param [options]
441
- */
442
- evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
750
+ evaluate(tokens, options = {}) {
751
+ const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
443
752
  return this._evaluate(tokens, {
444
753
  temperature,
754
+ minP,
445
755
  topK,
446
756
  topP,
757
+ seed,
447
758
  grammarEvaluationState,
448
759
  repeatPenalty,
760
+ tokenBias,
449
761
  evaluationPriority,
450
762
  contextShiftOptions: {
451
763
  size: contextShiftSize,
452
764
  strategy: contextShiftStrategy
453
765
  },
454
- yieldEosToken
766
+ yieldEogToken,
767
+ _noSampling
455
768
  });
456
769
  }
457
770
  /**
@@ -474,59 +787,85 @@ export class LlamaContextSequence {
474
787
  }
475
788
  }
476
789
  /** @internal */
477
- async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
790
+ async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
478
791
  this._ensureNotDisposed();
479
792
  let evalTokens = tokens;
480
793
  if (evalTokens.length === 0)
481
794
  return;
482
- // eslint-disable-next-line no-constant-condition
483
- while (true) {
484
- this._ensureNotDisposed();
485
- // Evaluate to get the next token.
486
- const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
487
- const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
488
- ? repeatPenalty.punishTokens()
489
- : repeatPenalty?.punishTokens;
490
- const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
491
- ? grammarEvaluationState()
492
- : grammarEvaluationState;
493
- if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
494
- throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
495
- return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
496
- temperature,
497
- topK,
498
- topP,
499
- repeatPenalty: repeatPenalty?.penalty,
500
- repeatPenaltyTokens: repeatPenaltyTokens != null
501
- ? Uint32Array.from(repeatPenaltyTokens)
502
- : undefined,
503
- repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
504
- repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
505
- grammarEvaluationState: resolvedGrammarEvaluationState?._state
506
- }));
507
- });
508
- if (nextToken == null)
509
- return;
510
- // the model finished generating text
511
- if (!yieldEosToken && nextToken === this._context.model.tokens.eos)
512
- break;
513
- yield nextToken;
514
- // Create tokens for the next eval.
515
- evalTokens = [nextToken];
795
+ const sampler = new LlamaSampler(this.model);
796
+ try {
797
+ while (true) {
798
+ this._ensureNotDisposed();
799
+ // Evaluate to get the next token.
800
+ const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
801
+ if (_noSampling)
802
+ return null;
803
+ const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
804
+ ? repeatPenalty.punishTokens()
805
+ : repeatPenalty?.punishTokens;
806
+ const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
807
+ const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
808
+ ? grammarEvaluationState()
809
+ : grammarEvaluationState;
810
+ if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
811
+ throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
812
+ const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
813
+ sampler.applyConfig(removeNullFields({
814
+ temperature,
815
+ minP,
816
+ topK,
817
+ topP,
818
+ seed: Math.max(0, Number.isFinite(seed)
819
+ ? Math.floor(seed ?? (Date.now() / 1000))
820
+ : Math.floor(Date.now() / 1000)),
821
+ repeatPenalty: repeatPenalty?.penalty,
822
+ repeatPenaltyMaxTokens: maxPunishTokens,
823
+ repeatPenaltyTokens: repeatPenaltyTokens != null
824
+ ? Uint32Array.from(repeatPenaltyTokens)
825
+ : undefined,
826
+ repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
827
+ repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
828
+ tokenBiasKeys,
829
+ tokenBiasValues,
830
+ grammarEvaluationState: resolvedGrammarEvaluationState?._state
831
+ }));
832
+ return withLock(sampler, "sample", async () => {
833
+ if (sampler.disposed)
834
+ return null;
835
+ return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
836
+ });
837
+ });
838
+ if (nextToken === -1)
839
+ throw new Error("Failed to sample next token");
840
+ if (nextToken == null)
841
+ return;
842
+ // the model finished generating text
843
+ if (!yieldEogToken && this._context.model.isEogToken(nextToken))
844
+ break;
845
+ const replacementToken = (yield nextToken);
846
+ // set the tokens for the next evaluation
847
+ if (replacementToken != null)
848
+ evalTokens = [replacementToken];
849
+ else
850
+ evalTokens = [nextToken];
851
+ }
852
+ }
853
+ finally {
854
+ void withLock(sampler, "sample", sampler.asyncDispose);
516
855
  }
517
856
  }
518
857
  /** @internal */
519
- async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
858
+ async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
520
859
  this._ensureNotDisposed();
521
860
  const tokensLeftToDecode = tokens.slice();
522
861
  return await withLock(this, "evaluate", async () => {
523
862
  while (tokensLeftToDecode.length > 0) {
524
863
  this._ensureNotDisposed();
525
- let freeSpace = this._context.contextSize - this._nextTokenIndex;
526
- if (freeSpace <= 1) {
864
+ let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
865
+ if (freeSpace <= 0) {
527
866
  await this._freeUpSpaceForTokens(contextShiftOptions);
528
- freeSpace = this._context.contextSize - this._nextTokenIndex;
529
- if (freeSpace <= 1)
867
+ freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
868
+ if (freeSpace <= 0)
530
869
  throw new Error("Failed to free up space for new tokens");
531
870
  }
532
871
  const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
@@ -536,7 +875,8 @@ export class LlamaContextSequence {
536
875
  tokens: tokensToDecode,
537
876
  firstTokenSequenceIndex: this._nextTokenIndex,
538
877
  generateLogitAtTheEnd,
539
- evaluationPriority
878
+ evaluationPriority,
879
+ tokenMeter
540
880
  }, !generateLogitAtTheEnd
541
881
  ? undefined
542
882
  : onDecodeDone);
@@ -556,7 +896,10 @@ export class LlamaContextSequence {
556
896
  : contextShiftOptions.size));
557
897
  this._ensureNotDisposed();
558
898
  if (contextShiftOptions.strategy === "eraseBeginning") {
559
- await this.eraseContextTokenRanges([{ start: 0, end: size }]);
899
+ let eraseStartIndex = 0;
900
+ if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
901
+ eraseStartIndex = 1;
902
+ await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
560
903
  }
561
904
  else {
562
905
  const ranges = await contextShiftOptions.strategy({
@@ -566,7 +909,7 @@ export class LlamaContextSequence {
566
909
  if (ranges == null)
567
910
  throw new Error("Invalid delete ranges");
568
911
  await this.eraseContextTokenRanges(ranges);
569
- if (this.nextTokenIndex >= this._context.contextSize)
912
+ if (this.nextTokenIndex >= this._context.contextSize - 1)
570
913
  await this.eraseContextTokenRanges([{ start: 0, end: size }]);
571
914
  }
572
915
  }
@@ -579,10 +922,11 @@ export class LlamaContextSequence {
579
922
  * We need this to make it impossible to manually create instances of this class outside the code of this library
580
923
  * @internal
581
924
  */
582
- static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
925
+ static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
583
926
  return new LlamaContextSequence({
584
927
  sequenceId,
585
928
  context,
929
+ tokenMeter,
586
930
  contextShift: {
587
931
  size: contextShiftSize,
588
932
  strategy: contextShiftStrategy
@@ -590,14 +934,52 @@ export class LlamaContextSequence {
590
934
  });
591
935
  }
592
936
  }
937
+ function getTokenBiasesForAddon(tokenBias, currentModel) {
938
+ if (tokenBias == null)
939
+ return {
940
+ tokenBiasKeys: undefined,
941
+ tokenBiasValues: undefined
942
+ };
943
+ if (tokenBias instanceof Function)
944
+ tokenBias = tokenBias();
945
+ if (tokenBias._tokenizer !== currentModel.tokenizer)
946
+ throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
947
+ "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
948
+ const tokenBiasKeys = [];
949
+ const tokenBiasValues = [];
950
+ for (const [token, bias] of tokenBias._biases) {
951
+ tokenBiasKeys.push(token);
952
+ tokenBiasValues.push(bias);
953
+ }
954
+ if (tokenBiasKeys.length === 0 || tokenBiasValues.length === 0) {
955
+ return {
956
+ tokenBiasKeys: undefined,
957
+ tokenBiasValues: undefined
958
+ };
959
+ }
960
+ return {
961
+ tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
962
+ tokenBiasValues: Float32Array.from(tokenBiasValues)
963
+ };
964
+ }
593
965
  function disposeContextIfReferenced(contextRef) {
594
966
  const context = contextRef.deref();
595
967
  if (context != null)
596
- context.dispose();
968
+ void context.dispose();
597
969
  }
598
970
  function disposeContextSequenceIfReferenced(contextRef) {
599
971
  const context = contextRef.deref();
600
972
  if (context != null)
601
973
  context.dispose();
602
974
  }
975
+ export function getDefaultContextBatchSize({ contextSize, sequences }) {
976
+ return Math.min(contextSize * sequences, 512);
977
+ }
978
+ export function getDefaultContextSequences() {
979
+ return 1;
980
+ }
981
+ const defaultFallbackContextSize = 4096;
982
+ export function getDefaultModelContextSize({ trainContextSize }) {
983
+ return trainContextSize ?? defaultFallbackContextSize;
984
+ }
603
985
  //# sourceMappingURL=LlamaContext.js.map