node-llama-cpp 3.0.0-beta.9 → 3.0.1

This diff shows the changes between publicly available package versions released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (684)
  1. package/README.md +42 -27
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-cuda.moved.txt +1 -0
  5. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  6. package/bins/_linux-x64.moved.txt +1 -0
  7. package/bins/_mac-arm64-metal.moved.txt +1 -0
  8. package/bins/_mac-x64.moved.txt +1 -0
  9. package/bins/_win-arm64.moved.txt +1 -0
  10. package/bins/_win-x64-cuda.moved.txt +1 -0
  11. package/bins/_win-x64-vulkan.moved.txt +1 -0
  12. package/bins/_win-x64.moved.txt +1 -0
  13. package/dist/ChatWrapper.d.ts +19 -39
  14. package/dist/ChatWrapper.js +129 -72
  15. package/dist/ChatWrapper.js.map +1 -1
  16. package/dist/apiDocsIndex.d.ts +1 -0
  17. package/dist/apiDocsIndex.js +7 -0
  18. package/dist/apiDocsIndex.js.map +1 -0
  19. package/dist/bindings/AddonTypes.d.ts +88 -20
  20. package/dist/bindings/Llama.d.ts +43 -3
  21. package/dist/bindings/Llama.js +193 -23
  22. package/dist/bindings/Llama.js.map +1 -1
  23. package/dist/bindings/consts.d.ts +2 -0
  24. package/dist/bindings/consts.js +13 -0
  25. package/dist/bindings/consts.js.map +1 -0
  26. package/dist/bindings/getLlama.d.ts +123 -18
  27. package/dist/bindings/getLlama.js +264 -75
  28. package/dist/bindings/getLlama.js.map +1 -1
  29. package/dist/bindings/types.d.ts +29 -5
  30. package/dist/bindings/types.js +51 -2
  31. package/dist/bindings/types.js.map +1 -1
  32. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  33. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  34. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  35. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  36. package/dist/bindings/utils/asyncEvery.js +15 -0
  37. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  38. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  39. package/dist/bindings/utils/asyncSome.js +27 -0
  40. package/dist/bindings/utils/asyncSome.js.map +1 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
  46. package/dist/bindings/utils/compileLLamaCpp.js +250 -81
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  52. package/dist/bindings/utils/detectGlibc.js +46 -0
  53. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  55. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  56. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
  58. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  59. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  60. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  61. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  62. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  63. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  64. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  65. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  66. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  67. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  68. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  69. package/dist/bindings/utils/getPlatform.js.map +1 -1
  70. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  71. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  72. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  73. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  74. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  75. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  76. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  77. package/dist/bindings/utils/hasFileInPath.js +34 -0
  78. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  79. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  80. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  81. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  82. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  83. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  84. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  85. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  86. package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
  87. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  88. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  89. package/dist/bindings/utils/testBindingBinary.js +100 -0
  90. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  91. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  92. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  93. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  94. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  95. package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
  96. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  97. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  98. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  99. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  100. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  101. package/dist/chatWrappers/FalconChatWrapper.js +39 -21
  102. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  103. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  104. package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
  105. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  107. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  108. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  109. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  110. package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
  111. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  113. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
  114. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  115. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  116. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  117. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  118. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
  119. package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
  120. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  121. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  122. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  123. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  124. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
  125. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  127. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
  128. package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  130. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  131. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  133. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  134. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  136. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  137. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  138. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
  141. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  142. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  143. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  144. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  145. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
  146. package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
  147. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  148. package/dist/cli/cli.js +19 -11
  149. package/dist/cli/cli.js.map +1 -1
  150. package/dist/cli/commands/ChatCommand.d.ts +16 -7
  151. package/dist/cli/commands/ChatCommand.js +321 -190
  152. package/dist/cli/commands/ChatCommand.js.map +1 -1
  153. package/dist/cli/commands/CompleteCommand.d.ts +31 -0
  154. package/dist/cli/commands/CompleteCommand.js +402 -0
  155. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  156. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  157. package/dist/cli/commands/DebugCommand.js +54 -0
  158. package/dist/cli/commands/DebugCommand.js.map +1 -0
  159. package/dist/cli/commands/InfillCommand.d.ts +33 -0
  160. package/dist/cli/commands/InfillCommand.js +438 -0
  161. package/dist/cli/commands/InfillCommand.js.map +1 -0
  162. package/dist/cli/commands/InitCommand.d.ts +11 -0
  163. package/dist/cli/commands/InitCommand.js +195 -0
  164. package/dist/cli/commands/InitCommand.js.map +1 -0
  165. package/dist/cli/commands/OnPostInstallCommand.js +6 -2
  166. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  167. package/dist/cli/commands/PullCommand.d.ts +13 -0
  168. package/dist/cli/commands/PullCommand.js +158 -0
  169. package/dist/cli/commands/PullCommand.js.map +1 -0
  170. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  171. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  172. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  173. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  174. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  175. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  176. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  177. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  179. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  180. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  182. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  183. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  185. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  186. package/dist/cli/commands/source/SourceCommand.js +19 -0
  187. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  188. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  189. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  190. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  191. package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
  192. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
  193. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  194. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
  195. package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
  196. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  197. package/dist/cli/projectTemplates.d.ts +7 -0
  198. package/dist/cli/projectTemplates.js +10 -0
  199. package/dist/cli/projectTemplates.js.map +1 -0
  200. package/dist/cli/recommendedModels.d.ts +2 -0
  201. package/dist/cli/recommendedModels.js +585 -0
  202. package/dist/cli/recommendedModels.js.map +1 -0
  203. package/dist/cli/startCreateCli.d.ts +2 -0
  204. package/dist/cli/startCreateCli.js +26 -0
  205. package/dist/cli/startCreateCli.js.map +1 -0
  206. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  207. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  208. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  209. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  210. package/dist/cli/utils/ConsoleTable.js +86 -0
  211. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  212. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  213. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  214. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  215. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  216. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  217. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  218. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  219. package/dist/cli/utils/getReadablePath.js +14 -0
  220. package/dist/cli/utils/getReadablePath.js.map +1 -0
  221. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  222. package/dist/cli/utils/interactivelyAskForModel.js +450 -0
  223. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  224. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  225. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  226. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  227. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  228. package/dist/cli/utils/printCommonInfoLines.js +82 -0
  229. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  230. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  231. package/dist/cli/utils/printInfoLine.js +54 -0
  232. package/dist/cli/utils/printInfoLine.js.map +1 -0
  233. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  234. package/dist/cli/utils/projectTemplates.js +47 -0
  235. package/dist/cli/utils/projectTemplates.js.map +1 -0
  236. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  237. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  238. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  239. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  240. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  241. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  242. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  243. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  244. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  245. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  246. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  247. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  248. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  249. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  250. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  251. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  252. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  253. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  254. package/dist/commands.d.ts +4 -3
  255. package/dist/commands.js +6 -3
  256. package/dist/commands.js.map +1 -1
  257. package/dist/config.d.ts +35 -4
  258. package/dist/config.js +58 -17
  259. package/dist/config.js.map +1 -1
  260. package/dist/consts.d.ts +4 -0
  261. package/dist/consts.js +11 -0
  262. package/dist/consts.js.map +1 -0
  263. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
  264. package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
  265. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  266. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  267. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  268. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  269. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  270. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  271. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  272. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  273. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  274. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  275. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
  276. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  277. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
  278. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
  279. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  280. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
  281. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  282. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  283. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
  284. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  285. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  286. package/dist/evaluator/LlamaCompletion.d.ts +168 -0
  287. package/dist/evaluator/LlamaCompletion.js +470 -0
  288. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  289. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +62 -21
  290. package/dist/evaluator/LlamaContext/LlamaContext.js +501 -120
  291. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  292. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  293. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  294. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  295. package/dist/evaluator/LlamaContext/types.d.ts +177 -16
  296. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  297. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  298. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  299. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  301. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  302. package/dist/evaluator/LlamaEmbedding.js +53 -0
  303. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  304. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
  305. package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
  306. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  307. package/dist/evaluator/LlamaGrammar.d.ts +16 -13
  308. package/dist/evaluator/LlamaGrammar.js +17 -10
  309. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  310. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  311. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  312. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  313. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  314. package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
  315. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  316. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
  317. package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
  318. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  319. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  320. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  321. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  322. package/dist/evaluator/TokenBias.d.ts +34 -0
  323. package/dist/evaluator/TokenBias.js +65 -0
  324. package/dist/evaluator/TokenBias.js.map +1 -0
  325. package/dist/evaluator/TokenMeter.d.ts +45 -0
  326. package/dist/evaluator/TokenMeter.js +74 -0
  327. package/dist/evaluator/TokenMeter.js.map +1 -0
  328. package/dist/gguf/consts.d.ts +4 -0
  329. package/dist/gguf/consts.js +12 -0
  330. package/dist/gguf/consts.js.map +1 -0
  331. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  332. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  333. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  334. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  335. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  336. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  337. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  338. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  339. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  340. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  341. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  342. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  343. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
  344. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
  345. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  346. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  347. package/dist/gguf/insights/GgufInsights.js +401 -0
  348. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  349. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
  350. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
  351. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  352. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  353. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  354. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  355. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  356. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  357. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  358. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  359. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  360. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  361. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  362. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  363. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  364. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  365. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  366. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  367. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  368. package/dist/gguf/parser/parseGguf.js +61 -0
  369. package/dist/gguf/parser/parseGguf.js.map +1 -0
  370. package/dist/gguf/readGgufFileInfo.d.ts +45 -0
  371. package/dist/gguf/readGgufFileInfo.js +71 -0
  372. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  373. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  374. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  375. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  376. package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
  377. package/dist/gguf/types/GgufMetadataTypes.js +114 -0
  378. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  379. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  380. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  381. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  382. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  383. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  384. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  385. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  386. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  387. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  388. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  389. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  390. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  391. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  392. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  393. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  394. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  395. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  396. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  397. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  398. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  399. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  400. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  401. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  402. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  403. package/dist/index.d.ts +39 -14
  404. package/dist/index.js +29 -8
  405. package/dist/index.js.map +1 -1
  406. package/dist/state.d.ts +2 -0
  407. package/dist/state.js +7 -0
  408. package/dist/state.js.map +1 -1
  409. package/dist/tsconfig.tsbuildinfo +1 -0
  410. package/dist/types.d.ts +131 -5
  411. package/dist/types.js.map +1 -1
  412. package/dist/utils/DisposeGuard.d.ts +13 -0
  413. package/dist/utils/DisposeGuard.js +120 -0
  414. package/dist/utils/DisposeGuard.js.map +1 -0
  415. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  416. package/dist/utils/InsufficientMemoryError.js +6 -0
  417. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  418. package/dist/utils/LlamaText.d.ts +73 -26
  419. package/dist/utils/LlamaText.js +475 -157
  420. package/dist/utils/LlamaText.js.map +1 -1
  421. package/dist/utils/LruCache.d.ts +12 -0
  422. package/dist/utils/LruCache.js +44 -0
  423. package/dist/utils/LruCache.js.map +1 -0
  424. package/dist/utils/OverridesObject.d.ts +7 -0
  425. package/dist/utils/OverridesObject.js +2 -0
  426. package/dist/utils/OverridesObject.js.map +1 -0
  427. package/dist/utils/ReplHistory.js +5 -1
  428. package/dist/utils/ReplHistory.js.map +1 -1
  429. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  430. package/dist/utils/StopGenerationDetector.js +108 -22
  431. package/dist/utils/StopGenerationDetector.js.map +1 -1
  432. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  433. package/dist/utils/ThreadsSplitter.js +177 -0
  434. package/dist/utils/ThreadsSplitter.js.map +1 -0
  435. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  436. package/dist/utils/TokenStreamRegulator.js +102 -10
  437. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  438. package/dist/utils/UnsupportedError.d.ts +2 -0
  439. package/dist/utils/UnsupportedError.js +7 -0
  440. package/dist/utils/UnsupportedError.js.map +1 -0
  441. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  442. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  443. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  444. package/dist/utils/clearTempFolder.js.map +1 -1
  445. package/dist/utils/cmake.js +23 -10
  446. package/dist/utils/cmake.js.map +1 -1
  447. package/dist/utils/compareTokens.d.ts +1 -1
  448. package/dist/utils/compareTokens.js.map +1 -1
  449. package/dist/utils/createModelDownloader.d.ts +199 -0
  450. package/dist/utils/createModelDownloader.js +405 -0
  451. package/dist/utils/createModelDownloader.js.map +1 -0
  452. package/dist/utils/findBestOption.d.ts +4 -0
  453. package/dist/utils/findBestOption.js +15 -0
  454. package/dist/utils/findBestOption.js.map +1 -0
  455. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  458. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  459. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  462. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  465. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  477. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  482. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  484. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  489. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  492. package/dist/utils/gbnfJson/types.d.ts +3 -0
  493. package/dist/utils/gbnfJson/types.js.map +1 -1
  494. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  497. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  500. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  502. package/dist/utils/getBuildDefaults.d.ts +1 -2
  503. package/dist/utils/getBuildDefaults.js +2 -3
  504. package/dist/utils/getBuildDefaults.js.map +1 -1
  505. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  506. package/dist/utils/getConsoleLogPrefix.js +5 -4
  507. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  508. package/dist/utils/getGrammarsFolder.js +1 -1
  509. package/dist/utils/getGrammarsFolder.js.map +1 -1
  510. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  511. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  512. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  513. package/dist/utils/getReadableContextSize.d.ts +1 -0
  514. package/dist/utils/getReadableContextSize.js +7 -0
  515. package/dist/utils/getReadableContextSize.js.map +1 -0
  516. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  517. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  518. package/dist/utils/gitReleaseBundles.js +68 -1
  519. package/dist/utils/gitReleaseBundles.js.map +1 -1
  520. package/dist/utils/isToken.d.ts +2 -0
  521. package/dist/utils/isToken.js +4 -0
  522. package/dist/utils/isToken.js.map +1 -0
  523. package/dist/utils/isUrl.d.ts +1 -0
  524. package/dist/utils/isUrl.js +15 -0
  525. package/dist/utils/isUrl.js.map +1 -0
  526. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  527. package/dist/utils/mergeUnionTypes.js +2 -0
  528. package/dist/utils/mergeUnionTypes.js.map +1 -0
  529. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  530. package/dist/utils/modelFileAccesTokens.js +40 -0
  531. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  532. package/dist/utils/parseModelFileName.d.ts +1 -0
  533. package/dist/utils/parseModelFileName.js +6 -1
  534. package/dist/utils/parseModelFileName.js.map +1 -1
  535. package/dist/utils/parseTextTemplate.d.ts +66 -0
  536. package/dist/utils/parseTextTemplate.js +116 -0
  537. package/dist/utils/parseTextTemplate.js.map +1 -0
  538. package/dist/utils/prettyPrintObject.d.ts +10 -1
  539. package/dist/utils/prettyPrintObject.js +61 -15
  540. package/dist/utils/prettyPrintObject.js.map +1 -1
  541. package/dist/utils/pushAll.d.ts +6 -0
  542. package/dist/utils/pushAll.js +11 -0
  543. package/dist/utils/pushAll.js.map +1 -0
  544. package/dist/utils/removeNullFields.d.ts +2 -2
  545. package/dist/utils/removeNullFields.js.map +1 -1
  546. package/dist/utils/resolveGithubRelease.d.ts +2 -2
  547. package/dist/utils/resolveGithubRelease.js.map +1 -1
  548. package/dist/utils/resolveLastTokens.d.ts +2 -0
  549. package/dist/utils/resolveLastTokens.js +12 -0
  550. package/dist/utils/resolveLastTokens.js.map +1 -0
  551. package/dist/utils/runtime.d.ts +4 -0
  552. package/dist/utils/runtime.js +8 -0
  553. package/dist/utils/runtime.js.map +1 -0
  554. package/dist/utils/safeEventCallback.d.ts +6 -0
  555. package/dist/utils/safeEventCallback.js +29 -0
  556. package/dist/utils/safeEventCallback.js.map +1 -0
  557. package/dist/utils/spawnCommand.d.ts +11 -2
  558. package/dist/utils/spawnCommand.js +55 -7
  559. package/dist/utils/spawnCommand.js.map +1 -1
  560. package/dist/utils/tokenizeInput.d.ts +1 -1
  561. package/dist/utils/tokenizeInput.js +6 -3
  562. package/dist/utils/tokenizeInput.js.map +1 -1
  563. package/dist/utils/transformPromisable.d.ts +40 -0
  564. package/dist/utils/transformPromisable.js +64 -0
  565. package/dist/utils/transformPromisable.js.map +1 -0
  566. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  567. package/dist/utils/truncateTextAndRoundToWords.js +32 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  569. package/dist/utils/utilTypes.d.ts +3 -0
  570. package/dist/utils/utilTypes.js +2 -0
  571. package/dist/utils/utilTypes.js.map +1 -0
  572. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  573. package/dist/utils/withLockfile.js.map +1 -1
  574. package/dist/utils/withOra.d.ts +2 -0
  575. package/dist/utils/withOra.js +16 -6
  576. package/dist/utils/withOra.js.map +1 -1
  577. package/dist/utils/withProgressLog.d.ts +22 -0
  578. package/dist/utils/withProgressLog.js +211 -0
  579. package/dist/utils/withProgressLog.js.map +1 -0
  580. package/dist/utils/withStatusLogs.js +1 -1
  581. package/dist/utils/withStatusLogs.js.map +1 -1
  582. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  583. package/dist/utils/wrapAbortSignal.js +9 -0
  584. package/dist/utils/wrapAbortSignal.js.map +1 -0
  585. package/llama/CMakeLists.txt +134 -5
  586. package/llama/addon/AddonContext.cpp +629 -0
  587. package/llama/addon/AddonContext.h +52 -0
  588. package/llama/addon/AddonGrammar.cpp +39 -0
  589. package/llama/addon/AddonGrammar.h +19 -0
  590. package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
  591. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  592. package/llama/addon/AddonModel.cpp +672 -0
  593. package/llama/addon/AddonModel.h +61 -0
  594. package/llama/addon/AddonModelData.cpp +25 -0
  595. package/llama/addon/AddonModelData.h +15 -0
  596. package/llama/addon/AddonModelLora.cpp +105 -0
  597. package/llama/addon/AddonModelLora.h +28 -0
  598. package/llama/addon/AddonSampler.cpp +513 -0
  599. package/llama/addon/AddonSampler.h +65 -0
  600. package/llama/addon/RingBuffer.h +109 -0
  601. package/llama/addon/addon.cpp +223 -0
  602. package/llama/addon/addonGlobals.cpp +22 -0
  603. package/llama/addon/addonGlobals.h +12 -0
  604. package/llama/addon/globals/addonLog.cpp +136 -0
  605. package/llama/addon/globals/addonLog.h +21 -0
  606. package/llama/addon/globals/addonProgress.cpp +15 -0
  607. package/llama/addon/globals/addonProgress.h +15 -0
  608. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  609. package/llama/addon/globals/getGpuInfo.h +6 -0
  610. package/llama/binariesGithubRelease.json +1 -1
  611. package/llama/gitRelease.bundle +0 -0
  612. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  613. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  614. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  615. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  616. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  617. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  618. package/llama/grammars/README.md +297 -6
  619. package/llama/grammars/json.gbnf +4 -4
  620. package/llama/grammars/json_arr.gbnf +4 -4
  621. package/llama/llama.cpp.info.json +1 -1
  622. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  623. package/package.json +109 -59
  624. package/templates/packed/electron-typescript-react.json +1 -0
  625. package/templates/packed/node-typescript.json +1 -0
  626. package/dist/AbortError.d.ts +0 -2
  627. package/dist/AbortError.js +0 -7
  628. package/dist/AbortError.js.map +0 -1
  629. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  630. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  631. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  632. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  633. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  634. package/dist/cli/commands/BuildCommand.d.ts +0 -11
  635. package/dist/cli/commands/BuildCommand.js +0 -106
  636. package/dist/cli/commands/BuildCommand.js.map +0 -1
  637. package/dist/cli/commands/ClearCommand.js.map +0 -1
  638. package/dist/cli/commands/DownloadCommand.js +0 -169
  639. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  640. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  641. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
  642. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  643. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  644. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  645. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  646. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  647. package/dist/evaluator/LlamaModel.d.ts +0 -120
  648. package/dist/evaluator/LlamaModel.js +0 -320
  649. package/dist/evaluator/LlamaModel.js.map +0 -1
  650. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  651. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  652. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  653. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  654. package/dist/utils/parseModelTypeDescription.js +0 -9
  655. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  656. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  657. package/dist/utils/resolveChatWrapper.js +0 -16
  658. package/dist/utils/resolveChatWrapper.js.map +0 -1
  659. package/llama/addon.cpp +0 -950
  660. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  661. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  662. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  663. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  664. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  665. package/llamaBins/linux-x64/llama-addon.node +0 -0
  666. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  667. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  668. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  669. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
  670. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  671. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  672. package/llamaBins/mac-x64/llama-addon.node +0 -0
  673. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  674. package/llamaBins/win-x64/llama-addon.exp +0 -0
  675. package/llamaBins/win-x64/llama-addon.lib +0 -0
  676. package/llamaBins/win-x64/llama-addon.node +0 -0
  677. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  678. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  679. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  680. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  681. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  682. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  683. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  684. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -1,66 +1,113 @@
1
- import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
1
+ import { AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
2
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
3
3
  import { compareTokens } from "../../utils/compareTokens.js";
4
- import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
4
+ import { DisposeGuard } from "../../utils/DisposeGuard.js";
5
+ import { TokenMeter } from "../TokenMeter.js";
6
+ import { UnsupportedError } from "../../utils/UnsupportedError.js";
7
+ import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
8
+ import { LlamaSampler } from "./LlamaSampler.js";
9
+ const defaultLoraScale = 1;
10
+ const shrinkRetriesMinContextSize = 4096;
11
+ const defaultMaxPunishTokens = 64;
12
+ const defaultFailedCreationRemedy = {
13
+ retries: 6,
14
+ autoContextSizeShrink: 0.16
15
+ };
5
16
  export class LlamaContext {
6
17
  /** @internal */ _llama;
7
18
  /** @internal */ _ctx;
8
19
  /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
20
+ /** @internal */ _backendContextDisposeGuard;
9
21
  /** @internal */ _model;
10
22
  /** @internal */ _contextSize;
11
23
  /** @internal */ _batchSize;
24
+ /** @internal */ _flashAttention;
25
+ /** @internal */ _idealThreads;
26
+ /** @internal */ _minThreads;
27
+ /** @internal */ _performanceTracking;
12
28
  /** @internal */ _totalSequences;
13
29
  /** @internal */ _unusedSequenceIds = [];
14
30
  /** @internal */ _batchingOptions;
15
31
  /** @internal */ _queuedDecodeSequenceIds = new Set();
16
32
  /** @internal */ _queuedDecodes = [];
17
- /** @internal */ _disposeAggregator = new DisposeAggregator();
33
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
34
+ /** @internal */ _modelPreventDisposalHandle;
35
+ /** @internal */ _loraAdapters = new Set();
36
+ /** @internal */ _gcRegistry;
18
37
  /** @internal */ _nextGeneratedSequenceId = 0;
19
38
  /** @internal */ _dispatchDecodeScheduled = false;
20
39
  /** @internal */ _batchDispatchPending = false;
40
+ /** @internal */ _threadSplitterConsumer;
41
+ /** @internal */ _freeReservedThreadsTimeout;
21
42
  /** @internal */ _currentDispatchBatchHandle = {};
22
43
  /** @internal */ _allocatedContextSize;
23
44
  /** @internal */ _disposed = false;
24
45
  onDispose = new EventRelay();
25
- /**
26
- * @param options
27
- */
28
- constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
29
- if (model.disposed)
46
+ constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, performanceTracking = false, _embeddings }) {
47
+ if (_model.disposed)
30
48
  throw new DisposedError();
31
- this._llama = model._llama;
32
- this._model = model;
49
+ this._llama = _model._llama;
50
+ this._model = _model;
51
+ this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
52
+ this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
33
53
  this._totalSequences = Math.max(1, Math.floor(sequences));
34
54
  this._contextSize = Math.max(2, contextSize);
35
55
  this._batchSize = Math.max(batchSize, this._totalSequences);
56
+ this._flashAttention = flashAttention;
57
+ this._idealThreads = typeof threads === "number"
58
+ ? this._llama._threadsSplitter.normalizeThreadsValue(threads)
59
+ : this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? (this._llama.maxThreads === 0
60
+ ? this._llama.cpuMathCores
61
+ : this._llama.maxThreads));
62
+ this._minThreads = Math.max(1, typeof threads === "number"
63
+ ? 1
64
+ : this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1));
65
+ this._performanceTracking = !!performanceTracking;
36
66
  this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
37
- seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
38
- contextSize: contextSize * this._totalSequences,
67
+ contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
39
68
  batchSize: this._batchSize,
40
- threads: Math.max(0, Math.floor(threads)),
41
- embedding: _embedding,
42
- noSeed: _noSeed
69
+ sequences: this._totalSequences,
70
+ flashAttention: this._flashAttention,
71
+ threads: this._idealThreads,
72
+ embeddings: _embeddings,
73
+ performanceTracking: this._performanceTracking
43
74
  }));
44
75
  this._batchingOptions = {
45
76
  dispatchSchedule: batchingDispatchSchedule,
46
- itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
77
+ itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
47
78
  };
79
+ this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
80
+ this._gcRegistry.register(this, this._loraAdapters);
48
81
  this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
82
+ this._freeReservedThreads = this._freeReservedThreads.bind(this);
83
+ this._disposeAggregator.add(() => {
84
+ this._disposed = true;
85
+ });
86
+ this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
49
87
  this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
50
88
  this._disposeAggregator.add(this.onDispose.dispatchEvent);
89
+ this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
51
90
  this._disposeAggregator.add(() => {
52
- this._ctx.dispose();
91
+ if (this._loraAdapters.size > 0) {
92
+ const loraAdapters = new Set(this._loraAdapters);
93
+ this._loraAdapters.clear();
94
+ return this._model._removeLoraUsage(loraAdapters);
95
+ }
96
+ });
97
+ this._disposeAggregator.add(async () => {
98
+ await this._backendContextDisposeGuard.acquireDisposeLock();
99
+ await this._ctx.dispose();
100
+ this._modelPreventDisposalHandle.dispose();
53
101
  });
54
- this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
55
102
  }
56
- dispose() {
103
+ async dispose() {
57
104
  if (this._disposed)
58
105
  return;
59
106
  this._disposed = true;
60
- this._disposeAggregator.dispose();
107
+ await this._disposeAggregator.dispose();
61
108
  }
62
109
  /** @hidden */
63
- [Symbol.dispose]() {
110
+ [Symbol.asyncDispose]() {
64
111
  return this.dispose();
65
112
  }
66
113
  get disposed() {
@@ -75,6 +122,30 @@ export class LlamaContext {
75
122
  get batchSize() {
76
123
  return this._batchSize;
77
124
  }
125
+ get flashAttention() {
126
+ return this._flashAttention;
127
+ }
128
+ /**
129
+ * The actual size of the state in the memory in bytes.
130
+ * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
131
+ */
132
+ get stateSize() {
133
+ this._ensureNotDisposed();
134
+ return this._ctx.getStateSize();
135
+ }
136
+ /** The number of threads currently used to evaluate tokens */
137
+ get currentThreads() {
138
+ this._ensureNotDisposed();
139
+ return this._ctx.getThreads();
140
+ }
141
+ /**
142
+ * The number of threads that are preferred to be used to evaluate tokens.
143
+ *
144
+ * The actual number of threads used may be lower when other evaluations are running in parallel.
145
+ */
146
+ get idealThreads() {
147
+ return this._idealThreads;
148
+ }
78
149
  getAllocatedContextSize() {
79
150
  this._ensureNotDisposed();
80
151
  if (this._allocatedContextSize == null)
@@ -90,9 +161,9 @@ export class LlamaContext {
90
161
  /**
91
162
  * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
92
163
  * When there are no sequences left, this method will throw an error.
93
- * @param [options]
94
164
  */
95
- getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
165
+ getSequence(options = {}) {
166
+ const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
96
167
  this._ensureNotDisposed();
97
168
  const nextSequenceId = this._popSequenceId();
98
169
  if (nextSequenceId == null)
@@ -100,6 +171,7 @@ export class LlamaContext {
100
171
  return LlamaContextSequence._create({
101
172
  sequenceId: nextSequenceId,
102
173
  context: this,
174
+ tokenMeter: _tokenMeter,
103
175
  contextShift: {
104
176
  size: contextShiftSize,
105
177
  strategy: contextShiftStrategy
@@ -116,17 +188,18 @@ export class LlamaContext {
116
188
  this._currentDispatchBatchHandle = {};
117
189
  this._dispatchDecodeScheduled = false;
118
190
  this._batchDispatchPending = false;
119
- let prioritizeStrategy;
120
- try {
121
- this._ensureNotDisposed();
122
- prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
123
- }
124
- catch (err) {
125
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
126
- return;
127
- }
128
- let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
129
- while (shouldHaveAnotherBatch) {
191
+ let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
192
+ const resolvePrioritizationStrategy = () => {
193
+ try {
194
+ this._ensureNotDisposed();
195
+ return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
196
+ }
197
+ catch (err) {
198
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
199
+ }
200
+ return null;
201
+ };
202
+ const getOrderedQueuedDecodes = (prioritizationStrategy) => {
130
203
  const batchItemToQueuedDecodeMap = new Map();
131
204
  const batchItemsList = [];
132
205
  for (const queuedDecode of this._queuedDecodes) {
@@ -139,42 +212,65 @@ export class LlamaContext {
139
212
  }
140
213
  let prioritizedItems;
141
214
  try {
142
- prioritizedItems = prioritizeStrategy({
215
+ prioritizedItems = prioritizationStrategy({
143
216
  items: batchItemsList,
144
217
  size: this._batchSize
145
218
  });
146
219
  }
147
220
  catch (err) {
148
221
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
149
- return;
222
+ return null;
150
223
  }
151
- let batchTokenSlotsLeft = this._batchSize;
152
- const afterDecodeActions = [];
153
- const queuedDecodesToDelete = new Set();
154
- const currentQueuedDecodeItems = new Set();
155
- const currentBatchItems = [];
156
- let currentBatchSize = 0;
157
- for (const prioritizedItem of prioritizedItems) {
224
+ return prioritizedItems.map((prioritizedItem) => {
158
225
  const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
159
226
  if (queuedDecode == null)
160
227
  throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
161
228
  "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
162
- const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
163
- if (processAmount <= 0)
229
+ return {
230
+ queuedDecode,
231
+ processAmount: prioritizedItem.processAmount
232
+ };
233
+ });
234
+ };
235
+ const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
236
+ const currentBatchItems = [];
237
+ let currentBatchSize = 0;
238
+ let batchTokenSlotsLeft = batchSize;
239
+ for (const { queuedDecode, processAmount } of queuedDecodes) {
240
+ const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
241
+ if (resolvedProcessAmount <= 0) {
242
+ if (batchTokenSlotsLeft === 0)
243
+ break;
164
244
  continue;
165
- batchTokenSlotsLeft -= processAmount;
245
+ }
246
+ batchTokenSlotsLeft -= resolvedProcessAmount;
247
+ currentBatchSize += resolvedProcessAmount;
166
248
  currentBatchItems.push({
167
249
  queuedDecode,
168
- processAmount
250
+ processAmount: resolvedProcessAmount
169
251
  });
170
- currentBatchSize += processAmount;
171
252
  }
253
+ return {
254
+ currentBatchItems,
255
+ currentBatchSize
256
+ };
257
+ };
258
+ const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
259
+ const afterDecodeActions = [];
260
+ const queuedDecodesToDelete = new Set();
261
+ const currentQueuedDecodeItems = new Set();
172
262
  if (currentBatchSize !== 0)
173
263
  this._ctx.initBatch(currentBatchSize);
174
- for (const { queuedDecode, processAmount } of currentBatchItems) {
264
+ for (const { queuedDecode, processAmount } of batchItems) {
175
265
  let batchLogitIndex;
176
266
  try {
177
- batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
267
+ const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
268
+ processAmount === queuedDecode.tokens.length;
269
+ const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
270
+ const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
271
+ TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
272
+ TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
273
+ batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
178
274
  }
179
275
  catch (err) {
180
276
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
@@ -193,8 +289,6 @@ export class LlamaContext {
193
289
  queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
194
290
  queuedDecode.firstTokenSequenceIndex += processAmount;
195
291
  }
196
- if (batchTokenSlotsLeft === 0)
197
- break;
198
292
  }
199
293
  for (let i = 0; i < this._queuedDecodes.length; i++) {
200
294
  const queuedDecode = this._queuedDecodes[i];
@@ -204,14 +298,22 @@ export class LlamaContext {
204
298
  i--;
205
299
  }
206
300
  }
207
- shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
208
- try {
209
- if (currentBatchSize !== 0)
301
+ if (currentBatchSize !== 0) {
302
+ const allocationResult = this._threadSplitterConsumer?.getAllocationToConsume();
303
+ const [threadsToUse, consumerHandle] = allocationResult instanceof Promise
304
+ ? await allocationResult ?? []
305
+ : allocationResult ?? [];
306
+ try {
307
+ if (threadsToUse != null)
308
+ this._ctx.setThreads(threadsToUse);
210
309
  await this._ctx.decodeBatch();
211
- }
212
- catch (err) {
213
- this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
214
- return;
310
+ consumerHandle?.dispose();
311
+ }
312
+ catch (err) {
313
+ consumerHandle?.dispose();
314
+ this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
315
+ return;
316
+ }
215
317
  }
216
318
  for (const action of afterDecodeActions) {
217
319
  const [accept, reject] = action.response;
@@ -225,15 +327,56 @@ export class LlamaContext {
225
327
  }
226
328
  accept(undefined);
227
329
  }
330
+ };
331
+ const prioritizationStrategy = resolvePrioritizationStrategy();
332
+ if (prioritizationStrategy == null)
333
+ return; // all queued items are rejected and dequeued when we get here
334
+ this._reserveThreads();
335
+ try {
336
+ while (shouldHaveAnotherLoop) {
337
+ const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
338
+ if (orderedQueuedDecodes == null)
339
+ return; // all queued items are rejected and dequeued when we get here
340
+ const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
341
+ let preventDisposalHandle;
342
+ try {
343
+ preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
344
+ }
345
+ catch (err) {
346
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
347
+ return;
348
+ }
349
+ try {
350
+ await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
351
+ shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
352
+ }
353
+ finally {
354
+ preventDisposalHandle.dispose();
355
+ }
356
+ }
357
+ }
358
+ finally {
359
+ this._scheduleToFreeReservedThreads();
228
360
  }
229
361
  });
230
362
  }
363
+ /**
364
+ * Print the timings of token evaluation since that last print for this context.
365
+ *
366
+ * Requires the `performanceTracking` option to be enabled.
367
+ *
368
+ * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
369
+ * it won't print anything.
370
+ */
231
371
  async printTimings() {
372
+ this._ensureNotDisposed();
373
+ if (!this._performanceTracking)
374
+ throw new UnsupportedError("Performance tracking is not enabled");
232
375
  this._ctx.printTimings();
233
376
  await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
234
377
  }
235
378
  /** @internal */
236
- async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
379
+ async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
237
380
  return await new Promise((accept, reject) => {
238
381
  this._queuedDecodes.push({
239
382
  sequenceId,
@@ -241,6 +384,7 @@ export class LlamaContext {
241
384
  firstTokenSequenceIndex,
242
385
  generateLogitAtTheEnd,
243
386
  evaluationPriority,
387
+ tokenMeter,
244
388
  response: [accept, reject],
245
389
  onDone
246
390
  });
@@ -253,16 +397,14 @@ export class LlamaContext {
253
397
  if (this._disposed)
254
398
  return;
255
399
  void withLock(this, "context", async () => {
400
+ if (this._disposed)
401
+ return;
256
402
  this._ctx.disposeSequence(sequenceId);
257
403
  this._unusedSequenceIds.push(sequenceId);
258
404
  this._onReclaimUnusedSequenceId.dispatchEvent();
259
405
  });
260
406
  }
261
407
  /** @internal */
262
- _acceptTokenOnGrammarEvaluationState(grammarEvaluationState, token) {
263
- this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
264
- }
265
- /** @internal */
266
408
  _popSequenceId() {
267
409
  if (this._unusedSequenceIds.length > 0)
268
410
  return this._unusedSequenceIds.shift();
@@ -312,20 +454,177 @@ export class LlamaContext {
312
454
  if (this._disposed)
313
455
  throw new DisposedError();
314
456
  }
457
+ /** @internal */
458
+ async _setLora({ filePath, scale }) {
459
+ const lora = await this._model._getOrLoadLora(filePath);
460
+ this._ctx.setLora(lora, scale ?? defaultLoraScale);
461
+ if (!this._loraAdapters.has(lora)) {
462
+ this._loraAdapters.add(lora);
463
+ lora.usages++;
464
+ }
465
+ }
466
+ /** @internal */
467
+ _reserveThreads() {
468
+ clearTimeout(this._freeReservedThreadsTimeout);
469
+ delete this._freeReservedThreadsTimeout;
470
+ if (this._threadSplitterConsumer != null)
471
+ return;
472
+ this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
473
+ }
474
+ /** @internal */
475
+ _freeReservedThreads() {
476
+ clearTimeout(this._freeReservedThreadsTimeout);
477
+ delete this._freeReservedThreadsTimeout;
478
+ if (this._threadSplitterConsumer == null)
479
+ return;
480
+ this._threadSplitterConsumer.dispose();
481
+ delete this._threadSplitterConsumer;
482
+ }
483
+ /** @internal */
484
+ _scheduleToFreeReservedThreads() {
485
+ if (this._threadSplitterConsumer == null)
486
+ return;
487
+ clearTimeout(this._freeReservedThreadsTimeout);
488
+ this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
489
+ }
490
+ /** @internal */
491
+ static async _create(options, { _model }) {
492
+ const sequences = options.sequences ?? getDefaultContextSequences();
493
+ const flashAttention = _model.flashAttentionSupported
494
+ ? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
495
+ : false;
496
+ const loraOptions = typeof options.lora === "string"
497
+ ? { adapters: [{ filePath: options.lora }] }
498
+ : options.lora;
499
+ let failedCreationRetries = options.failedCreationRemedy === false
500
+ ? 0
501
+ : Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
502
+ const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
503
+ ? 0
504
+ : options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
505
+ let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
506
+ batchSize: options.batchSize,
507
+ sequences: sequences,
508
+ modelGpuLayers: _model.gpuLayers,
509
+ modelTrainContextSize: _model.trainContextSize,
510
+ flashAttention,
511
+ getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
512
+ llamaGpu: _model._llama.gpu,
513
+ ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
514
+ isEmbeddingContext: options._embeddings
515
+ });
516
+ const minContextSize = options.contextSize === "auto"
517
+ ? shrinkRetriesMinContextSize
518
+ : (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
519
+ ? options.contextSize.min
520
+ : typeof options.contextSize === "number"
521
+ ? options.contextSize
522
+ : shrinkRetriesMinContextSize;
523
+ const { createSignal } = options;
524
+ async function createContext(contextSize) {
525
+ const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
526
+ const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
527
+ contextSize,
528
+ sequences,
529
+ isEmbeddingContext: options._embeddings,
530
+ modelGpuLayers: _model.gpuLayers,
531
+ batchSize,
532
+ flashAttention
533
+ }).gpuVram;
534
+ const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
535
+ const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
536
+ ? null
537
+ : _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
538
+ try {
539
+ if (createSignal?.aborted)
540
+ throw createSignal.reason;
541
+ const contextLoaded = await context._ctx.init();
542
+ if (createSignal?.aborted) {
543
+ if (contextLoaded)
544
+ await context._ctx.dispose();
545
+ throw createSignal.reason;
546
+ }
547
+ else if (!contextLoaded)
548
+ throw new Error("Failed to create context");
549
+ contextCreationMemoryReservation?.dispose?.();
550
+ if (loraOptions != null && loraOptions.adapters.length > 0) {
551
+ let loadedAdapters = 0;
552
+ for (const adapter of loraOptions.adapters) {
553
+ try {
554
+ await context._setLora({
555
+ filePath: adapter.filePath,
556
+ scale: adapter.scale
557
+ });
558
+ loadedAdapters++;
559
+ try {
560
+ loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
561
+ }
562
+ catch (err) {
563
+ console.error(err);
564
+ }
565
+ }
566
+ catch (err) {
567
+ await context.dispose();
568
+ throw err;
569
+ }
570
+ if (createSignal?.aborted) {
571
+ await context.dispose();
572
+ throw createSignal.reason;
573
+ }
574
+ }
575
+ }
576
+ else if (loraOptions?.onLoadProgress != null) {
577
+ try {
578
+ loraOptions.onLoadProgress(1);
579
+ }
580
+ catch (err) {
581
+ console.error(err);
582
+ }
583
+ }
584
+ return context;
585
+ }
586
+ finally {
587
+ contextCreationMemoryReservation?.dispose?.();
588
+ }
589
+ }
590
+ while (failedCreationRetries >= 0) {
591
+ try {
592
+ return await createContext(contextSize);
593
+ }
594
+ catch (err) {
595
+ if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
596
+ throw err;
597
+ failedCreationRetries--;
598
+ let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
599
+ ? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
600
+ : Math.floor(failedCreationAutoContextSizeShrink(contextSize));
601
+ if (!Number.isFinite(newContextSize))
602
+ throw err;
603
+ if (newContextSize < minContextSize)
604
+ newContextSize = minContextSize;
605
+ if (newContextSize >= contextSize)
606
+ throw err;
607
+ contextSize = newContextSize;
608
+ }
609
+ }
610
+ throw new Error("Failed to create context");
611
+ }
315
612
  }
316
613
  export class LlamaContextSequence {
317
614
  /** @internal */ _sequenceId;
318
615
  /** @internal */ _gcRegistry;
319
616
  /** @internal */ _context;
320
617
  /** @internal */ _contextShift;
618
+ /** @internal */ _tokenMeter;
321
619
  /** @internal */ _disposeAggregator = new DisposeAggregator();
322
620
  /** @internal */ _contextTokens = [];
323
621
  /** @internal */ _nextTokenIndex = 0;
324
622
  /** @internal */ _disposed = false;
325
623
  onDispose = new EventRelay();
326
- constructor({ sequenceId, context, contextShift }) {
624
+ constructor({ sequenceId, context, tokenMeter, contextShift }) {
327
625
  this._sequenceId = sequenceId;
328
626
  this._context = context;
627
+ this._tokenMeter = tokenMeter ?? new TokenMeter();
329
628
  this._contextShift = contextShift;
330
629
  this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
331
630
  this._gcRegistry.register(this, sequenceId);
@@ -362,6 +661,9 @@ export class LlamaContextSequence {
362
661
  get contextTokens() {
363
662
  return this._contextTokens.slice();
364
663
  }
664
+ get tokenMeter() {
665
+ return this._tokenMeter;
666
+ }
365
667
  get isLoadedToMemory() {
366
668
  return !this._disposed;
367
669
  }
@@ -387,7 +689,7 @@ export class LlamaContextSequence {
387
689
  }
388
690
  /**
389
691
  * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
390
- * the start and end of each range are exclusive.
692
+ * The start of each range is inclusive, and the end of each range is exclusive.
391
693
  * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
392
694
  */
393
695
  async eraseContextTokenRanges(ranges) {
@@ -396,6 +698,8 @@ export class LlamaContextSequence {
396
698
  this._ensureNotDisposed();
397
699
  if (ranges.length === 0)
398
700
  return;
701
+ // if the deletion fails, we'll have to dispose the sequence and fill it up again
702
+ let deletionSuccessful = true;
399
703
  const resolvedRanges = ranges
400
704
  .map(({ start, end }) => {
401
705
  if (start === end)
@@ -425,34 +729,42 @@ export class LlamaContextSequence {
425
729
  let lastDeleteRangeEndPos = null;
426
730
  for (const range of resolvedRanges) {
427
731
  this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
428
- this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
429
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
732
+ if (deletionSuccessful)
733
+ deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
734
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
430
735
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
431
736
  removedTokens += range.end - range.start;
432
737
  lastDeleteRangeEndPos = range.end;
433
738
  }
434
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
739
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
435
740
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
436
741
  this._nextTokenIndex -= removedTokens;
742
+ if (deletionSuccessful)
743
+ return;
744
+ const newSequenceTokens = this._contextTokens.slice();
745
+ this._nextTokenIndex = 0;
746
+ this._context._ctx.disposeSequence(this._sequenceId);
747
+ await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
437
748
  });
438
749
  }
439
- /**
440
- * @param tokens
441
- * @param [options]
442
- */
443
- evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
750
+ evaluate(tokens, options = {}) {
751
+ const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
444
752
  return this._evaluate(tokens, {
445
753
  temperature,
754
+ minP,
446
755
  topK,
447
756
  topP,
757
+ seed,
448
758
  grammarEvaluationState,
449
759
  repeatPenalty,
760
+ tokenBias,
450
761
  evaluationPriority,
451
762
  contextShiftOptions: {
452
763
  size: contextShiftSize,
453
764
  strategy: contextShiftStrategy
454
765
  },
455
- yieldEosToken
766
+ yieldEogToken,
767
+ _noSampling
456
768
  });
457
769
  }
458
770
  /**
@@ -475,59 +787,85 @@ export class LlamaContextSequence {
475
787
  }
476
788
  }
477
789
  /** @internal */
478
- async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
790
+ async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
479
791
  this._ensureNotDisposed();
480
792
  let evalTokens = tokens;
481
793
  if (evalTokens.length === 0)
482
794
  return;
483
- // eslint-disable-next-line no-constant-condition
484
- while (true) {
485
- this._ensureNotDisposed();
486
- // Evaluate to get the next token.
487
- const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
488
- const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
489
- ? repeatPenalty.punishTokens()
490
- : repeatPenalty?.punishTokens;
491
- const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
492
- ? grammarEvaluationState()
493
- : grammarEvaluationState;
494
- if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
495
- throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
496
- return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
497
- temperature,
498
- topK,
499
- topP,
500
- repeatPenalty: repeatPenalty?.penalty,
501
- repeatPenaltyTokens: repeatPenaltyTokens != null
502
- ? Uint32Array.from(repeatPenaltyTokens)
503
- : undefined,
504
- repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
505
- repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
506
- grammarEvaluationState: resolvedGrammarEvaluationState?._state
507
- }));
508
- });
509
- if (nextToken == null)
510
- return;
511
- // the model finished generating text
512
- if (!yieldEosToken && nextToken === this._context.model.tokens.eos)
513
- break;
514
- yield nextToken;
515
- // Create tokens for the next eval.
516
- evalTokens = [nextToken];
795
+ const sampler = new LlamaSampler(this.model);
796
+ try {
797
+ while (true) {
798
+ this._ensureNotDisposed();
799
+ // Evaluate to get the next token.
800
+ const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
801
+ if (_noSampling)
802
+ return null;
803
+ const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
804
+ ? repeatPenalty.punishTokens()
805
+ : repeatPenalty?.punishTokens;
806
+ const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
807
+ const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
808
+ ? grammarEvaluationState()
809
+ : grammarEvaluationState;
810
+ if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
811
+ throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
812
+ const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
813
+ sampler.applyConfig(removeNullFields({
814
+ temperature,
815
+ minP,
816
+ topK,
817
+ topP,
818
+ seed: Math.max(0, Number.isFinite(seed)
819
+ ? Math.floor(seed ?? (Date.now() / 1000))
820
+ : Math.floor(Date.now() / 1000)),
821
+ repeatPenalty: repeatPenalty?.penalty,
822
+ repeatPenaltyMaxTokens: maxPunishTokens,
823
+ repeatPenaltyTokens: repeatPenaltyTokens != null
824
+ ? Uint32Array.from(repeatPenaltyTokens)
825
+ : undefined,
826
+ repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
827
+ repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
828
+ tokenBiasKeys,
829
+ tokenBiasValues,
830
+ grammarEvaluationState: resolvedGrammarEvaluationState?._state
831
+ }));
832
+ return withLock(sampler, "sample", async () => {
833
+ if (sampler.disposed)
834
+ return null;
835
+ return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
836
+ });
837
+ });
838
+ if (nextToken === -1)
839
+ throw new Error("Failed to sample next token");
840
+ if (nextToken == null)
841
+ return;
842
+ // the model finished generating text
843
+ if (!yieldEogToken && this._context.model.isEogToken(nextToken))
844
+ break;
845
+ const replacementToken = (yield nextToken);
846
+ // set the tokens for the next evaluation
847
+ if (replacementToken != null)
848
+ evalTokens = [replacementToken];
849
+ else
850
+ evalTokens = [nextToken];
851
+ }
852
+ }
853
+ finally {
854
+ void withLock(sampler, "sample", sampler.asyncDispose);
517
855
  }
518
856
  }
519
857
  /** @internal */
520
- async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
858
+ async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
521
859
  this._ensureNotDisposed();
522
860
  const tokensLeftToDecode = tokens.slice();
523
861
  return await withLock(this, "evaluate", async () => {
524
862
  while (tokensLeftToDecode.length > 0) {
525
863
  this._ensureNotDisposed();
526
- let freeSpace = this._context.contextSize - this._nextTokenIndex;
527
- if (freeSpace <= 1) {
864
+ let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
865
+ if (freeSpace <= 0) {
528
866
  await this._freeUpSpaceForTokens(contextShiftOptions);
529
- freeSpace = this._context.contextSize - this._nextTokenIndex;
530
- if (freeSpace <= 1)
867
+ freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
868
+ if (freeSpace <= 0)
531
869
  throw new Error("Failed to free up space for new tokens");
532
870
  }
533
871
  const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
@@ -537,7 +875,8 @@ export class LlamaContextSequence {
537
875
  tokens: tokensToDecode,
538
876
  firstTokenSequenceIndex: this._nextTokenIndex,
539
877
  generateLogitAtTheEnd,
540
- evaluationPriority
878
+ evaluationPriority,
879
+ tokenMeter
541
880
  }, !generateLogitAtTheEnd
542
881
  ? undefined
543
882
  : onDecodeDone);
@@ -557,7 +896,10 @@ export class LlamaContextSequence {
557
896
  : contextShiftOptions.size));
558
897
  this._ensureNotDisposed();
559
898
  if (contextShiftOptions.strategy === "eraseBeginning") {
560
- await this.eraseContextTokenRanges([{ start: 0, end: size }]);
899
+ let eraseStartIndex = 0;
900
+ if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
901
+ eraseStartIndex = 1;
902
+ await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
561
903
  }
562
904
  else {
563
905
  const ranges = await contextShiftOptions.strategy({
@@ -567,7 +909,7 @@ export class LlamaContextSequence {
567
909
  if (ranges == null)
568
910
  throw new Error("Invalid delete ranges");
569
911
  await this.eraseContextTokenRanges(ranges);
570
- if (this.nextTokenIndex >= this._context.contextSize)
912
+ if (this.nextTokenIndex >= this._context.contextSize - 1)
571
913
  await this.eraseContextTokenRanges([{ start: 0, end: size }]);
572
914
  }
573
915
  }
@@ -580,10 +922,11 @@ export class LlamaContextSequence {
580
922
  * We need this to make it impossible to manually create instances of this class outside the code of this library
581
923
  * @internal
582
924
  */
583
- static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
925
+ static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
584
926
  return new LlamaContextSequence({
585
927
  sequenceId,
586
928
  context,
929
+ tokenMeter,
587
930
  contextShift: {
588
931
  size: contextShiftSize,
589
932
  strategy: contextShiftStrategy
@@ -591,14 +934,52 @@ export class LlamaContextSequence {
591
934
  });
592
935
  }
593
936
  }
937
+ function getTokenBiasesForAddon(tokenBias, currentModel) {
938
+ if (tokenBias == null)
939
+ return {
940
+ tokenBiasKeys: undefined,
941
+ tokenBiasValues: undefined
942
+ };
943
+ if (tokenBias instanceof Function)
944
+ tokenBias = tokenBias();
945
+ if (tokenBias._tokenizer !== currentModel.tokenizer)
946
+ throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
947
+ "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
948
+ const tokenBiasKeys = [];
949
+ const tokenBiasValues = [];
950
+ for (const [token, bias] of tokenBias._biases) {
951
+ tokenBiasKeys.push(token);
952
+ tokenBiasValues.push(bias);
953
+ }
954
+ if (tokenBiasKeys.length === 0 || tokenBiasValues.length === 0) {
955
+ return {
956
+ tokenBiasKeys: undefined,
957
+ tokenBiasValues: undefined
958
+ };
959
+ }
960
+ return {
961
+ tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
962
+ tokenBiasValues: Float32Array.from(tokenBiasValues)
963
+ };
964
+ }
594
965
  function disposeContextIfReferenced(contextRef) {
595
966
  const context = contextRef.deref();
596
967
  if (context != null)
597
- context.dispose();
968
+ void context.dispose();
598
969
  }
599
970
  function disposeContextSequenceIfReferenced(contextRef) {
600
971
  const context = contextRef.deref();
601
972
  if (context != null)
602
973
  context.dispose();
603
974
  }
975
+ export function getDefaultContextBatchSize({ contextSize, sequences }) {
976
+ return Math.min(contextSize * sequences, 512);
977
+ }
978
+ export function getDefaultContextSequences() {
979
+ return 1;
980
+ }
981
+ const defaultFallbackContextSize = 4096;
982
+ export function getDefaultModelContextSize({ trainContextSize }) {
983
+ return trainContextSize ?? defaultFallbackContextSize;
984
+ }
604
985
  //# sourceMappingURL=LlamaContext.js.map