node-llama-cpp 3.0.0-beta.3 → 3.0.0-beta.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (690) hide show
  1. package/README.md +14 -11
  2. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
  3. package/bins/linux-arm64/llama-addon.node +0 -0
  4. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  5. package/bins/linux-armv7l/llama-addon.node +0 -0
  6. package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
  7. package/bins/linux-x64/llama-addon.node +0 -0
  8. package/bins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  9. package/bins/linux-x64-cuda/llama-addon.node +0 -0
  10. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  11. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  12. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  13. package/bins/mac-arm64-metal/default.metallib +0 -0
  14. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  15. package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
  16. package/bins/mac-x64/llama-addon.node +0 -0
  17. package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
  18. package/bins/win-arm64/llama-addon.exp +0 -0
  19. package/bins/win-arm64/llama-addon.lib +0 -0
  20. package/bins/win-arm64/llama-addon.node +0 -0
  21. package/bins/win-x64/_nlcBuildMetadata.json +1 -0
  22. package/bins/win-x64/llama-addon.exp +0 -0
  23. package/bins/win-x64/llama-addon.lib +0 -0
  24. package/bins/win-x64/llama-addon.node +0 -0
  25. package/bins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  26. package/bins/win-x64-cuda/llama-addon.exp +0 -0
  27. package/bins/win-x64-cuda/llama-addon.lib +0 -0
  28. package/bins/win-x64-cuda/llama-addon.node +0 -0
  29. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  30. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  31. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  32. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  33. package/dist/ChatWrapper.d.ts +8 -39
  34. package/dist/ChatWrapper.js +115 -72
  35. package/dist/ChatWrapper.js.map +1 -1
  36. package/dist/apiDocsIndex.d.ts +1 -0
  37. package/dist/apiDocsIndex.js +7 -0
  38. package/dist/apiDocsIndex.js.map +1 -0
  39. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -8
  40. package/dist/bindings/AddonTypes.js +2 -0
  41. package/dist/bindings/AddonTypes.js.map +1 -0
  42. package/dist/bindings/Llama.d.ts +47 -0
  43. package/dist/bindings/Llama.js +353 -0
  44. package/dist/bindings/Llama.js.map +1 -0
  45. package/dist/bindings/consts.d.ts +2 -0
  46. package/dist/bindings/consts.js +11 -0
  47. package/dist/bindings/consts.js.map +1 -0
  48. package/dist/bindings/getLlama.d.ts +148 -0
  49. package/dist/bindings/getLlama.js +401 -0
  50. package/dist/bindings/getLlama.js.map +1 -0
  51. package/dist/bindings/types.d.ts +56 -0
  52. package/dist/bindings/types.js +77 -0
  53. package/dist/bindings/types.js.map +1 -0
  54. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  55. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  56. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  57. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  58. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  59. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  60. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  61. package/dist/bindings/utils/asyncEvery.js +15 -0
  62. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  63. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  64. package/dist/bindings/utils/asyncSome.js +27 -0
  65. package/dist/bindings/utils/asyncSome.js.map +1 -0
  66. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  67. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  68. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  69. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  70. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  71. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  72. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  73. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  74. package/dist/bindings/utils/compileLLamaCpp.d.ts +17 -0
  75. package/dist/bindings/utils/compileLLamaCpp.js +226 -0
  76. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  77. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  78. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  79. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  80. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  81. package/dist/bindings/utils/detectGlibc.js +46 -0
  82. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  83. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
  84. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  85. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  86. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  87. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  88. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  89. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  90. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  91. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  92. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  93. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  94. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  95. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  96. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  97. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  98. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  99. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  100. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  101. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  102. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  103. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  104. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  105. package/dist/bindings/utils/getPlatform.js +15 -0
  106. package/dist/bindings/utils/getPlatform.js.map +1 -0
  107. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  108. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  109. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  110. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  111. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  112. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  113. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  114. package/dist/bindings/utils/hasFileInPath.js +34 -0
  115. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  116. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  117. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  118. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  119. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  120. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  121. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  122. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  123. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  124. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  125. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  126. package/dist/bindings/utils/resolveCustomCmakeOptions.js +47 -0
  127. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  128. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  129. package/dist/bindings/utils/testBindingBinary.js +100 -0
  130. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  131. package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
  132. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  133. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  134. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  135. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  136. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  137. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
  138. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  139. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  140. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  141. package/dist/chatWrappers/FalconChatWrapper.js +38 -21
  142. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  143. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  144. package/dist/chatWrappers/FunctionaryChatWrapper.js +323 -118
  145. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  146. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  147. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  148. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  149. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  150. package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
  151. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  152. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  153. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
  154. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  155. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  156. package/dist/chatWrappers/Llama3ChatWrapper.js +174 -0
  157. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  158. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +67 -0
  159. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
  160. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  161. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
  162. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  163. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  164. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  165. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  166. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  167. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  168. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  169. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  170. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  171. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
  172. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  173. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
  174. package/dist/chatWrappers/utils/resolveChatWrapper.js +243 -0
  175. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  176. package/dist/cli/cli.js +21 -7
  177. package/dist/cli/cli.js.map +1 -1
  178. package/dist/cli/commands/BuildCommand.d.ts +11 -4
  179. package/dist/cli/commands/BuildCommand.js +114 -41
  180. package/dist/cli/commands/BuildCommand.js.map +1 -1
  181. package/dist/cli/commands/ChatCommand.d.ts +18 -6
  182. package/dist/cli/commands/ChatCommand.js +299 -143
  183. package/dist/cli/commands/ChatCommand.js.map +1 -1
  184. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  185. package/dist/cli/commands/ClearCommand.js +11 -12
  186. package/dist/cli/commands/ClearCommand.js.map +1 -1
  187. package/dist/cli/commands/CompleteCommand.d.ts +29 -0
  188. package/dist/cli/commands/CompleteCommand.js +365 -0
  189. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  190. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  191. package/dist/cli/commands/DebugCommand.js +54 -0
  192. package/dist/cli/commands/DebugCommand.js.map +1 -0
  193. package/dist/cli/commands/DownloadCommand.d.ts +7 -4
  194. package/dist/cli/commands/DownloadCommand.js +121 -70
  195. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  196. package/dist/cli/commands/InfillCommand.d.ts +31 -0
  197. package/dist/cli/commands/InfillCommand.js +401 -0
  198. package/dist/cli/commands/InfillCommand.js.map +1 -0
  199. package/dist/cli/commands/InitCommand.d.ts +11 -0
  200. package/dist/cli/commands/InitCommand.js +195 -0
  201. package/dist/cli/commands/InitCommand.js.map +1 -0
  202. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  203. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  204. package/dist/cli/commands/PullCommand.d.ts +12 -0
  205. package/dist/cli/commands/PullCommand.js +117 -0
  206. package/dist/cli/commands/PullCommand.js.map +1 -0
  207. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  208. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  209. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  210. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  211. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  212. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  213. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  214. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +164 -0
  215. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  216. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
  217. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
  218. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  219. package/dist/cli/projectTemplates.d.ts +7 -0
  220. package/dist/cli/projectTemplates.js +10 -0
  221. package/dist/cli/projectTemplates.js.map +1 -0
  222. package/dist/cli/recommendedModels.d.ts +2 -0
  223. package/dist/cli/recommendedModels.js +363 -0
  224. package/dist/cli/recommendedModels.js.map +1 -0
  225. package/dist/cli/startCreateCli.d.ts +2 -0
  226. package/dist/cli/startCreateCli.js +26 -0
  227. package/dist/cli/startCreateCli.js.map +1 -0
  228. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  229. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  230. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  231. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  232. package/dist/cli/utils/ConsoleTable.js +86 -0
  233. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  234. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  235. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  236. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  237. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  238. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  239. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  240. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  241. package/dist/cli/utils/getReadablePath.js +14 -0
  242. package/dist/cli/utils/getReadablePath.js.map +1 -0
  243. package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
  244. package/dist/cli/utils/interactivelyAskForModel.js +451 -0
  245. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  246. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  247. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  248. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  249. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  250. package/dist/cli/utils/printCommonInfoLines.js +75 -0
  251. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  252. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  253. package/dist/cli/utils/printInfoLine.js +54 -0
  254. package/dist/cli/utils/printInfoLine.js.map +1 -0
  255. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  256. package/dist/cli/utils/projectTemplates.js +47 -0
  257. package/dist/cli/utils/projectTemplates.js.map +1 -0
  258. package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
  259. package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
  260. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  261. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  262. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  263. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  264. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  265. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  266. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  267. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  268. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  269. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  270. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  271. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  272. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  273. package/dist/commands.d.ts +1 -0
  274. package/dist/commands.js +3 -0
  275. package/dist/commands.js.map +1 -1
  276. package/dist/config.d.ts +38 -5
  277. package/dist/config.js +61 -16
  278. package/dist/config.js.map +1 -1
  279. package/dist/consts.d.ts +3 -0
  280. package/dist/consts.js +10 -0
  281. package/dist/consts.js.map +1 -0
  282. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +112 -39
  283. package/dist/evaluator/LlamaChat/LlamaChat.js +1512 -0
  284. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  285. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  286. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  287. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  288. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  289. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  290. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  291. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  292. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  293. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  294. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
  295. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  296. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +288 -0
  297. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +419 -0
  298. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  299. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
  300. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  301. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  302. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  303. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  304. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  305. package/dist/evaluator/LlamaCompletion.d.ts +143 -0
  306. package/dist/evaluator/LlamaCompletion.js +418 -0
  307. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  308. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -21
  309. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +270 -81
  310. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  311. package/dist/evaluator/LlamaContext/types.d.ts +140 -0
  312. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  313. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  314. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  315. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  316. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  317. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  318. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  319. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  320. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  321. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +10 -7
  322. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -11
  323. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  324. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  325. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  326. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  327. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  328. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  329. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +236 -0
  330. package/dist/evaluator/LlamaModel/LlamaModel.js +679 -0
  331. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  332. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  333. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  334. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  335. package/dist/evaluator/TokenBias.d.ts +22 -0
  336. package/dist/evaluator/TokenBias.js +33 -0
  337. package/dist/evaluator/TokenBias.js.map +1 -0
  338. package/dist/evaluator/TokenMeter.d.ts +54 -0
  339. package/dist/evaluator/TokenMeter.js +86 -0
  340. package/dist/evaluator/TokenMeter.js.map +1 -0
  341. package/dist/gguf/consts.d.ts +3 -0
  342. package/dist/gguf/consts.js +8 -0
  343. package/dist/gguf/consts.js.map +1 -0
  344. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  345. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  346. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  347. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  348. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  349. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  350. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  351. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  352. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  353. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  354. package/dist/gguf/fileReaders/GgufFsFileReader.js +46 -0
  355. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  356. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  357. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  358. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  359. package/dist/gguf/insights/GgufInsights.d.ts +48 -0
  360. package/dist/gguf/insights/GgufInsights.js +381 -0
  361. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  362. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  363. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +141 -0
  364. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  365. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  366. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
  367. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  368. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  369. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  370. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  371. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  372. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  373. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  374. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  375. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  376. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  377. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  378. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  379. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  380. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  381. package/dist/gguf/parser/parseGguf.js +63 -0
  382. package/dist/gguf/parser/parseGguf.js.map +1 -0
  383. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  384. package/dist/gguf/readGgufFileInfo.js +66 -0
  385. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  386. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  387. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  388. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  389. package/dist/gguf/types/GgufMetadataTypes.d.ts +335 -0
  390. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  391. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  392. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  393. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  394. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  395. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  396. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  397. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  398. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  399. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  400. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  401. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  402. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  403. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  404. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  405. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  406. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  407. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  408. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  409. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  410. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  411. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  412. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  413. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  414. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  415. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  416. package/dist/index.d.ts +41 -18
  417. package/dist/index.js +36 -15
  418. package/dist/index.js.map +1 -1
  419. package/dist/state.d.ts +4 -0
  420. package/dist/state.js +14 -0
  421. package/dist/state.js.map +1 -1
  422. package/dist/types.d.ts +116 -5
  423. package/dist/types.js.map +1 -1
  424. package/dist/utils/DisposeGuard.d.ts +13 -0
  425. package/dist/utils/DisposeGuard.js +120 -0
  426. package/dist/utils/DisposeGuard.js.map +1 -0
  427. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  428. package/dist/utils/InsufficientMemoryError.js +6 -0
  429. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  430. package/dist/utils/LlamaText.d.ts +70 -26
  431. package/dist/utils/LlamaText.js +469 -157
  432. package/dist/utils/LlamaText.js.map +1 -1
  433. package/dist/utils/LruCache.d.ts +12 -0
  434. package/dist/utils/LruCache.js +44 -0
  435. package/dist/utils/LruCache.js.map +1 -0
  436. package/dist/utils/ReplHistory.js.map +1 -1
  437. package/dist/utils/StopGenerationDetector.d.ts +25 -9
  438. package/dist/utils/StopGenerationDetector.js +93 -22
  439. package/dist/utils/StopGenerationDetector.js.map +1 -1
  440. package/dist/utils/TokenStreamRegulator.d.ts +9 -4
  441. package/dist/utils/TokenStreamRegulator.js +81 -8
  442. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  443. package/dist/utils/UnsupportedError.d.ts +2 -0
  444. package/dist/utils/UnsupportedError.js +7 -0
  445. package/dist/utils/UnsupportedError.js.map +1 -0
  446. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  447. package/dist/utils/clearTempFolder.js.map +1 -1
  448. package/dist/utils/cmake.js +38 -20
  449. package/dist/utils/cmake.js.map +1 -1
  450. package/dist/utils/createModelDownloader.d.ts +108 -0
  451. package/dist/utils/createModelDownloader.js +231 -0
  452. package/dist/utils/createModelDownloader.js.map +1 -0
  453. package/dist/utils/findBestOption.d.ts +4 -0
  454. package/dist/utils/findBestOption.js +15 -0
  455. package/dist/utils/findBestOption.js.map +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  458. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  459. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  462. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  465. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  477. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  482. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  484. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  489. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  492. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  493. package/dist/utils/gbnfJson/types.d.ts +3 -0
  494. package/dist/utils/gbnfJson/types.js.map +1 -1
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  497. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  500. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
  502. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  503. package/dist/utils/getBuildDefaults.d.ts +1 -2
  504. package/dist/utils/getBuildDefaults.js +2 -3
  505. package/dist/utils/getBuildDefaults.js.map +1 -1
  506. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  507. package/dist/utils/getConsoleLogPrefix.js +10 -0
  508. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  509. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  510. package/dist/utils/getGrammarsFolder.js +8 -7
  511. package/dist/utils/getGrammarsFolder.js.map +1 -1
  512. package/dist/utils/getModuleVersion.d.ts +1 -0
  513. package/dist/utils/getModuleVersion.js +13 -0
  514. package/dist/utils/getModuleVersion.js.map +1 -0
  515. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  516. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  517. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  518. package/dist/utils/getReadableContextSize.d.ts +1 -0
  519. package/dist/utils/getReadableContextSize.js +7 -0
  520. package/dist/utils/getReadableContextSize.js.map +1 -0
  521. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  522. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  523. package/dist/utils/gitReleaseBundles.js +73 -5
  524. package/dist/utils/gitReleaseBundles.js.map +1 -1
  525. package/dist/utils/hashString.d.ts +1 -0
  526. package/dist/utils/hashString.js +8 -0
  527. package/dist/utils/hashString.js.map +1 -0
  528. package/dist/utils/isLockfileActive.d.ts +4 -0
  529. package/dist/utils/isLockfileActive.js +12 -0
  530. package/dist/utils/isLockfileActive.js.map +1 -0
  531. package/dist/utils/isToken.d.ts +2 -0
  532. package/dist/utils/isToken.js +4 -0
  533. package/dist/utils/isToken.js.map +1 -0
  534. package/dist/utils/isUrl.d.ts +1 -0
  535. package/dist/utils/isUrl.js +15 -0
  536. package/dist/utils/isUrl.js.map +1 -0
  537. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  538. package/dist/utils/mergeUnionTypes.js +2 -0
  539. package/dist/utils/mergeUnionTypes.js.map +1 -0
  540. package/dist/utils/parseModelFileName.d.ts +1 -0
  541. package/dist/utils/parseModelFileName.js +6 -1
  542. package/dist/utils/parseModelFileName.js.map +1 -1
  543. package/dist/utils/parseTextTemplate.d.ts +66 -0
  544. package/dist/utils/parseTextTemplate.js +116 -0
  545. package/dist/utils/parseTextTemplate.js.map +1 -0
  546. package/dist/utils/prettyPrintObject.d.ts +10 -0
  547. package/dist/utils/prettyPrintObject.js +84 -0
  548. package/dist/utils/prettyPrintObject.js.map +1 -0
  549. package/dist/utils/removeNullFields.d.ts +2 -1
  550. package/dist/utils/removeNullFields.js +8 -0
  551. package/dist/utils/removeNullFields.js.map +1 -1
  552. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  553. package/dist/utils/resolveGithubRelease.js +36 -0
  554. package/dist/utils/resolveGithubRelease.js.map +1 -0
  555. package/dist/utils/runtime.d.ts +4 -0
  556. package/dist/utils/runtime.js +8 -0
  557. package/dist/utils/runtime.js.map +1 -0
  558. package/dist/utils/safeEventCallback.d.ts +6 -0
  559. package/dist/utils/safeEventCallback.js +29 -0
  560. package/dist/utils/safeEventCallback.js.map +1 -0
  561. package/dist/utils/spawnCommand.d.ts +11 -1
  562. package/dist/utils/spawnCommand.js +56 -6
  563. package/dist/utils/spawnCommand.js.map +1 -1
  564. package/dist/utils/tokenizeInput.d.ts +3 -0
  565. package/dist/utils/tokenizeInput.js +12 -0
  566. package/dist/utils/tokenizeInput.js.map +1 -0
  567. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js +30 -0
  569. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  570. package/dist/utils/utilTypes.d.ts +3 -0
  571. package/dist/utils/utilTypes.js +2 -0
  572. package/dist/utils/utilTypes.js.map +1 -0
  573. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  574. package/dist/utils/waitForLockfileRelease.js +20 -0
  575. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  576. package/dist/utils/withLockfile.d.ts +7 -0
  577. package/dist/utils/withLockfile.js +44 -0
  578. package/dist/utils/withLockfile.js.map +1 -0
  579. package/dist/utils/withOra.d.ts +2 -0
  580. package/dist/utils/withOra.js +22 -6
  581. package/dist/utils/withOra.js.map +1 -1
  582. package/dist/utils/withProgressLog.d.ts +23 -0
  583. package/dist/utils/withProgressLog.js +211 -0
  584. package/dist/utils/withProgressLog.js.map +1 -0
  585. package/dist/utils/withStatusLogs.d.ts +2 -1
  586. package/dist/utils/withStatusLogs.js +12 -9
  587. package/dist/utils/withStatusLogs.js.map +1 -1
  588. package/dist/utils/wrapAbortSignal.d.ts +2 -0
  589. package/dist/utils/wrapAbortSignal.js +9 -0
  590. package/dist/utils/wrapAbortSignal.js.map +1 -0
  591. package/llama/.clang-format +1 -2
  592. package/llama/CMakeLists.txt +115 -4
  593. package/llama/addon.cpp +1300 -97
  594. package/llama/binariesGithubRelease.json +1 -1
  595. package/llama/gitRelease.bundle +0 -0
  596. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  597. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  598. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  599. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  600. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  601. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  602. package/llama/grammars/README.md +58 -5
  603. package/llama/grammars/json.gbnf +4 -4
  604. package/llama/grammars/json_arr.gbnf +4 -4
  605. package/llama/llama.cpp.info.json +4 -0
  606. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  607. package/package.json +78 -53
  608. package/templates/packed/electron-typescript-react.json +1 -0
  609. package/templates/packed/node-typescript.json +1 -0
  610. package/dist/AbortError.d.ts +0 -2
  611. package/dist/AbortError.js +0 -7
  612. package/dist/AbortError.js.map +0 -1
  613. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  614. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  615. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  616. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  617. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  618. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  619. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  620. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  621. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
  622. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  623. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
  624. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
  625. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  626. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  627. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
  628. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
  629. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  630. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  631. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  632. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
  633. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  634. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  635. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  636. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  637. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  638. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
  639. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
  640. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
  641. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  642. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  643. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  644. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  645. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  646. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  647. package/dist/utils/binariesGithubRelease.js.map +0 -1
  648. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  649. package/dist/utils/clearLlamaBuild.js +0 -12
  650. package/dist/utils/clearLlamaBuild.js.map +0 -1
  651. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  652. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  653. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  654. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  655. package/dist/utils/compileLLamaCpp.js +0 -132
  656. package/dist/utils/compileLLamaCpp.js.map +0 -1
  657. package/dist/utils/getBin.js +0 -78
  658. package/dist/utils/getBin.js.map +0 -1
  659. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  660. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  661. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  662. package/dist/utils/getReleaseInfo.d.ts +0 -7
  663. package/dist/utils/getReleaseInfo.js +0 -30
  664. package/dist/utils/getReleaseInfo.js.map +0 -1
  665. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  666. package/dist/utils/parseModelTypeDescription.js +0 -9
  667. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  668. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  669. package/dist/utils/resolveChatWrapper.js +0 -16
  670. package/dist/utils/resolveChatWrapper.js.map +0 -1
  671. package/dist/utils/usedBinFlag.d.ts +0 -6
  672. package/dist/utils/usedBinFlag.js +0 -15
  673. package/dist/utils/usedBinFlag.js.map +0 -1
  674. package/llama/usedBin.json +0 -3
  675. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  676. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  677. package/llamaBins/linux-x64/llama-addon.node +0 -0
  678. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  679. package/llamaBins/mac-x64/llama-addon.node +0 -0
  680. package/llamaBins/win-x64/llama-addon.exp +0 -0
  681. package/llamaBins/win-x64/llama-addon.lib +0 -0
  682. package/llamaBins/win-x64/llama-addon.node +0 -0
  683. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  684. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  685. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  686. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  687. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  688. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  689. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  690. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -1,11 +1,14 @@
1
- import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
1
+ import { DisposeAggregator, EventRelay, withLock, DisposedError, AsyncDisposeAggregator } from "lifecycle-utils";
2
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
3
- import { AddonContext } from "../LlamaBins.js";
4
3
  import { compareTokens } from "../../utils/compareTokens.js";
5
- import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
4
+ import { DisposeGuard } from "../../utils/DisposeGuard.js";
5
+ import { TokenMeter } from "../TokenMeter.js";
6
+ import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
6
7
  export class LlamaContext {
8
+ /** @internal */ _llama;
7
9
  /** @internal */ _ctx;
8
10
  /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
11
+ /** @internal */ _backendContextDisposeGuard;
9
12
  /** @internal */ _model;
10
13
  /** @internal */ _contextSize;
11
14
  /** @internal */ _batchSize;
@@ -14,7 +17,8 @@ export class LlamaContext {
14
17
  /** @internal */ _batchingOptions;
15
18
  /** @internal */ _queuedDecodeSequenceIds = new Set();
16
19
  /** @internal */ _queuedDecodes = [];
17
- /** @internal */ _disposeAggregator = new DisposeAggregator();
20
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
21
+ /** @internal */ _modelPreventDisposalHandle;
18
22
  /** @internal */ _nextGeneratedSequenceId = 0;
19
23
  /** @internal */ _dispatchDecodeScheduled = false;
20
24
  /** @internal */ _batchDispatchPending = false;
@@ -22,44 +26,50 @@ export class LlamaContext {
22
26
  /** @internal */ _allocatedContextSize;
23
27
  /** @internal */ _disposed = false;
24
28
  onDispose = new EventRelay();
25
- /**
26
- * @param options
27
- */
28
- constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
29
- if (model.disposed)
29
+ constructor({ _model }, { sequences, seed = null, contextSize, batchSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
30
+ if (_model.disposed)
30
31
  throw new DisposedError();
31
- this._model = model;
32
+ this._llama = _model._llama;
33
+ this._model = _model;
34
+ this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
35
+ this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
32
36
  this._totalSequences = Math.max(1, Math.floor(sequences));
33
37
  this._contextSize = Math.max(2, contextSize);
34
38
  this._batchSize = Math.max(batchSize, this._totalSequences);
35
- this._ctx = new AddonContext(this._model._model, removeNullFields({
39
+ this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
36
40
  seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
37
- contextSize: contextSize * this._totalSequences,
41
+ contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
38
42
  batchSize: this._batchSize,
43
+ sequences: this._totalSequences,
39
44
  threads: Math.max(0, Math.floor(threads)),
40
- embedding: _embedding,
45
+ embeddings: _embeddings,
41
46
  noSeed: _noSeed
42
47
  }));
43
48
  this._batchingOptions = {
44
49
  dispatchSchedule: batchingDispatchSchedule,
45
- itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
50
+ itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
46
51
  };
47
52
  this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
48
- this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
49
- this._disposeAggregator.add(this.onDispose.dispatchEvent);
50
53
  this._disposeAggregator.add(() => {
51
- this._ctx.dispose();
54
+ this._disposed = true;
52
55
  });
56
+ this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
57
+ this._disposeAggregator.add(this.onDispose.dispatchEvent);
53
58
  this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
59
+ this._disposeAggregator.add(async () => {
60
+ await this._backendContextDisposeGuard.acquireDisposeLock();
61
+ await this._ctx.dispose();
62
+ this._modelPreventDisposalHandle.dispose();
63
+ });
54
64
  }
55
- dispose() {
65
+ async dispose() {
56
66
  if (this._disposed)
57
67
  return;
58
68
  this._disposed = true;
59
- this._disposeAggregator.dispose();
69
+ await this._disposeAggregator.dispose();
60
70
  }
61
71
  /** @hidden */
62
- [Symbol.dispose]() {
72
+ [Symbol.asyncDispose]() {
63
73
  return this.dispose();
64
74
  }
65
75
  get disposed() {
@@ -74,6 +84,14 @@ export class LlamaContext {
74
84
  get batchSize() {
75
85
  return this._batchSize;
76
86
  }
87
+ /**
88
+ * The actual size of the state in memory, in bytes.
89
+ * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
90
+ */
91
+ get stateSize() {
92
+ this._ensureNotDisposed();
93
+ return this._ctx.getStateSize();
94
+ }
77
95
  getAllocatedContextSize() {
78
96
  this._ensureNotDisposed();
79
97
  if (this._allocatedContextSize == null)
@@ -89,9 +107,9 @@ export class LlamaContext {
89
107
  /**
90
108
  * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
91
109
  * When there are no sequences left, this method will throw an error.
92
- * @param [options]
93
110
  */
94
- getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
111
+ getSequence(options = {}) {
112
+ const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
95
113
  this._ensureNotDisposed();
96
114
  const nextSequenceId = this._popSequenceId();
97
115
  if (nextSequenceId == null)
@@ -99,6 +117,7 @@ export class LlamaContext {
99
117
  return LlamaContextSequence._create({
100
118
  sequenceId: nextSequenceId,
101
119
  context: this,
120
+ tokenMeter: _tokenMeter,
102
121
  contextShift: {
103
122
  size: contextShiftSize,
104
123
  strategy: contextShiftStrategy
@@ -115,17 +134,18 @@ export class LlamaContext {
115
134
  this._currentDispatchBatchHandle = {};
116
135
  this._dispatchDecodeScheduled = false;
117
136
  this._batchDispatchPending = false;
118
- let prioritizeStrategy;
119
- try {
120
- this._ensureNotDisposed();
121
- prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
122
- }
123
- catch (err) {
124
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
125
- return;
126
- }
127
- let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
128
- while (shouldHaveAnotherBatch) {
137
+ let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
138
+ const resolvePrioritizationStrategy = () => {
139
+ try {
140
+ this._ensureNotDisposed();
141
+ return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
142
+ }
143
+ catch (err) {
144
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
145
+ }
146
+ return null;
147
+ };
148
+ const getOrderedQueuedDecodes = (prioritizationStrategy) => {
129
149
  const batchItemToQueuedDecodeMap = new Map();
130
150
  const batchItemsList = [];
131
151
  for (const queuedDecode of this._queuedDecodes) {
@@ -138,42 +158,65 @@ export class LlamaContext {
138
158
  }
139
159
  let prioritizedItems;
140
160
  try {
141
- prioritizedItems = prioritizeStrategy({
161
+ prioritizedItems = prioritizationStrategy({
142
162
  items: batchItemsList,
143
163
  size: this._batchSize
144
164
  });
145
165
  }
146
166
  catch (err) {
147
167
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
148
- return;
168
+ return null;
149
169
  }
150
- let batchTokenSlotsLeft = this._batchSize;
151
- const afterDecodeActions = [];
152
- const queuedDecodesToDelete = new Set();
153
- const currentQueuedDecodeItems = new Set();
154
- const currentBatchItems = [];
155
- let currentBatchSize = 0;
156
- for (const prioritizedItem of prioritizedItems) {
170
+ return prioritizedItems.map((prioritizedItem) => {
157
171
  const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
158
172
  if (queuedDecode == null)
159
173
  throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
160
174
  "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
161
- const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
162
- if (processAmount <= 0)
175
+ return {
176
+ queuedDecode,
177
+ processAmount: prioritizedItem.processAmount
178
+ };
179
+ });
180
+ };
181
+ const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
182
+ const currentBatchItems = [];
183
+ let currentBatchSize = 0;
184
+ let batchTokenSlotsLeft = batchSize;
185
+ for (const { queuedDecode, processAmount } of queuedDecodes) {
186
+ const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
187
+ if (resolvedProcessAmount <= 0) {
188
+ if (batchTokenSlotsLeft === 0)
189
+ break;
163
190
  continue;
164
- batchTokenSlotsLeft -= processAmount;
191
+ }
192
+ batchTokenSlotsLeft -= resolvedProcessAmount;
193
+ currentBatchSize += resolvedProcessAmount;
165
194
  currentBatchItems.push({
166
195
  queuedDecode,
167
- processAmount
196
+ processAmount: resolvedProcessAmount
168
197
  });
169
- currentBatchSize += processAmount;
170
198
  }
199
+ return {
200
+ currentBatchItems,
201
+ currentBatchSize
202
+ };
203
+ };
204
+ const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
205
+ const afterDecodeActions = [];
206
+ const queuedDecodesToDelete = new Set();
207
+ const currentQueuedDecodeItems = new Set();
171
208
  if (currentBatchSize !== 0)
172
209
  this._ctx.initBatch(currentBatchSize);
173
- for (const { queuedDecode, processAmount } of currentBatchItems) {
210
+ for (const { queuedDecode, processAmount } of batchItems) {
174
211
  let batchLogitIndex;
175
212
  try {
176
- batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
213
+ const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
214
+ processAmount === queuedDecode.tokens.length;
215
+ const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
216
+ const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
217
+ TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
218
+ TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
219
+ batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
177
220
  }
178
221
  catch (err) {
179
222
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
@@ -192,8 +235,6 @@ export class LlamaContext {
192
235
  queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
193
236
  queuedDecode.firstTokenSequenceIndex += processAmount;
194
237
  }
195
- if (batchTokenSlotsLeft === 0)
196
- break;
197
238
  }
198
239
  for (let i = 0; i < this._queuedDecodes.length; i++) {
199
240
  const queuedDecode = this._queuedDecodes[i];
@@ -203,7 +244,6 @@ export class LlamaContext {
203
244
  i--;
204
245
  }
205
246
  }
206
- shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
207
247
  try {
208
248
  if (currentBatchSize !== 0)
209
249
  await this._ctx.decodeBatch();
@@ -224,11 +264,45 @@ export class LlamaContext {
224
264
  }
225
265
  accept(undefined);
226
266
  }
267
+ };
268
+ const prioritizationStrategy = resolvePrioritizationStrategy();
269
+ if (prioritizationStrategy == null)
270
+ return; // all queued items are rejected and dequeued when we get here
271
+ while (shouldHaveAnotherLoop) {
272
+ const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
273
+ if (orderedQueuedDecodes == null)
274
+ return; // all queued items are rejected and dequeued when we get here
275
+ const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
276
+ let preventDisposalHandle;
277
+ try {
278
+ preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
279
+ }
280
+ catch (err) {
281
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
282
+ return;
283
+ }
284
+ try {
285
+ await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
286
+ shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
287
+ }
288
+ finally {
289
+ preventDisposalHandle.dispose();
290
+ }
227
291
  }
228
292
  });
229
293
  }
294
+ /**
295
+ * Print the timings of token evaluation since the last print for this context.
296
+ * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
297
+ * it won't print anything.
298
+ */
299
+ async printTimings() {
300
+ this._ensureNotDisposed();
301
+ this._ctx.printTimings();
302
+ await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
303
+ }
230
304
  /** @internal */
231
- async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
305
+ async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
232
306
  return await new Promise((accept, reject) => {
233
307
  this._queuedDecodes.push({
234
308
  sequenceId,
@@ -236,6 +310,7 @@ export class LlamaContext {
236
310
  firstTokenSequenceIndex,
237
311
  generateLogitAtTheEnd,
238
312
  evaluationPriority,
313
+ tokenMeter,
239
314
  response: [accept, reject],
240
315
  onDone
241
316
  });
@@ -248,6 +323,8 @@ export class LlamaContext {
248
323
  if (this._disposed)
249
324
  return;
250
325
  void withLock(this, "context", async () => {
326
+ if (this._disposed)
327
+ return;
251
328
  this._ctx.disposeSequence(sequenceId);
252
329
  this._unusedSequenceIds.push(sequenceId);
253
330
  this._onReclaimUnusedSequenceId.dispatchEvent();
@@ -258,6 +335,10 @@ export class LlamaContext {
258
335
  this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
259
336
  }
260
337
  /** @internal */
338
+ _canBeNextTokenForGrammarEvaluationState(grammarEvaluationState, token) {
339
+ return this._ctx.canBeNextTokenForGrammarEvaluationState(grammarEvaluationState._state, token);
340
+ }
341
+ /** @internal */
261
342
  _popSequenceId() {
262
343
  if (this._unusedSequenceIds.length > 0)
263
344
  return this._unusedSequenceIds.shift();
@@ -307,20 +388,63 @@ export class LlamaContext {
307
388
  if (this._disposed)
308
389
  throw new DisposedError();
309
390
  }
391
+ /** @internal */
392
+ static async _create(options, { _model }) {
393
+ const sequences = options.sequences ?? getDefaultContextSequences();
394
+ const contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
395
+ batchSize: options.batchSize,
396
+ sequences: sequences,
397
+ modelGpuLayers: _model.gpuLayers,
398
+ modelTrainContextSize: _model.trainContextSize,
399
+ getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
400
+ llamaGpu: _model._llama.gpu,
401
+ ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
402
+ isEmbeddingContext: options._embeddings
403
+ });
404
+ const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
405
+ const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
406
+ contextSize,
407
+ sequences,
408
+ isEmbeddingContext: options._embeddings,
409
+ modelGpuLayers: _model.gpuLayers,
410
+ batchSize
411
+ }).gpuVram;
412
+ const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences });
413
+ const { createSignal } = options;
414
+ const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
415
+ ? null
416
+ : _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
417
+ try {
418
+ const contextLoaded = await context._ctx.init();
419
+ if (createSignal?.aborted) {
420
+ if (contextLoaded)
421
+ await context._ctx.dispose();
422
+ throw createSignal.reason;
423
+ }
424
+ else if (!contextLoaded)
425
+ throw new Error("Failed to create context");
426
+ return context;
427
+ }
428
+ finally {
429
+ contextCreationMemoryReservation?.dispose?.();
430
+ }
431
+ }
310
432
  }
311
433
  export class LlamaContextSequence {
312
434
  /** @internal */ _sequenceId;
313
435
  /** @internal */ _gcRegistry;
314
436
  /** @internal */ _context;
315
437
  /** @internal */ _contextShift;
438
+ /** @internal */ _tokenMeter;
316
439
  /** @internal */ _disposeAggregator = new DisposeAggregator();
317
440
  /** @internal */ _contextTokens = [];
318
441
  /** @internal */ _nextTokenIndex = 0;
319
442
  /** @internal */ _disposed = false;
320
443
  onDispose = new EventRelay();
321
- constructor({ sequenceId, context, contextShift }) {
444
+ constructor({ sequenceId, context, tokenMeter, contextShift }) {
322
445
  this._sequenceId = sequenceId;
323
446
  this._context = context;
447
+ this._tokenMeter = tokenMeter ?? new TokenMeter();
324
448
  this._contextShift = contextShift;
325
449
  this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
326
450
  this._gcRegistry.register(this, sequenceId);
@@ -357,6 +481,9 @@ export class LlamaContextSequence {
357
481
  get contextTokens() {
358
482
  return this._contextTokens.slice();
359
483
  }
484
+ get tokenMeter() {
485
+ return this._tokenMeter;
486
+ }
360
487
  get isLoadedToMemory() {
361
488
  return !this._disposed;
362
489
  }
@@ -382,7 +509,7 @@ export class LlamaContextSequence {
382
509
  }
383
510
  /**
384
511
  * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
385
- * the start and end of each range are exclusive.
512
+ * The start of each range is inclusive, and the end of each range is exclusive.
386
513
  * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
387
514
  */
388
515
  async eraseContextTokenRanges(ranges) {
@@ -391,6 +518,8 @@ export class LlamaContextSequence {
391
518
  this._ensureNotDisposed();
392
519
  if (ranges.length === 0)
393
520
  return;
521
+ // if the deletion fails, we'll have to dispose the sequence and fill it up again
522
+ let deletionSuccessful = true;
394
523
  const resolvedRanges = ranges
395
524
  .map(({ start, end }) => {
396
525
  if (start === end)
@@ -420,34 +549,41 @@ export class LlamaContextSequence {
420
549
  let lastDeleteRangeEndPos = null;
421
550
  for (const range of resolvedRanges) {
422
551
  this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
423
- this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
424
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
552
+ if (deletionSuccessful)
553
+ deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
554
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
425
555
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
426
556
  removedTokens += range.end - range.start;
427
557
  lastDeleteRangeEndPos = range.end;
428
558
  }
429
- if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
559
+ if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
430
560
  this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
431
561
  this._nextTokenIndex -= removedTokens;
562
+ if (deletionSuccessful)
563
+ return;
564
+ const newSequenceTokens = this._contextTokens.slice();
565
+ this._nextTokenIndex = 0;
566
+ this._context._ctx.disposeSequence(this._sequenceId);
567
+ await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
432
568
  });
433
569
  }
434
- /**
435
- * @param tokens
436
- * @param [options]
437
- */
438
- evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
570
+ evaluate(tokens, options = {}) {
571
+ const { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
439
572
  return this._evaluate(tokens, {
440
573
  temperature,
574
+ minP,
441
575
  topK,
442
576
  topP,
443
577
  grammarEvaluationState,
444
578
  repeatPenalty,
579
+ tokenBias,
445
580
  evaluationPriority,
446
581
  contextShiftOptions: {
447
582
  size: contextShiftSize,
448
583
  strategy: contextShiftStrategy
449
584
  },
450
- yieldEosToken
585
+ yieldEogToken,
586
+ _noSampling
451
587
  });
452
588
  }
453
589
  /**
@@ -470,24 +606,29 @@ export class LlamaContextSequence {
470
606
  }
471
607
  }
472
608
  /** @internal */
473
- async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
609
+ async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
474
610
  this._ensureNotDisposed();
475
611
  let evalTokens = tokens;
476
612
  if (evalTokens.length === 0)
477
613
  return;
478
- // eslint-disable-next-line no-constant-condition
479
614
  while (true) {
480
615
  this._ensureNotDisposed();
481
616
  // Evaluate to get the next token.
482
- const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
617
+ const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
618
+ if (_noSampling)
619
+ return null;
483
620
  const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
484
621
  ? repeatPenalty.punishTokens()
485
622
  : repeatPenalty?.punishTokens;
486
623
  const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
487
624
  ? grammarEvaluationState()
488
625
  : grammarEvaluationState;
626
+ if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
627
+ throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
628
+ const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
489
629
  return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
490
630
  temperature,
631
+ minP,
491
632
  topK,
492
633
  topP,
493
634
  repeatPenalty: repeatPenalty?.penalty,
@@ -496,31 +637,36 @@ export class LlamaContextSequence {
496
637
  : undefined,
497
638
  repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
498
639
  repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
640
+ tokenBiasKeys,
641
+ tokenBiasValues,
499
642
  grammarEvaluationState: resolvedGrammarEvaluationState?._state
500
643
  }));
501
644
  });
502
645
  if (nextToken == null)
503
646
  return;
504
647
  // the model finished generating text
505
- if (!yieldEosToken && nextToken === this._context.model.tokens.eos)
648
+ if (!yieldEogToken && this._context.model.isEogToken(nextToken))
506
649
  break;
507
- yield nextToken;
508
- // Create tokens for the next eval.
509
- evalTokens = [nextToken];
650
+ const replacementToken = (yield nextToken);
651
+ // set the tokens for the next evaluation
652
+ if (replacementToken != null)
653
+ evalTokens = [replacementToken];
654
+ else
655
+ evalTokens = [nextToken];
510
656
  }
511
657
  }
512
658
  /** @internal */
513
- async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
659
+ async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
514
660
  this._ensureNotDisposed();
515
661
  const tokensLeftToDecode = tokens.slice();
516
662
  return await withLock(this, "evaluate", async () => {
517
663
  while (tokensLeftToDecode.length > 0) {
518
664
  this._ensureNotDisposed();
519
- let freeSpace = this._context.contextSize - this._nextTokenIndex;
520
- if (freeSpace <= 1) {
665
+ let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
666
+ if (freeSpace <= 0) {
521
667
  await this._freeUpSpaceForTokens(contextShiftOptions);
522
- freeSpace = this._context.contextSize - this._nextTokenIndex;
523
- if (freeSpace <= 1)
668
+ freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
669
+ if (freeSpace <= 0)
524
670
  throw new Error("Failed to free up space for new tokens");
525
671
  }
526
672
  const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
@@ -530,7 +676,8 @@ export class LlamaContextSequence {
530
676
  tokens: tokensToDecode,
531
677
  firstTokenSequenceIndex: this._nextTokenIndex,
532
678
  generateLogitAtTheEnd,
533
- evaluationPriority
679
+ evaluationPriority,
680
+ tokenMeter
534
681
  }, !generateLogitAtTheEnd
535
682
  ? undefined
536
683
  : onDecodeDone);
@@ -550,7 +697,10 @@ export class LlamaContextSequence {
550
697
  : contextShiftOptions.size));
551
698
  this._ensureNotDisposed();
552
699
  if (contextShiftOptions.strategy === "eraseBeginning") {
553
- await this.eraseContextTokenRanges([{ start: 0, end: size }]);
700
+ let eraseStartIndex = 0;
701
+ if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
702
+ eraseStartIndex = 1;
703
+ await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
554
704
  }
555
705
  else {
556
706
  const ranges = await contextShiftOptions.strategy({
@@ -560,7 +710,7 @@ export class LlamaContextSequence {
560
710
  if (ranges == null)
561
711
  throw new Error("Invalid delete ranges");
562
712
  await this.eraseContextTokenRanges(ranges);
563
- if (this.nextTokenIndex >= this._context.contextSize)
713
+ if (this.nextTokenIndex >= this._context.contextSize - 1)
564
714
  await this.eraseContextTokenRanges([{ start: 0, end: size }]);
565
715
  }
566
716
  }
@@ -573,10 +723,11 @@ export class LlamaContextSequence {
573
723
  * We need this to make it impossible to manually create instances of this class outside the code of this library
574
724
  * @internal
575
725
  */
576
- static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
726
+ static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
577
727
  return new LlamaContextSequence({
578
728
  sequenceId,
579
729
  context,
730
+ tokenMeter,
580
731
  contextShift: {
581
732
  size: contextShiftSize,
582
733
  strategy: contextShiftStrategy
@@ -584,14 +735,52 @@ export class LlamaContextSequence {
584
735
  });
585
736
  }
586
737
  }
738
+ function getTokenBiasesForAddon(tokenBias, currentModel) {
739
+ if (tokenBias == null)
740
+ return {
741
+ tokenBiasKeys: undefined,
742
+ tokenBiasValues: undefined
743
+ };
744
+ if (tokenBias instanceof Function)
745
+ tokenBias = tokenBias();
746
+ if (tokenBias._model !== currentModel)
747
+ throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
748
+ "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
749
+ const tokenBiasKeys = [];
750
+ const tokenBiasValues = [];
751
+ for (const [token, bias] of tokenBias._biases) {
752
+ tokenBiasKeys.push(token);
753
+ tokenBiasValues.push(bias);
754
+ }
755
+ if (tokenBiasKeys.length === 0 || tokenBiasValues.length === 0) {
756
+ return {
757
+ tokenBiasKeys: undefined,
758
+ tokenBiasValues: undefined
759
+ };
760
+ }
761
+ return {
762
+ tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
763
+ tokenBiasValues: Float32Array.from(tokenBiasValues)
764
+ };
765
+ }
587
766
  function disposeContextIfReferenced(contextRef) {
588
767
  const context = contextRef.deref();
589
768
  if (context != null)
590
- context.dispose();
769
+ void context.dispose();
591
770
  }
592
771
  function disposeContextSequenceIfReferenced(contextRef) {
593
772
  const context = contextRef.deref();
594
773
  if (context != null)
595
774
  context.dispose();
596
775
  }
776
+ export function getDefaultContextBatchSize({ contextSize, sequences }) {
777
+ return Math.min(contextSize * sequences, 512);
778
+ }
779
+ export function getDefaultContextSequences() {
780
+ return 1;
781
+ }
782
+ const defaultFallbackContextSize = 4096;
783
+ export function getDefaultModelContextSize({ trainContextSize }) {
784
+ return trainContextSize ?? defaultFallbackContextSize;
785
+ }
597
786
  //# sourceMappingURL=LlamaContext.js.map