node-llama-cpp 3.0.0-beta.14 → 3.0.0-beta.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383)
  1. package/README.md +1 -1
  2. package/dist/ChatWrapper.js +4 -0
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/bindings/AddonTypes.d.ts +23 -0
  5. package/dist/bindings/Llama.d.ts +11 -0
  6. package/dist/bindings/Llama.js +56 -4
  7. package/dist/bindings/Llama.js.map +1 -1
  8. package/dist/bindings/getLlama.d.ts +20 -2
  9. package/dist/bindings/getLlama.js +15 -5
  10. package/dist/bindings/getLlama.js.map +1 -1
  11. package/dist/bindings/types.d.ts +15 -0
  12. package/dist/bindings/types.js +27 -2
  13. package/dist/bindings/types.js.map +1 -1
  14. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  15. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  16. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  17. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  18. package/dist/bindings/utils/cloneLlamaCppRepo.js +26 -25
  19. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  20. package/dist/bindings/utils/compileLLamaCpp.js +2 -2
  21. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  22. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  23. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  24. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  25. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
  26. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  27. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  28. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  29. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  30. package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
  31. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  32. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  33. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  34. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  35. package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
  36. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  37. package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
  38. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  39. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  40. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  41. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  42. package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
  43. package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
  44. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
  45. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  46. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
  47. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  48. package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +6 -9
  49. package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
  50. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  51. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  52. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  53. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  54. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  55. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  56. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  57. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
  58. package/dist/chatWrappers/utils/resolveChatWrapper.js +208 -0
  59. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  60. package/dist/cli/cli.js +1 -1
  61. package/dist/cli/cli.js.map +1 -1
  62. package/dist/cli/commands/BuildCommand.js +1 -1
  63. package/dist/cli/commands/BuildCommand.js.map +1 -1
  64. package/dist/cli/commands/ChatCommand.d.ts +9 -5
  65. package/dist/cli/commands/ChatCommand.js +203 -118
  66. package/dist/cli/commands/ChatCommand.js.map +1 -1
  67. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  68. package/dist/cli/commands/ClearCommand.js +5 -5
  69. package/dist/cli/commands/ClearCommand.js.map +1 -1
  70. package/dist/cli/commands/CompleteCommand.d.ts +5 -3
  71. package/dist/cli/commands/CompleteCommand.js +136 -85
  72. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  73. package/dist/cli/commands/DebugCommand.js +4 -4
  74. package/dist/cli/commands/DownloadCommand.js +3 -4
  75. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  76. package/dist/cli/commands/InfillCommand.d.ts +5 -3
  77. package/dist/cli/commands/InfillCommand.js +138 -89
  78. package/dist/cli/commands/InfillCommand.js.map +1 -1
  79. package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
  80. package/dist/cli/commands/inspect/InspectCommand.js +17 -0
  81. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  82. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +11 -0
  83. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +121 -0
  84. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  85. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  86. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +136 -0
  87. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  88. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +15 -0
  89. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +579 -0
  90. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  91. package/dist/cli/recommendedModels.d.ts +2 -0
  92. package/dist/cli/recommendedModels.js +281 -0
  93. package/dist/cli/recommendedModels.js.map +1 -0
  94. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  95. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  96. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  97. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  98. package/dist/cli/utils/ConsoleTable.js +86 -0
  99. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  100. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  101. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  102. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  103. package/dist/cli/utils/consolePromptQuestion.d.ts +5 -0
  104. package/dist/cli/utils/consolePromptQuestion.js +80 -0
  105. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  106. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  107. package/dist/cli/utils/getReadablePath.js +14 -0
  108. package/dist/cli/utils/getReadablePath.js.map +1 -0
  109. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  110. package/dist/cli/utils/printCommonInfoLines.js +70 -0
  111. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  112. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  113. package/dist/cli/utils/printInfoLine.js +54 -0
  114. package/dist/cli/utils/printInfoLine.js.map +1 -0
  115. package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -0
  116. package/dist/cli/utils/resolveCommandGgufPath.js +494 -0
  117. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  118. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  119. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  120. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  121. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  122. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  123. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  124. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  125. package/dist/cli/utils/splitAnsiToLines.js +17 -0
  126. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  127. package/dist/config.d.ts +5 -0
  128. package/dist/config.js +11 -2
  129. package/dist/config.js.map +1 -1
  130. package/dist/consts.d.ts +2 -0
  131. package/dist/consts.js +8 -0
  132. package/dist/consts.js.map +1 -1
  133. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +8 -1
  134. package/dist/evaluator/LlamaChat/LlamaChat.js +15 -6
  135. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  136. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +9 -2
  137. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +5 -3
  138. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  139. package/dist/evaluator/LlamaCompletion.d.ts +9 -2
  140. package/dist/evaluator/LlamaCompletion.js +11 -6
  141. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  142. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +30 -3
  143. package/dist/evaluator/LlamaContext/LlamaContext.js +227 -102
  144. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  145. package/dist/evaluator/LlamaContext/types.d.ts +57 -6
  146. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  147. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  148. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  149. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
  150. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  151. package/dist/evaluator/LlamaEmbeddingContext.d.ts +23 -2
  152. package/dist/evaluator/LlamaEmbeddingContext.js +4 -5
  153. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  154. package/dist/evaluator/LlamaGrammar.d.ts +3 -2
  155. package/dist/evaluator/LlamaGrammar.js +3 -2
  156. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  157. package/dist/evaluator/LlamaModel.d.ts +56 -6
  158. package/dist/evaluator/LlamaModel.js +99 -7
  159. package/dist/evaluator/LlamaModel.js.map +1 -1
  160. package/dist/evaluator/TokenBias.d.ts +22 -0
  161. package/dist/evaluator/TokenBias.js +33 -0
  162. package/dist/evaluator/TokenBias.js.map +1 -0
  163. package/dist/evaluator/TokenMeter.d.ts +54 -0
  164. package/dist/evaluator/TokenMeter.js +86 -0
  165. package/dist/evaluator/TokenMeter.js.map +1 -0
  166. package/dist/gguf/consts.d.ts +3 -0
  167. package/dist/gguf/consts.js +8 -0
  168. package/dist/gguf/consts.js.map +1 -0
  169. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  170. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  171. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  172. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  173. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  174. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  175. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  176. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  177. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  178. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  179. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  180. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  181. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  182. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  183. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  184. package/dist/gguf/insights/GgufInsights.d.ts +42 -0
  185. package/dist/gguf/insights/GgufInsights.js +361 -0
  186. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  187. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  188. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
  189. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  190. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  191. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +52 -0
  192. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  193. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  194. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  195. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  196. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  197. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  198. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  199. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  200. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  201. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  202. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  203. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  204. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  205. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  206. package/dist/gguf/parser/parseGguf.js +58 -0
  207. package/dist/gguf/parser/parseGguf.js.map +1 -0
  208. package/dist/gguf/readGgufFileInfo.d.ts +30 -0
  209. package/dist/gguf/readGgufFileInfo.js +38 -0
  210. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  211. package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
  212. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  213. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  214. package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
  215. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  216. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  217. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  218. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  219. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  220. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  221. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  222. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  223. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  224. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  225. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  226. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  227. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  228. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  229. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  230. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  231. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  232. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  233. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  234. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  235. package/dist/index.d.ts +14 -7
  236. package/dist/index.js +12 -6
  237. package/dist/index.js.map +1 -1
  238. package/dist/types.d.ts +1 -1
  239. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  240. package/dist/utils/InsufficientMemoryError.js +6 -0
  241. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  242. package/dist/utils/LlamaText.d.ts +25 -10
  243. package/dist/utils/LlamaText.js +205 -23
  244. package/dist/utils/LlamaText.js.map +1 -1
  245. package/dist/utils/StopGenerationDetector.js +3 -1
  246. package/dist/utils/StopGenerationDetector.js.map +1 -1
  247. package/dist/utils/findBestOption.d.ts +4 -0
  248. package/dist/utils/findBestOption.js +15 -0
  249. package/dist/utils/findBestOption.js.map +1 -0
  250. package/dist/utils/getConsoleLogPrefix.js +1 -1
  251. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
  252. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
  253. package/dist/utils/getReadableContextSize.d.ts +1 -0
  254. package/dist/utils/getReadableContextSize.js +7 -0
  255. package/dist/utils/getReadableContextSize.js.map +1 -0
  256. package/dist/utils/gitReleaseBundles.js +68 -1
  257. package/dist/utils/gitReleaseBundles.js.map +1 -1
  258. package/dist/utils/isToken.d.ts +2 -0
  259. package/dist/utils/isToken.js +4 -0
  260. package/dist/utils/isToken.js.map +1 -0
  261. package/dist/utils/isUrl.d.ts +1 -0
  262. package/dist/utils/isUrl.js +15 -0
  263. package/dist/utils/isUrl.js.map +1 -0
  264. package/dist/utils/mergeUnionTypes.d.ts +4 -0
  265. package/dist/utils/parseModelFileName.d.ts +1 -0
  266. package/dist/utils/parseModelFileName.js +6 -1
  267. package/dist/utils/parseModelFileName.js.map +1 -1
  268. package/dist/utils/prettyPrintObject.d.ts +10 -1
  269. package/dist/utils/prettyPrintObject.js +57 -13
  270. package/dist/utils/prettyPrintObject.js.map +1 -1
  271. package/dist/utils/spawnCommand.js.map +1 -1
  272. package/dist/utils/tokenizeInput.d.ts +1 -1
  273. package/dist/utils/tokenizeInput.js +6 -3
  274. package/dist/utils/tokenizeInput.js.map +1 -1
  275. package/dist/utils/withOra.d.ts +2 -0
  276. package/dist/utils/withOra.js +14 -8
  277. package/dist/utils/withOra.js.map +1 -1
  278. package/dist/utils/withProgressLog.d.ts +23 -0
  279. package/dist/utils/withProgressLog.js +211 -0
  280. package/dist/utils/withProgressLog.js.map +1 -0
  281. package/dist/utils/withStatusLogs.js +1 -1
  282. package/dist/utils/withStatusLogs.js.map +1 -1
  283. package/llama/CMakeLists.txt +5 -5
  284. package/llama/addon.cpp +159 -9
  285. package/llama/binariesGithubRelease.json +1 -1
  286. package/llama/gitRelease.bundle +0 -0
  287. package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
  288. package/llama/gpuInfo/cuda-gpu-info.h +3 -0
  289. package/llama/gpuInfo/metal-gpu-info.h +4 -1
  290. package/llama/gpuInfo/metal-gpu-info.mm +14 -1
  291. package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
  292. package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
  293. package/llama/grammars/README.md +10 -0
  294. package/llama/llama.cpp.info.json +1 -1
  295. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  296. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
  297. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  298. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  299. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  300. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
  301. package/llamaBins/linux-x64/llama-addon.node +0 -0
  302. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
  303. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  304. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  305. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  306. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  307. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  308. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  309. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
  310. package/llamaBins/mac-x64/llama-addon.node +0 -0
  311. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  312. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  313. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  314. package/llamaBins/win-arm64/llama-addon.node +0 -0
  315. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
  316. package/llamaBins/win-x64/llama-addon.node +0 -0
  317. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
  318. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  319. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  320. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  321. package/package.json +15 -12
  322. package/dist/TemplateChatWrapper.js.map +0 -1
  323. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
  324. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
  325. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
  326. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  327. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
  328. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  329. package/dist/cli/commands/InspectCommand.js +0 -113
  330. package/dist/cli/commands/InspectCommand.js.map +0 -1
  331. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  332. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  333. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  334. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  335. package/dist/gguf/GGUFInsights.d.ts +0 -28
  336. package/dist/gguf/GGUFInsights.js +0 -58
  337. package/dist/gguf/GGUFInsights.js.map +0 -1
  338. package/dist/gguf/GGUFMetadata.d.ts +0 -19
  339. package/dist/gguf/GGUFMetadata.js +0 -38
  340. package/dist/gguf/GGUFMetadata.js.map +0 -1
  341. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
  342. package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
  343. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
  344. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
  345. package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
  346. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
  347. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
  348. package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
  349. package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
  350. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
  351. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -11
  352. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
  353. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
  354. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
  355. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
  356. package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
  357. package/dist/gguf/ggufParser/GGUFParser.js +0 -123
  358. package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
  359. package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
  360. package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
  361. package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
  362. package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
  363. package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
  364. package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
  365. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
  366. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
  367. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
  368. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
  369. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
  370. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
  371. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
  372. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
  373. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
  374. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  375. package/dist/utils/parseModelTypeDescription.js +0 -9
  376. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  377. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  378. package/dist/utils/resolveChatWrapper.js +0 -16
  379. package/dist/utils/resolveChatWrapper.js.map +0 -1
  380. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  381. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  382. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  383. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -1,6 +1,8 @@
1
1
  import { EventRelay } from "lifecycle-utils";
2
2
  import { Token } from "../../types.js";
3
3
  import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
4
+ import { TokenMeter } from "../TokenMeter.js";
5
+ import { TokenBias } from "../TokenBias.js";
4
6
  import { ContextShiftOptions, ContextTokensDeleteRange, EvaluationPriority, LlamaContextSequenceRepeatPenalty } from "./types.js";
5
7
  import type { LlamaModel } from "../LlamaModel.js";
6
8
  export declare class LlamaContext {
@@ -13,6 +15,11 @@ export declare class LlamaContext {
13
15
  get model(): LlamaModel;
14
16
  get contextSize(): number;
15
17
  get batchSize(): number;
18
+ /**
19
+ * The actual size of the state in the memory in bytes.
20
+ * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
21
+ */
22
+ get stateSize(): number;
16
23
  getAllocatedContextSize(): number;
17
24
  get totalSequences(): number;
18
25
  get sequencesLeft(): number;
@@ -21,10 +28,15 @@ export declare class LlamaContext {
21
28
  * When there are no sequences left, this method will throw an error.
22
29
  * @param [options]
23
30
  */
24
- getSequence({ contextShift: { size: contextShiftSize, strategy: contextShiftStrategy } }?: {
31
+ getSequence({ contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, _tokenMeter }?: {
25
32
  contextShift?: ContextShiftOptions;
26
33
  }): LlamaContextSequence;
27
34
  dispatchPendingBatch(): void;
35
+ /**
36
+ * Print the timings of token evaluation since the last print for this context.
37
+ * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
38
+ * it won't print anything.
39
+ */
28
40
  printTimings(): Promise<void>;
29
41
  }
30
42
  export declare class LlamaContextSequence {
@@ -38,6 +50,7 @@ export declare class LlamaContextSequence {
38
50
  get model(): LlamaModel;
39
51
  get nextTokenIndex(): number;
40
52
  get contextTokens(): Token[];
53
+ get tokenMeter(): TokenMeter;
41
54
  get isLoadedToMemory(): boolean;
42
55
  compareContextTokens(tokens: Token[]): {
43
56
  firstDifferentIndex: number;
@@ -49,7 +62,7 @@ export declare class LlamaContextSequence {
49
62
  clearHistory(): Promise<void>;
50
63
  /**
51
64
  * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
52
- * the start and end of each range are exclusive.
65
+ * The start of each range is inclusive, and the end of each range is exclusive.
53
66
  * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
54
67
  */
55
68
  eraseContextTokenRanges(ranges: ContextTokensDeleteRange[]): Promise<void>;
@@ -57,13 +70,19 @@ export declare class LlamaContextSequence {
57
70
  * @param tokens
58
71
  * @param [options]
59
72
  */
60
- evaluate(tokens: Token[], { temperature, minP, topK, topP, grammarEvaluationState, repeatPenalty, evaluationPriority, contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, yieldEosToken }?: {
73
+ evaluate(tokens: Token[], { temperature, minP, topK, topP, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority, contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, yieldEosToken }?: {
61
74
  temperature?: number;
62
75
  minP?: number;
63
76
  topK?: number;
64
77
  topP?: number;
65
78
  grammarEvaluationState?: LlamaGrammarEvaluationState | (() => LlamaGrammarEvaluationState | undefined);
66
79
  repeatPenalty?: LlamaContextSequenceRepeatPenalty;
80
+ /**
81
+ * Adjust the probability of tokens being generated.
82
+ * Can be used to bias the model to generate tokens that you want it to lean towards,
83
+ * or to avoid generating tokens that you want it to avoid.
84
+ */
85
+ tokenBias?: TokenBias | (() => TokenBias);
67
86
  /**
68
87
  * When a lot of tokens are queued for the next batch, more than the configured `batchSize`, the tokens for each sequence will be
69
88
  * evaluated based on the strategy chosen for the context.
@@ -104,3 +123,11 @@ export declare class LlamaContextSequence {
104
123
  contextShift?: ContextShiftOptions;
105
124
  }): Promise<void>;
106
125
  }
126
+ export declare function getDefaultContextBatchSize({ contextSize, sequences }: {
127
+ contextSize: number;
128
+ sequences: number;
129
+ }): number;
130
+ export declare function getDefaultContextSequences(): number;
131
+ export declare function getDefaultModelContextSize({ trainContextSize }: {
132
+ trainContextSize?: number;
133
+ }): number;
@@ -2,7 +2,8 @@ import { DisposeAggregator, EventRelay, withLock, DisposedError, AsyncDisposeAgg
2
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
3
3
  import { compareTokens } from "../../utils/compareTokens.js";
4
4
  import { DisposeGuard } from "../../utils/DisposeGuard.js";
5
- import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
5
+ import { TokenMeter } from "../TokenMeter.js";
6
+ import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
6
7
  export class LlamaContext {
7
8
  /** @internal */ _llama;
8
9
  /** @internal */ _ctx;
@@ -25,7 +26,7 @@ export class LlamaContext {
25
26
  /** @internal */ _allocatedContextSize;
26
27
  /** @internal */ _disposed = false;
27
28
  onDispose = new EventRelay();
28
- constructor({ _model }, { sequences = 1, seed = null, contextSize = _model.trainContextSize, batchSize = Math.min(contextSize * sequences, 512), threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
29
+ constructor({ _model }, { sequences, seed = null, contextSize, batchSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
29
30
  if (_model.disposed)
30
31
  throw new DisposedError();
31
32
  this._llama = _model._llama;
@@ -39,13 +40,14 @@ export class LlamaContext {
39
40
  seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
40
41
  contextSize: this._contextSize * this._totalSequences,
41
42
  batchSize: this._batchSize,
43
+ sequences: this._totalSequences,
42
44
  threads: Math.max(0, Math.floor(threads)),
43
45
  embeddings: _embeddings,
44
46
  noSeed: _noSeed
45
47
  }));
46
48
  this._batchingOptions = {
47
49
  dispatchSchedule: batchingDispatchSchedule,
48
- itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
50
+ itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
49
51
  };
50
52
  this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
51
53
  this._disposeAggregator.add(() => {
@@ -82,6 +84,14 @@ export class LlamaContext {
82
84
  get batchSize() {
83
85
  return this._batchSize;
84
86
  }
87
+ /**
88
+ * The actual size of the state in the memory in bytes.
89
+ * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
90
+ */
91
+ get stateSize() {
92
+ this._ensureNotDisposed();
93
+ return this._ctx.getStateSize();
94
+ }
85
95
  getAllocatedContextSize() {
86
96
  this._ensureNotDisposed();
87
97
  if (this._allocatedContextSize == null)
@@ -99,7 +109,7 @@ export class LlamaContext {
99
109
  * When there are no sequences left, this method will throw an error.
100
110
  * @param [options]
101
111
  */
102
- getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
112
+ getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = {}) {
103
113
  this._ensureNotDisposed();
104
114
  const nextSequenceId = this._popSequenceId();
105
115
  if (nextSequenceId == null)
@@ -107,6 +117,7 @@ export class LlamaContext {
107
117
  return LlamaContextSequence._create({
108
118
  sequenceId: nextSequenceId,
109
119
  context: this,
120
+ tokenMeter: _tokenMeter,
110
121
  contextShift: {
111
122
  size: contextShiftSize,
112
123
  strategy: contextShiftStrategy
@@ -123,17 +134,18 @@ export class LlamaContext {
123
134
  this._currentDispatchBatchHandle = {};
124
135
  this._dispatchDecodeScheduled = false;
125
136
  this._batchDispatchPending = false;
126
- let prioritizeStrategy;
127
- try {
128
- this._ensureNotDisposed();
129
- prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
130
- }
131
- catch (err) {
132
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
133
- return;
134
- }
135
- let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
136
- while (shouldHaveAnotherBatch) {
137
+ let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
138
+ const resolvePrioritizationStrategy = () => {
139
+ try {
140
+ this._ensureNotDisposed();
141
+ return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
142
+ }
143
+ catch (err) {
144
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
145
+ }
146
+ return null;
147
+ };
148
+ const getOrderedQueuedDecodes = (prioritizationStrategy) => {
137
149
  const batchItemToQueuedDecodeMap = new Map();
138
150
  const batchItemsList = [];
139
151
  for (const queuedDecode of this._queuedDecodes) {
@@ -146,101 +158,132 @@ export class LlamaContext {
146
158
  }
147
159
  let prioritizedItems;
148
160
  try {
149
- prioritizedItems = prioritizeStrategy({
161
+ prioritizedItems = prioritizationStrategy({
150
162
  items: batchItemsList,
151
163
  size: this._batchSize
152
164
  });
153
165
  }
154
166
  catch (err) {
155
167
  this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
156
- return;
168
+ return null;
157
169
  }
158
- let batchTokenSlotsLeft = this._batchSize;
159
- const afterDecodeActions = [];
160
- const queuedDecodesToDelete = new Set();
161
- const currentQueuedDecodeItems = new Set();
162
- const currentBatchItems = [];
163
- let currentBatchSize = 0;
164
- for (const prioritizedItem of prioritizedItems) {
170
+ return prioritizedItems.map((prioritizedItem) => {
165
171
  const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
166
172
  if (queuedDecode == null)
167
173
  throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
168
174
  "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
169
- const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
170
- if (processAmount <= 0)
175
+ return {
176
+ queuedDecode,
177
+ processAmount: prioritizedItem.processAmount
178
+ };
179
+ });
180
+ };
181
+ const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
182
+ const currentBatchItems = [];
183
+ let currentBatchSize = 0;
184
+ let batchTokenSlotsLeft = batchSize;
185
+ for (const { queuedDecode, processAmount } of queuedDecodes) {
186
+ const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
187
+ if (resolvedProcessAmount <= 0) {
188
+ if (batchTokenSlotsLeft === 0)
189
+ break;
171
190
  continue;
172
- batchTokenSlotsLeft -= processAmount;
191
+ }
192
+ batchTokenSlotsLeft -= resolvedProcessAmount;
193
+ currentBatchSize += resolvedProcessAmount;
173
194
  currentBatchItems.push({
174
195
  queuedDecode,
175
- processAmount
196
+ processAmount: resolvedProcessAmount
176
197
  });
177
- currentBatchSize += processAmount;
178
198
  }
179
- let preventDisposalHandle;
199
+ return {
200
+ currentBatchItems,
201
+ currentBatchSize
202
+ };
203
+ };
204
+ const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
205
+ const afterDecodeActions = [];
206
+ const queuedDecodesToDelete = new Set();
207
+ const currentQueuedDecodeItems = new Set();
208
+ if (currentBatchSize !== 0)
209
+ this._ctx.initBatch(currentBatchSize);
210
+ for (const { queuedDecode, processAmount } of batchItems) {
211
+ let batchLogitIndex;
212
+ try {
213
+ const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
214
+ processAmount === queuedDecode.tokens.length;
215
+ const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
216
+ const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
217
+ TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
218
+ TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
219
+ batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
220
+ }
221
+ catch (err) {
222
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
223
+ continue;
224
+ }
225
+ currentQueuedDecodeItems.add(queuedDecode);
226
+ if (queuedDecode.tokens.length === processAmount) {
227
+ queuedDecodesToDelete.add(queuedDecode);
228
+ afterDecodeActions.push({
229
+ batchLogitIndex,
230
+ response: queuedDecode.response,
231
+ onDone: queuedDecode.onDone
232
+ });
233
+ }
234
+ else {
235
+ queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
236
+ queuedDecode.firstTokenSequenceIndex += processAmount;
237
+ }
238
+ }
239
+ for (let i = 0; i < this._queuedDecodes.length; i++) {
240
+ const queuedDecode = this._queuedDecodes[i];
241
+ if (queuedDecodesToDelete.has(queuedDecode)) {
242
+ this._queuedDecodes.splice(i, 1);
243
+ this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
244
+ i--;
245
+ }
246
+ }
180
247
  try {
181
- preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
248
+ if (currentBatchSize !== 0)
249
+ await this._ctx.decodeBatch();
182
250
  }
183
251
  catch (err) {
184
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
252
+ this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
185
253
  return;
186
254
  }
187
- try {
188
- if (currentBatchSize !== 0)
189
- this._ctx.initBatch(currentBatchSize);
190
- for (const { queuedDecode, processAmount } of currentBatchItems) {
191
- let batchLogitIndex;
255
+ for (const action of afterDecodeActions) {
256
+ const [accept, reject] = action.response;
257
+ if (action.onDone != null && action.batchLogitIndex != null) {
192
258
  try {
193
- batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
259
+ accept(action.onDone(action.batchLogitIndex ?? null));
194
260
  }
195
261
  catch (err) {
196
- this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
197
- continue;
198
- }
199
- currentQueuedDecodeItems.add(queuedDecode);
200
- if (queuedDecode.tokens.length === processAmount) {
201
- queuedDecodesToDelete.add(queuedDecode);
202
- afterDecodeActions.push({
203
- batchLogitIndex,
204
- response: queuedDecode.response,
205
- onDone: queuedDecode.onDone
206
- });
207
- }
208
- else {
209
- queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
210
- queuedDecode.firstTokenSequenceIndex += processAmount;
211
- }
212
- if (batchTokenSlotsLeft === 0)
213
- break;
214
- }
215
- for (let i = 0; i < this._queuedDecodes.length; i++) {
216
- const queuedDecode = this._queuedDecodes[i];
217
- if (queuedDecodesToDelete.has(queuedDecode)) {
218
- this._queuedDecodes.splice(i, 1);
219
- this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
220
- i--;
262
+ reject(err);
221
263
  }
222
264
  }
223
- shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
224
- try {
225
- if (currentBatchSize !== 0)
226
- await this._ctx.decodeBatch();
227
- }
228
- catch (err) {
229
- this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
230
- return;
231
- }
232
- for (const action of afterDecodeActions) {
233
- const [accept, reject] = action.response;
234
- if (action.onDone != null && action.batchLogitIndex != null) {
235
- try {
236
- accept(action.onDone(action.batchLogitIndex ?? null));
237
- }
238
- catch (err) {
239
- reject(err);
240
- }
241
- }
242
- accept(undefined);
243
- }
265
+ accept(undefined);
266
+ }
267
+ };
268
+ const prioritizationStrategy = resolvePrioritizationStrategy();
269
+ if (prioritizationStrategy == null)
270
+ return; // all queued items are rejected and dequeued when we get here
271
+ while (shouldHaveAnotherLoop) {
272
+ const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
273
+ if (orderedQueuedDecodes == null)
274
+ return; // all queued items are rejected and dequeued when we get here
275
+ const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
276
+ let preventDisposalHandle;
277
+ try {
278
+ preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
279
+ }
280
+ catch (err) {
281
+ this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
282
+ return;
283
+ }
284
+ try {
285
+ await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
286
+ shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
244
287
  }
245
288
  finally {
246
289
  preventDisposalHandle.dispose();
@@ -248,13 +291,18 @@ export class LlamaContext {
248
291
  }
249
292
  });
250
293
  }
294
+ /**
295
+ * Print the timings of token evaluation since that last print for this context.
296
+ * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
297
+ * it won't print anything.
298
+ */
251
299
  async printTimings() {
252
300
  this._ensureNotDisposed();
253
301
  this._ctx.printTimings();
254
302
  await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
255
303
  }
256
304
  /** @internal */
257
- async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
305
+ async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
258
306
  return await new Promise((accept, reject) => {
259
307
  this._queuedDecodes.push({
260
308
  sequenceId,
@@ -262,6 +310,7 @@ export class LlamaContext {
262
310
  firstTokenSequenceIndex,
263
311
  generateLogitAtTheEnd,
264
312
  evaluationPriority,
313
+ tokenMeter,
265
314
  response: [accept, reject],
266
315
  onDone
267
316
  });
@@ -337,17 +386,44 @@ export class LlamaContext {
337
386
  }
338
387
  /** @internal */
339
388
  static async _create(options, { _model }) {
340
- const context = new LlamaContext({ _model }, options);
389
+ const sequences = options.sequences ?? getDefaultContextSequences();
390
+ const contextSize = _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
391
+ batchSize: options.batchSize,
392
+ sequences: sequences,
393
+ modelGpuLayers: _model.gpuLayers,
394
+ modelTrainContextSize: _model.trainContextSize,
395
+ getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
396
+ llamaGpu: _model._llama.gpu,
397
+ ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
398
+ isEmbeddingContext: options._embeddings
399
+ });
400
+ const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
401
+ const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
402
+ contextSize,
403
+ sequences,
404
+ isEmbeddingContext: options._embeddings,
405
+ modelGpuLayers: _model.gpuLayers,
406
+ batchSize
407
+ }).gpuVram;
408
+ const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences });
341
409
  const { createSignal } = options;
342
- const contextLoaded = await context._ctx.init();
343
- if (createSignal?.aborted) {
344
- if (contextLoaded)
345
- await context._ctx.dispose();
346
- throw createSignal.reason;
410
+ const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
411
+ ? null
412
+ : _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
413
+ try {
414
+ const contextLoaded = await context._ctx.init();
415
+ if (createSignal?.aborted) {
416
+ if (contextLoaded)
417
+ await context._ctx.dispose();
418
+ throw createSignal.reason;
419
+ }
420
+ else if (!contextLoaded)
421
+ throw new Error("Failed to create context");
422
+ return context;
423
+ }
424
+ finally {
425
+ contextCreationMemoryReservation?.dispose?.();
347
426
  }
348
- else if (!contextLoaded)
349
- throw new Error("Failed to create context");
350
- return context;
351
427
  }
352
428
  }
353
429
  export class LlamaContextSequence {
@@ -355,14 +431,16 @@ export class LlamaContextSequence {
355
431
  /** @internal */ _gcRegistry;
356
432
  /** @internal */ _context;
357
433
  /** @internal */ _contextShift;
434
+ /** @internal */ _tokenMeter;
358
435
  /** @internal */ _disposeAggregator = new DisposeAggregator();
359
436
  /** @internal */ _contextTokens = [];
360
437
  /** @internal */ _nextTokenIndex = 0;
361
438
  /** @internal */ _disposed = false;
362
439
  onDispose = new EventRelay();
363
- constructor({ sequenceId, context, contextShift }) {
440
+ constructor({ sequenceId, context, tokenMeter, contextShift }) {
364
441
  this._sequenceId = sequenceId;
365
442
  this._context = context;
443
+ this._tokenMeter = tokenMeter ?? new TokenMeter();
366
444
  this._contextShift = contextShift;
367
445
  this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
368
446
  this._gcRegistry.register(this, sequenceId);
@@ -399,6 +477,9 @@ export class LlamaContextSequence {
399
477
  get contextTokens() {
400
478
  return this._contextTokens.slice();
401
479
  }
480
+ get tokenMeter() {
481
+ return this._tokenMeter;
482
+ }
402
483
  get isLoadedToMemory() {
403
484
  return !this._disposed;
404
485
  }
@@ -424,7 +505,7 @@ export class LlamaContextSequence {
424
505
  }
425
506
  /**
426
507
  * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
427
- * the start and end of each range are exclusive.
508
+ * The start of each range is inclusive, and the end of each range is exclusive.
428
509
  * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
429
510
  */
430
511
  async eraseContextTokenRanges(ranges) {
@@ -486,7 +567,7 @@ export class LlamaContextSequence {
486
567
  * @param tokens
487
568
  * @param [options]
488
569
  */
489
- evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
570
+ evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
490
571
  return this._evaluate(tokens, {
491
572
  temperature,
492
573
  minP,
@@ -494,6 +575,7 @@ export class LlamaContextSequence {
494
575
  topP,
495
576
  grammarEvaluationState,
496
577
  repeatPenalty,
578
+ tokenBias,
497
579
  evaluationPriority,
498
580
  contextShiftOptions: {
499
581
  size: contextShiftSize,
@@ -522,7 +604,7 @@ export class LlamaContextSequence {
522
604
  }
523
605
  }
524
606
  /** @internal */
525
- async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
607
+ async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
526
608
  this._ensureNotDisposed();
527
609
  let evalTokens = tokens;
528
610
  if (evalTokens.length === 0)
@@ -531,7 +613,7 @@ export class LlamaContextSequence {
531
613
  while (true) {
532
614
  this._ensureNotDisposed();
533
615
  // Evaluate to get the next token.
534
- const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
616
+ const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
535
617
  const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
536
618
  ? repeatPenalty.punishTokens()
537
619
  : repeatPenalty?.punishTokens;
@@ -540,6 +622,7 @@ export class LlamaContextSequence {
540
622
  : grammarEvaluationState;
541
623
  if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
542
624
  throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
625
+ const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
543
626
  return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
544
627
  temperature,
545
628
  minP,
@@ -551,6 +634,8 @@ export class LlamaContextSequence {
551
634
  : undefined,
552
635
  repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
553
636
  repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
637
+ tokenBiasKeys,
638
+ tokenBiasValues,
554
639
  grammarEvaluationState: resolvedGrammarEvaluationState?._state
555
640
  }));
556
641
  });
@@ -565,7 +650,7 @@ export class LlamaContextSequence {
565
650
  }
566
651
  }
567
652
  /** @internal */
568
- async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
653
+ async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
569
654
  this._ensureNotDisposed();
570
655
  const tokensLeftToDecode = tokens.slice();
571
656
  return await withLock(this, "evaluate", async () => {
@@ -585,7 +670,8 @@ export class LlamaContextSequence {
585
670
  tokens: tokensToDecode,
586
671
  firstTokenSequenceIndex: this._nextTokenIndex,
587
672
  generateLogitAtTheEnd,
588
- evaluationPriority
673
+ evaluationPriority,
674
+ tokenMeter
589
675
  }, !generateLogitAtTheEnd
590
676
  ? undefined
591
677
  : onDecodeDone);
@@ -632,10 +718,11 @@ export class LlamaContextSequence {
632
718
  * We need this to make it impossible to manually create instances of this class outside the code of this library
633
719
  * @internal
634
720
  */
635
- static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
721
+ static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
636
722
  return new LlamaContextSequence({
637
723
  sequenceId,
638
724
  context,
725
+ tokenMeter,
639
726
  contextShift: {
640
727
  size: contextShiftSize,
641
728
  strategy: contextShiftStrategy
@@ -643,6 +730,34 @@ export class LlamaContextSequence {
643
730
  });
644
731
  }
645
732
  }
/**
 * Convert a token bias into the two parallel typed arrays that are handed to the addon's `sampleToken` call.
 *
 * @param tokenBias - an object exposing `_model` and an iterable `_biases` of `[token, bias]` pairs,
 * a function that returns such an object, or `null`/`undefined` when no bias should be applied
 * @param currentModel - the model of the sequence being evaluated; the bias must reference this exact instance
 * @returns `{tokenBiasKeys, tokenBiasValues}`: a `Uint32Array` of tokens and a `Float32Array` of their biases
 * at matching indexes, or both `undefined` when there is nothing to apply
 * @throws {Error} when the bias was created with a different model than `currentModel`
 */
function getTokenBiasesForAddon(tokenBias, currentModel) {
    // resolve the lazy (function) form first, so a resolver that yields nothing
    // is treated the same way as passing no bias at all instead of crashing on `._model`
    const resolvedTokenBias = tokenBias instanceof Function
        ? tokenBias()
        : tokenBias;
    if (resolvedTokenBias == null)
        return {
            tokenBiasKeys: undefined,
            tokenBiasValues: undefined
        };
    if (resolvedTokenBias._model !== currentModel)
        throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
            "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
    // snapshot the entries once so the keys and the values are guaranteed to stay aligned
    const biasEntries = Array.from(resolvedTokenBias._biases);
    if (biasEntries.length === 0)
        return {
            tokenBiasKeys: undefined,
            tokenBiasValues: undefined
        };
    return {
        tokenBiasKeys: Uint32Array.from(biasEntries, ([token]) => token),
        tokenBiasValues: Float32Array.from(biasEntries, ([, bias]) => bias)
    };
}
646
761
  function disposeContextIfReferenced(contextRef) {
647
762
  const context = contextRef.deref();
648
763
  if (context != null)
@@ -653,4 +768,14 @@ function disposeContextSequenceIfReferenced(contextRef) {
653
768
  if (context != null)
654
769
  context.dispose();
655
770
  }
/**
 * Default batch size for a context: the combined size of all sequences
 * (`contextSize * sequences`), capped at 512.
 *
 * @param options
 * @param options.contextSize - context size of a single sequence
 * @param options.sequences - number of sequences in the context
 * @returns the smaller of `contextSize * sequences` and 512
 */
export function getDefaultContextBatchSize({ contextSize, sequences }) {
    const maxDefaultBatchSize = 512;
    const combinedContextSize = contextSize * sequences;
    return Math.min(combinedContextSize, maxDefaultBatchSize);
}
/**
 * Default number of sequences for a context.
 *
 * @returns always `1`
 */
export function getDefaultContextSequences() {
    const defaultSequences = 1;
    return defaultSequences;
}
/** Context size to fall back to when no train context size is available. */
const defaultFallbackContextSize = 4096;
/**
 * Default context size for a model.
 *
 * @param options
 * @param options.trainContextSize - the model's train context size, if known
 * @returns `trainContextSize` when it is neither `null` nor `undefined` (so `0` is kept as is),
 * otherwise the 4096 fallback
 */
export function getDefaultModelContextSize({ trainContextSize }) {
    if (trainContextSize == null)
        return defaultFallbackContextSize;
    return trainContextSize;
}
656
781
  //# sourceMappingURL=LlamaContext.js.map