node-llama-cpp 3.0.0-beta.9 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (684) hide show
  1. package/README.md +42 -27
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-cuda.moved.txt +1 -0
  5. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  6. package/bins/_linux-x64.moved.txt +1 -0
  7. package/bins/_mac-arm64-metal.moved.txt +1 -0
  8. package/bins/_mac-x64.moved.txt +1 -0
  9. package/bins/_win-arm64.moved.txt +1 -0
  10. package/bins/_win-x64-cuda.moved.txt +1 -0
  11. package/bins/_win-x64-vulkan.moved.txt +1 -0
  12. package/bins/_win-x64.moved.txt +1 -0
  13. package/dist/ChatWrapper.d.ts +19 -39
  14. package/dist/ChatWrapper.js +129 -72
  15. package/dist/ChatWrapper.js.map +1 -1
  16. package/dist/apiDocsIndex.d.ts +1 -0
  17. package/dist/apiDocsIndex.js +7 -0
  18. package/dist/apiDocsIndex.js.map +1 -0
  19. package/dist/bindings/AddonTypes.d.ts +88 -20
  20. package/dist/bindings/Llama.d.ts +43 -3
  21. package/dist/bindings/Llama.js +193 -23
  22. package/dist/bindings/Llama.js.map +1 -1
  23. package/dist/bindings/consts.d.ts +2 -0
  24. package/dist/bindings/consts.js +13 -0
  25. package/dist/bindings/consts.js.map +1 -0
  26. package/dist/bindings/getLlama.d.ts +123 -18
  27. package/dist/bindings/getLlama.js +264 -75
  28. package/dist/bindings/getLlama.js.map +1 -1
  29. package/dist/bindings/types.d.ts +29 -5
  30. package/dist/bindings/types.js +51 -2
  31. package/dist/bindings/types.js.map +1 -1
  32. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  33. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  34. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  35. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  36. package/dist/bindings/utils/asyncEvery.js +15 -0
  37. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  38. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  39. package/dist/bindings/utils/asyncSome.js +27 -0
  40. package/dist/bindings/utils/asyncSome.js.map +1 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
  46. package/dist/bindings/utils/compileLLamaCpp.js +250 -81
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  52. package/dist/bindings/utils/detectGlibc.js +46 -0
  53. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  55. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  56. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
  58. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  59. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  60. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  61. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  62. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  63. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  64. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  65. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  66. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  67. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  68. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  69. package/dist/bindings/utils/getPlatform.js.map +1 -1
  70. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  71. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  72. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  73. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  74. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  75. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  76. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  77. package/dist/bindings/utils/hasFileInPath.js +34 -0
  78. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  79. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  80. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  81. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  82. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  83. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  84. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  85. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  86. package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
  87. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  88. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  89. package/dist/bindings/utils/testBindingBinary.js +100 -0
  90. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  91. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  92. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  93. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  94. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  95. package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
  96. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  97. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  98. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  99. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  100. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  101. package/dist/chatWrappers/FalconChatWrapper.js +39 -21
  102. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  103. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  104. package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
  105. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  107. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  108. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  109. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  110. package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
  111. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  113. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
  114. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  115. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  116. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  117. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  118. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
  119. package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
  120. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  121. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  122. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  123. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  124. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
  125. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  127. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
  128. package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  130. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  131. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  133. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  134. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  136. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  137. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  138. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
  141. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  142. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  143. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  144. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  145. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
  146. package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
  147. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  148. package/dist/cli/cli.js +19 -11
  149. package/dist/cli/cli.js.map +1 -1
  150. package/dist/cli/commands/ChatCommand.d.ts +16 -7
  151. package/dist/cli/commands/ChatCommand.js +321 -190
  152. package/dist/cli/commands/ChatCommand.js.map +1 -1
  153. package/dist/cli/commands/CompleteCommand.d.ts +31 -0
  154. package/dist/cli/commands/CompleteCommand.js +402 -0
  155. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  156. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  157. package/dist/cli/commands/DebugCommand.js +54 -0
  158. package/dist/cli/commands/DebugCommand.js.map +1 -0
  159. package/dist/cli/commands/InfillCommand.d.ts +33 -0
  160. package/dist/cli/commands/InfillCommand.js +438 -0
  161. package/dist/cli/commands/InfillCommand.js.map +1 -0
  162. package/dist/cli/commands/InitCommand.d.ts +11 -0
  163. package/dist/cli/commands/InitCommand.js +195 -0
  164. package/dist/cli/commands/InitCommand.js.map +1 -0
  165. package/dist/cli/commands/OnPostInstallCommand.js +6 -2
  166. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  167. package/dist/cli/commands/PullCommand.d.ts +13 -0
  168. package/dist/cli/commands/PullCommand.js +158 -0
  169. package/dist/cli/commands/PullCommand.js.map +1 -0
  170. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  171. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  172. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  173. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  174. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  175. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  176. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  177. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  179. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  180. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  182. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  183. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  185. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  186. package/dist/cli/commands/source/SourceCommand.js +19 -0
  187. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  188. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  189. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  190. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  191. package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
  192. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
  193. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  194. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
  195. package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
  196. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  197. package/dist/cli/projectTemplates.d.ts +7 -0
  198. package/dist/cli/projectTemplates.js +10 -0
  199. package/dist/cli/projectTemplates.js.map +1 -0
  200. package/dist/cli/recommendedModels.d.ts +2 -0
  201. package/dist/cli/recommendedModels.js +585 -0
  202. package/dist/cli/recommendedModels.js.map +1 -0
  203. package/dist/cli/startCreateCli.d.ts +2 -0
  204. package/dist/cli/startCreateCli.js +26 -0
  205. package/dist/cli/startCreateCli.js.map +1 -0
  206. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  207. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  208. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  209. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  210. package/dist/cli/utils/ConsoleTable.js +86 -0
  211. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  212. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  213. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  214. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  215. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  216. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  217. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  218. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  219. package/dist/cli/utils/getReadablePath.js +14 -0
  220. package/dist/cli/utils/getReadablePath.js.map +1 -0
  221. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  222. package/dist/cli/utils/interactivelyAskForModel.js +450 -0
  223. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  224. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  225. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  226. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  227. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  228. package/dist/cli/utils/printCommonInfoLines.js +82 -0
  229. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  230. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  231. package/dist/cli/utils/printInfoLine.js +54 -0
  232. package/dist/cli/utils/printInfoLine.js.map +1 -0
  233. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  234. package/dist/cli/utils/projectTemplates.js +47 -0
  235. package/dist/cli/utils/projectTemplates.js.map +1 -0
  236. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  237. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  238. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  239. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  240. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  241. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  242. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  243. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  244. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  245. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  246. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  247. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  248. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  249. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  250. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  251. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  252. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  253. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  254. package/dist/commands.d.ts +4 -3
  255. package/dist/commands.js +6 -3
  256. package/dist/commands.js.map +1 -1
  257. package/dist/config.d.ts +35 -4
  258. package/dist/config.js +58 -17
  259. package/dist/config.js.map +1 -1
  260. package/dist/consts.d.ts +4 -0
  261. package/dist/consts.js +11 -0
  262. package/dist/consts.js.map +1 -0
  263. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
  264. package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
  265. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  266. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  267. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  268. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  269. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  270. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  271. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  272. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  273. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  274. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  275. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
  276. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  277. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
  278. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
  279. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  280. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
  281. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  282. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  283. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
  284. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  285. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  286. package/dist/evaluator/LlamaCompletion.d.ts +168 -0
  287. package/dist/evaluator/LlamaCompletion.js +470 -0
  288. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  289. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +62 -21
  290. package/dist/evaluator/LlamaContext/LlamaContext.js +501 -120
  291. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  292. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  293. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  294. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  295. package/dist/evaluator/LlamaContext/types.d.ts +177 -16
  296. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  297. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  298. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  299. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  301. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  302. package/dist/evaluator/LlamaEmbedding.js +53 -0
  303. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  304. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
  305. package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
  306. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  307. package/dist/evaluator/LlamaGrammar.d.ts +16 -13
  308. package/dist/evaluator/LlamaGrammar.js +17 -10
  309. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  310. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  311. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  312. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  313. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  314. package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
  315. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  316. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
  317. package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
  318. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  319. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  320. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  321. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  322. package/dist/evaluator/TokenBias.d.ts +34 -0
  323. package/dist/evaluator/TokenBias.js +65 -0
  324. package/dist/evaluator/TokenBias.js.map +1 -0
  325. package/dist/evaluator/TokenMeter.d.ts +45 -0
  326. package/dist/evaluator/TokenMeter.js +74 -0
  327. package/dist/evaluator/TokenMeter.js.map +1 -0
  328. package/dist/gguf/consts.d.ts +4 -0
  329. package/dist/gguf/consts.js +12 -0
  330. package/dist/gguf/consts.js.map +1 -0
  331. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  332. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  333. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  334. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  335. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  336. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  337. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  338. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  339. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  340. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  341. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  342. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  343. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
  344. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
  345. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  346. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  347. package/dist/gguf/insights/GgufInsights.js +401 -0
  348. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  349. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
  350. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
  351. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  352. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  353. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  354. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  355. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  356. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  357. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  358. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  359. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  360. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  361. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  362. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  363. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  364. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  365. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  366. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  367. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  368. package/dist/gguf/parser/parseGguf.js +61 -0
  369. package/dist/gguf/parser/parseGguf.js.map +1 -0
  370. package/dist/gguf/readGgufFileInfo.d.ts +45 -0
  371. package/dist/gguf/readGgufFileInfo.js +71 -0
  372. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  373. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  374. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  375. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  376. package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
  377. package/dist/gguf/types/GgufMetadataTypes.js +114 -0
  378. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  379. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  380. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  381. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  382. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  383. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  384. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  385. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  386. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  387. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  388. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  389. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  390. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  391. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  392. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  393. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  394. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  395. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  396. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  397. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  398. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  399. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  400. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  401. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  402. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  403. package/dist/index.d.ts +39 -14
  404. package/dist/index.js +29 -8
  405. package/dist/index.js.map +1 -1
  406. package/dist/state.d.ts +2 -0
  407. package/dist/state.js +7 -0
  408. package/dist/state.js.map +1 -1
  409. package/dist/tsconfig.tsbuildinfo +1 -0
  410. package/dist/types.d.ts +131 -5
  411. package/dist/types.js.map +1 -1
  412. package/dist/utils/DisposeGuard.d.ts +13 -0
  413. package/dist/utils/DisposeGuard.js +120 -0
  414. package/dist/utils/DisposeGuard.js.map +1 -0
  415. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  416. package/dist/utils/InsufficientMemoryError.js +6 -0
  417. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  418. package/dist/utils/LlamaText.d.ts +73 -26
  419. package/dist/utils/LlamaText.js +475 -157
  420. package/dist/utils/LlamaText.js.map +1 -1
  421. package/dist/utils/LruCache.d.ts +12 -0
  422. package/dist/utils/LruCache.js +44 -0
  423. package/dist/utils/LruCache.js.map +1 -0
  424. package/dist/utils/OverridesObject.d.ts +7 -0
  425. package/dist/utils/OverridesObject.js +2 -0
  426. package/dist/utils/OverridesObject.js.map +1 -0
  427. package/dist/utils/ReplHistory.js +5 -1
  428. package/dist/utils/ReplHistory.js.map +1 -1
  429. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  430. package/dist/utils/StopGenerationDetector.js +108 -22
  431. package/dist/utils/StopGenerationDetector.js.map +1 -1
  432. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  433. package/dist/utils/ThreadsSplitter.js +177 -0
  434. package/dist/utils/ThreadsSplitter.js.map +1 -0
  435. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  436. package/dist/utils/TokenStreamRegulator.js +102 -10
  437. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  438. package/dist/utils/UnsupportedError.d.ts +2 -0
  439. package/dist/utils/UnsupportedError.js +7 -0
  440. package/dist/utils/UnsupportedError.js.map +1 -0
  441. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  442. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  443. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  444. package/dist/utils/clearTempFolder.js.map +1 -1
  445. package/dist/utils/cmake.js +23 -10
  446. package/dist/utils/cmake.js.map +1 -1
  447. package/dist/utils/compareTokens.d.ts +1 -1
  448. package/dist/utils/compareTokens.js.map +1 -1
  449. package/dist/utils/createModelDownloader.d.ts +199 -0
  450. package/dist/utils/createModelDownloader.js +405 -0
  451. package/dist/utils/createModelDownloader.js.map +1 -0
  452. package/dist/utils/findBestOption.d.ts +4 -0
  453. package/dist/utils/findBestOption.js +15 -0
  454. package/dist/utils/findBestOption.js.map +1 -0
  455. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  458. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  459. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  462. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  465. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  477. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  482. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  484. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  489. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  492. package/dist/utils/gbnfJson/types.d.ts +3 -0
  493. package/dist/utils/gbnfJson/types.js.map +1 -1
  494. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  497. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  500. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  502. package/dist/utils/getBuildDefaults.d.ts +1 -2
  503. package/dist/utils/getBuildDefaults.js +2 -3
  504. package/dist/utils/getBuildDefaults.js.map +1 -1
  505. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  506. package/dist/utils/getConsoleLogPrefix.js +5 -4
  507. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  508. package/dist/utils/getGrammarsFolder.js +1 -1
  509. package/dist/utils/getGrammarsFolder.js.map +1 -1
  510. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  511. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  512. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  513. package/dist/utils/getReadableContextSize.d.ts +1 -0
  514. package/dist/utils/getReadableContextSize.js +7 -0
  515. package/dist/utils/getReadableContextSize.js.map +1 -0
  516. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  517. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  518. package/dist/utils/gitReleaseBundles.js +68 -1
  519. package/dist/utils/gitReleaseBundles.js.map +1 -1
  520. package/dist/utils/isToken.d.ts +2 -0
  521. package/dist/utils/isToken.js +4 -0
  522. package/dist/utils/isToken.js.map +1 -0
  523. package/dist/utils/isUrl.d.ts +1 -0
  524. package/dist/utils/isUrl.js +15 -0
  525. package/dist/utils/isUrl.js.map +1 -0
  526. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  527. package/dist/utils/mergeUnionTypes.js +2 -0
  528. package/dist/utils/mergeUnionTypes.js.map +1 -0
  529. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  530. package/dist/utils/modelFileAccesTokens.js +40 -0
  531. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  532. package/dist/utils/parseModelFileName.d.ts +1 -0
  533. package/dist/utils/parseModelFileName.js +6 -1
  534. package/dist/utils/parseModelFileName.js.map +1 -1
  535. package/dist/utils/parseTextTemplate.d.ts +66 -0
  536. package/dist/utils/parseTextTemplate.js +116 -0
  537. package/dist/utils/parseTextTemplate.js.map +1 -0
  538. package/dist/utils/prettyPrintObject.d.ts +10 -1
  539. package/dist/utils/prettyPrintObject.js +61 -15
  540. package/dist/utils/prettyPrintObject.js.map +1 -1
  541. package/dist/utils/pushAll.d.ts +6 -0
  542. package/dist/utils/pushAll.js +11 -0
  543. package/dist/utils/pushAll.js.map +1 -0
  544. package/dist/utils/removeNullFields.d.ts +2 -2
  545. package/dist/utils/removeNullFields.js.map +1 -1
  546. package/dist/utils/resolveGithubRelease.d.ts +2 -2
  547. package/dist/utils/resolveGithubRelease.js.map +1 -1
  548. package/dist/utils/resolveLastTokens.d.ts +2 -0
  549. package/dist/utils/resolveLastTokens.js +12 -0
  550. package/dist/utils/resolveLastTokens.js.map +1 -0
  551. package/dist/utils/runtime.d.ts +4 -0
  552. package/dist/utils/runtime.js +8 -0
  553. package/dist/utils/runtime.js.map +1 -0
  554. package/dist/utils/safeEventCallback.d.ts +6 -0
  555. package/dist/utils/safeEventCallback.js +29 -0
  556. package/dist/utils/safeEventCallback.js.map +1 -0
  557. package/dist/utils/spawnCommand.d.ts +11 -2
  558. package/dist/utils/spawnCommand.js +55 -7
  559. package/dist/utils/spawnCommand.js.map +1 -1
  560. package/dist/utils/tokenizeInput.d.ts +1 -1
  561. package/dist/utils/tokenizeInput.js +6 -3
  562. package/dist/utils/tokenizeInput.js.map +1 -1
  563. package/dist/utils/transformPromisable.d.ts +40 -0
  564. package/dist/utils/transformPromisable.js +64 -0
  565. package/dist/utils/transformPromisable.js.map +1 -0
  566. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  567. package/dist/utils/truncateTextAndRoundToWords.js +32 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  569. package/dist/utils/utilTypes.d.ts +3 -0
  570. package/dist/utils/utilTypes.js +2 -0
  571. package/dist/utils/utilTypes.js.map +1 -0
  572. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  573. package/dist/utils/withLockfile.js.map +1 -1
  574. package/dist/utils/withOra.d.ts +2 -0
  575. package/dist/utils/withOra.js +16 -6
  576. package/dist/utils/withOra.js.map +1 -1
  577. package/dist/utils/withProgressLog.d.ts +22 -0
  578. package/dist/utils/withProgressLog.js +211 -0
  579. package/dist/utils/withProgressLog.js.map +1 -0
  580. package/dist/utils/withStatusLogs.js +1 -1
  581. package/dist/utils/withStatusLogs.js.map +1 -1
  582. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  583. package/dist/utils/wrapAbortSignal.js +9 -0
  584. package/dist/utils/wrapAbortSignal.js.map +1 -0
  585. package/llama/CMakeLists.txt +134 -5
  586. package/llama/addon/AddonContext.cpp +629 -0
  587. package/llama/addon/AddonContext.h +52 -0
  588. package/llama/addon/AddonGrammar.cpp +39 -0
  589. package/llama/addon/AddonGrammar.h +19 -0
  590. package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
  591. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  592. package/llama/addon/AddonModel.cpp +672 -0
  593. package/llama/addon/AddonModel.h +61 -0
  594. package/llama/addon/AddonModelData.cpp +25 -0
  595. package/llama/addon/AddonModelData.h +15 -0
  596. package/llama/addon/AddonModelLora.cpp +105 -0
  597. package/llama/addon/AddonModelLora.h +28 -0
  598. package/llama/addon/AddonSampler.cpp +513 -0
  599. package/llama/addon/AddonSampler.h +65 -0
  600. package/llama/addon/RingBuffer.h +109 -0
  601. package/llama/addon/addon.cpp +223 -0
  602. package/llama/addon/addonGlobals.cpp +22 -0
  603. package/llama/addon/addonGlobals.h +12 -0
  604. package/llama/addon/globals/addonLog.cpp +136 -0
  605. package/llama/addon/globals/addonLog.h +21 -0
  606. package/llama/addon/globals/addonProgress.cpp +15 -0
  607. package/llama/addon/globals/addonProgress.h +15 -0
  608. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  609. package/llama/addon/globals/getGpuInfo.h +6 -0
  610. package/llama/binariesGithubRelease.json +1 -1
  611. package/llama/gitRelease.bundle +0 -0
  612. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  613. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  614. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  615. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  616. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  617. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  618. package/llama/grammars/README.md +297 -6
  619. package/llama/grammars/json.gbnf +4 -4
  620. package/llama/grammars/json_arr.gbnf +4 -4
  621. package/llama/llama.cpp.info.json +1 -1
  622. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  623. package/package.json +109 -59
  624. package/templates/packed/electron-typescript-react.json +1 -0
  625. package/templates/packed/node-typescript.json +1 -0
  626. package/dist/AbortError.d.ts +0 -2
  627. package/dist/AbortError.js +0 -7
  628. package/dist/AbortError.js.map +0 -1
  629. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  630. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  631. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  632. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  633. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  634. package/dist/cli/commands/BuildCommand.d.ts +0 -11
  635. package/dist/cli/commands/BuildCommand.js +0 -106
  636. package/dist/cli/commands/BuildCommand.js.map +0 -1
  637. package/dist/cli/commands/ClearCommand.js.map +0 -1
  638. package/dist/cli/commands/DownloadCommand.js +0 -169
  639. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  640. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  641. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
  642. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  643. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  644. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  645. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  646. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  647. package/dist/evaluator/LlamaModel.d.ts +0 -120
  648. package/dist/evaluator/LlamaModel.js +0 -320
  649. package/dist/evaluator/LlamaModel.js.map +0 -1
  650. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  651. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  652. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  653. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  654. package/dist/utils/parseModelTypeDescription.js +0 -9
  655. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  656. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  657. package/dist/utils/resolveChatWrapper.js +0 -16
  658. package/dist/utils/resolveChatWrapper.js.map +0 -1
  659. package/llama/addon.cpp +0 -950
  660. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  661. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  662. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  663. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  664. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  665. package/llamaBins/linux-x64/llama-addon.node +0 -0
  666. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  667. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  668. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  669. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
  670. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  671. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  672. package/llamaBins/mac-x64/llama-addon.node +0 -0
  673. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  674. package/llamaBins/win-x64/llama-addon.exp +0 -0
  675. package/llamaBins/win-x64/llama-addon.lib +0 -0
  676. package/llamaBins/win-x64/llama-addon.node +0 -0
  677. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  678. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  679. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  680. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  681. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  682. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  683. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  684. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -0,0 +1,780 @@
1
+ import process from "process";
2
+ import path from "path";
3
+ import { AsyncDisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
4
+ import { removeNullFields } from "../../utils/removeNullFields.js";
5
+ import { DisposeGuard } from "../../utils/DisposeGuard.js";
6
+ import { LlamaLocks, LlamaLogLevel, LlamaVocabularyType, LlamaVocabularyTypeValues } from "../../bindings/types.js";
7
+ import { readGgufFileInfo } from "../../gguf/readGgufFileInfo.js";
8
+ import { GgufInsights } from "../../gguf/insights/GgufInsights.js";
9
+ import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
10
+ import { getReadablePath } from "../../cli/utils/getReadablePath.js";
11
+ import { LlamaContext } from "../LlamaContext/LlamaContext.js";
12
+ import { LlamaEmbeddingContext } from "../LlamaEmbeddingContext.js";
13
+ import { GgufArchitectureType } from "../../gguf/types/GgufMetadataTypes.js";
14
+ import { maxRecentDetokenizerTokens } from "../../consts.js";
15
+ import { TokenAttribute, TokenAttributes } from "./utils/TokenAttributes.js";
16
+ const defaultUseMmap = true;
17
+ const defaultContextFlashAttentionEnabled = false;
18
+ export class LlamaModel {
19
+ /** @internal */ _llama;
20
+ /** @internal */ _model;
21
+ /** @internal */ _backendModelDisposeGuard;
22
+ /** @internal */ _tokens;
23
+ /** @internal */ _modelPath;
24
+ /** @internal */ _fileInfo;
25
+ /** @internal */ _fileInsights;
26
+ /** @internal */ _gpuLayers;
27
+ /** @internal */ _vocabOnly;
28
+ /** @internal */ _filename;
29
+ /** @internal */ _disposedState = { disposed: false };
30
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
31
+ /** @internal */ _llamaPreventDisposalHandle;
32
+ /** @internal */ _defaultContextFlashAttentionOptionEnabled;
33
+ /** @internal */ _defaultContextFlashAttention;
34
+ /** @internal */ _flashAttentionSupported;
35
+ /** @internal */ _loraAdapters = new Map();
36
+ /** @internal */ _typeDescription;
37
+ /** @internal */ _trainContextSize;
38
+ /** @internal */ _embeddingVectorSize;
39
+ /** @internal */ _vocabularyType;
40
+ tokenizer;
41
+ onDispose = new EventRelay();
42
+ constructor({ modelPath, gpuLayers, vocabOnly = false, useMmap, useMlock, checkTensors, onLoadProgress, loadSignal, metadataOverrides }, { _llama, _fileInfo, _fileInsights, _defaultContextFlashAttentionOptionEnabled, _defaultContextFlashAttention, _flashAttentionSupported }) {
43
+ this._llama = _llama;
44
+ this._fileInfo = _fileInfo;
45
+ this._modelPath = path.resolve(process.cwd(), modelPath);
46
+ this._fileInsights = _fileInsights;
47
+ this._gpuLayers = gpuLayers;
48
+ this._vocabOnly = vocabOnly ?? false;
49
+ this._backendModelDisposeGuard = new DisposeGuard([this._llama._backendDisposeGuard]);
50
+ this._llamaPreventDisposalHandle = this._llama._backendDisposeGuard.createPreventDisposalHandle();
51
+ this._defaultContextFlashAttentionOptionEnabled = _defaultContextFlashAttentionOptionEnabled;
52
+ this._defaultContextFlashAttention = _defaultContextFlashAttention;
53
+ this._flashAttentionSupported = _flashAttentionSupported;
54
+ const overridesList = ggufMetadataOverridesToList(metadataOverrides);
55
+ this._model = new this._llama._bindings.AddonModel(this._modelPath, removeNullFields({
56
+ addonExports: this._llama._bindings,
57
+ gpuLayers,
58
+ vocabOnly: this._vocabOnly,
59
+ useMmap,
60
+ useMlock: _llama.supportsMlock
61
+ ? useMlock
62
+ : undefined,
63
+ checkTensors: checkTensors ?? false,
64
+ onLoadProgress: onLoadProgress == null
65
+ ? undefined
66
+ : (loadPercentage) => {
67
+ try {
68
+ onLoadProgress(loadPercentage);
69
+ }
70
+ catch (err) {
71
+ // the native addon code calls this function, so there's no use to throw an error here
72
+ console.error(err);
73
+ }
74
+ },
75
+ hasLoadAbortSignal: loadSignal != null,
76
+ overridesList: overridesList.length > 0
77
+ ? overridesList
78
+ : undefined
79
+ }));
80
+ this._tokens = LlamaModelTokens._create(this._model, this._disposedState);
81
+ this._filename = path.basename(modelPath);
82
+ this._disposeAggregator.add(() => {
83
+ this._disposedState.disposed = true;
84
+ });
85
+ this._disposeAggregator.add(this.onDispose.dispatchEvent);
86
+ this._disposeAggregator.add(this._llama.onDispose.createListener(disposeModelIfReferenced.bind(null, new WeakRef(this))));
87
+ this._disposeAggregator.add(async () => {
88
+ await this._backendModelDisposeGuard.acquireDisposeLock();
89
+ await this._model.dispose();
90
+ this._llamaPreventDisposalHandle.dispose();
91
+ });
92
+ this._removeLoraUsage = this._removeLoraUsage.bind(this);
93
+ this.tokenize = this.tokenize.bind(this);
94
+ this.detokenize = this.detokenize.bind(this);
95
+ this.isSpecialToken = this.isSpecialToken.bind(this);
96
+ this.isEogToken = this.isEogToken.bind(this);
97
+ this.tokenize.detokenize = this.detokenize;
98
+ this.tokenize.isSpecialToken = this.isSpecialToken;
99
+ this.tokenize.isEogToken = this.isEogToken;
100
+ Object.freeze(this.tokenize);
101
+ this.tokenizer = this.tokenize;
102
+ }
103
+ async dispose() {
104
+ if (this._disposedState.disposed)
105
+ return;
106
+ this._disposedState.disposed = true;
107
+ await this._disposeAggregator.dispose();
108
+ }
109
+ /** @hidden */
110
+ async [Symbol.asyncDispose]() {
111
+ await this.dispose();
112
+ }
113
+ get disposed() {
114
+ return this._disposedState.disposed;
115
+ }
116
+ get tokens() {
117
+ return this._tokens;
118
+ }
119
+ get filename() {
120
+ return this._filename;
121
+ }
122
+ get fileInfo() {
123
+ return this._fileInfo;
124
+ }
125
+ get fileInsights() {
126
+ return this._fileInsights;
127
+ }
128
+ /**
129
+ * Number of layers offloaded to the GPU.
130
+ * If GPU support is disabled, this will always be `0`.
131
+ */
132
+ get gpuLayers() {
133
+ return this._gpuLayers;
134
+ }
135
+ /**
136
+ * Total model size in memory in bytes
137
+ */
138
+ get size() {
139
+ this._ensureNotDisposed();
140
+ return this._model.getModelSize();
141
+ }
142
+ get flashAttentionSupported() {
143
+ return this._flashAttentionSupported;
144
+ }
145
+ get defaultContextFlashAttention() {
146
+ return this._defaultContextFlashAttention;
147
+ }
148
+ tokenize(text, specialTokens = false, options) {
149
+ this._ensureNotDisposed();
150
+ if (text === "")
151
+ return [];
152
+ if (specialTokens === "builtin") {
153
+ const builtinToken = text;
154
+ switch (builtinToken) {
155
+ case "BOS": return this.tokens.bos == null ? [] : [this.tokens.bos];
156
+ case "EOS": return this.tokens.eos == null ? [] : [this.tokens.eos];
157
+ case "NL": return this.tokens.nl == null ? [] : [this.tokens.nl];
158
+ case "EOT": return this.tokens.eot == null ? [] : [this.tokens.eot];
159
+ }
160
+ void builtinToken;
161
+ throw new Error(`Unknown builtin special token: ${builtinToken}`);
162
+ }
163
+ if (options === "trimLeadingSpace") {
164
+ if (specialTokens) {
165
+ const countLeadingSpaces = (text) => {
166
+ let count = 0;
167
+ for (; count < text.length; count++) {
168
+ if (text[count] !== " ")
169
+ break;
170
+ }
171
+ return count;
172
+ };
173
+ const textLeadingSpaces = countLeadingSpaces(text);
174
+ const [workaroundToken, workaroundTokenString] = (this.tokens.bos != null && this.tokens.bosString != null)
175
+ ? [this.tokens.bos, this.tokens.bosString]
176
+ : (this.tokens.eos != null && this.tokens.eosString != null)
177
+ ? [this.tokens.eos, this.tokens.eosString]
178
+ : (this.tokens.nl != null && this.tokens.nlString != null)
179
+ ? [this.tokens.nl, this.tokens.nlString]
180
+ : (this.tokens.eot != null && this.tokens.eotString != null)
181
+ ? [this.tokens.eot, this.tokens.eotString]
182
+ : [null, null];
183
+ if (workaroundToken != null && workaroundTokenString != null) {
184
+ const tokens = Array.from(this._model.tokenize(workaroundTokenString + text, true));
185
+ const workaroundTokenIndex = tokens.indexOf(workaroundToken);
186
+ // only use the tokenized output if it can be corrected, otherwise fallback to the default tokenization
187
+ if (workaroundTokenIndex >= 0 && workaroundTokenIndex <= 1) {
188
+ tokens.splice(0, workaroundTokenIndex + 1);
189
+ if (countLeadingSpaces(this.detokenize(tokens, true)) === textLeadingSpaces)
190
+ return tokens;
191
+ }
192
+ }
193
+ const workaroundTokensString = "\n";
194
+ const workaroundTokens = Array.from(this._model.tokenize(workaroundTokensString, true));
195
+ if (text.startsWith(workaroundTokensString)) {
196
+ const tokens = Array.from(this._model.tokenize(text, true));
197
+ if (this.detokenize(tokens, true).startsWith(workaroundTokensString))
198
+ return tokens;
199
+ }
200
+ const tokens = Array.from(this._model.tokenize(workaroundTokensString + text, true));
201
+ // only use the tokenized output if it can be corrected, otherwise fallback to the default tokenization
202
+ if (workaroundTokens.length > 0 && workaroundTokens.every((token, index) => tokens[index] === token)) {
203
+ tokens.splice(0, workaroundTokens.length);
204
+ if (countLeadingSpaces(this.detokenize(tokens, true)) === textLeadingSpaces)
205
+ return tokens;
206
+ }
207
+ }
208
+ else {
209
+ const workaroundTokensString = "\n";
210
+ const workaroundTokens = Array.from(this._model.tokenize(workaroundTokensString, false));
211
+ if (text.startsWith(workaroundTokensString)) {
212
+ const tokens = Array.from(this._model.tokenize(text, false));
213
+ if (this.detokenize(tokens, false).startsWith(workaroundTokensString))
214
+ return tokens;
215
+ }
216
+ const tokens = Array.from(this._model.tokenize(workaroundTokensString + text, false));
217
+ // only use the tokenized output if it can be corrected, otherwise fallback to the default tokenization
218
+ if (workaroundTokens.length > 0 && workaroundTokens.every((token, index) => tokens[index] === token)) {
219
+ tokens.splice(0, workaroundTokens.length);
220
+ return tokens;
221
+ }
222
+ }
223
+ }
224
+ return Array.from(this._model.tokenize(text, specialTokens));
225
+ }
226
+ /**
227
+ * Transform tokens into text
228
+ * @param tokens - the tokens to detokenize.
229
+ * @param [specialTokens] - if set to `true`, special tokens will be detokenized to their corresponding token text representation.
230
+ *
231
+ * Recommended for debugging purposes only.
232
+ *
233
+ * > **Note:** there may be additional spaces around special tokens that were not present in the original text - this is not a bug,
234
+ * this is [how the tokenizer is supposed to work](https://github.com/ggerganov/llama.cpp/pull/7697#issuecomment-2144003246).
235
+ *
236
+ * Defaults to `false`.
237
+ * @param [lastTokens] - the last few tokens that preceded the tokens to detokenize.
238
+ * If provided, the last few tokens will be used to determine whether a space has to be added before the current tokens or not,
239
+ * and apply other detokenizer-specific heuristics to provide the correct text continuation to the existing tokens.
240
+ *
241
+ * Using it may have no effect with some models, but it is still recommended.
242
+ */
243
+ detokenize(tokens, specialTokens = false, lastTokens) {
244
+ this._ensureNotDisposed();
245
+ if (tokens.length === 0)
246
+ return "";
247
+ if (lastTokens == null || lastTokens.length === 0)
248
+ return this._model.detokenize(Uint32Array.from(tokens), Boolean(specialTokens));
249
+ const addedTokens = lastTokens.slice(-maxRecentDetokenizerTokens);
250
+ const addedTokensText = this._model.detokenize(Uint32Array.from(addedTokens), Boolean(specialTokens));
251
+ if (addedTokensText === "")
252
+ return this._model.detokenize(Uint32Array.from(tokens), Boolean(specialTokens));
253
+ const text = this._model.detokenize(Uint32Array.from([...addedTokens, ...tokens]), Boolean(specialTokens));
254
+ if (text.startsWith(addedTokensText))
255
+ return text.slice(addedTokensText.length);
256
+ return this._model.detokenize(Uint32Array.from(tokens), Boolean(specialTokens));
257
+ }
258
+ getTokenAttributes(token) {
259
+ if (token == null)
260
+ throw new Error("Token cannot be null");
261
+ if (this.vocabularyType === LlamaVocabularyType.none)
262
+ return TokenAttributes._create(token, TokenAttribute.undefined);
263
+ return TokenAttributes._create(token, this._model.getTokenAttributes(token));
264
+ }
265
+ /** Check whether the given token is a special token (a control-type token or a token with no normal text representation) */
266
+ isSpecialToken(token) {
267
+ if (token == null)
268
+ return false;
269
+ if (this.getTokenAttributes(token).control)
270
+ return true;
271
+ const normalText = this.detokenize([token], false);
272
+ if (normalText === "")
273
+ return this.detokenize([token], true) !== "";
274
+ return false;
275
+ }
276
+ *iterateAllTokens() {
277
+ if (this.vocabularyType === LlamaVocabularyType.none)
278
+ return;
279
+ const totalTokens = this.fileInfo.metadata?.tokenizer?.ggml?.tokens?.length;
280
+ if (typeof totalTokens !== "number")
281
+ return;
282
+ for (let i = 0; i < totalTokens; i++)
283
+ yield i;
284
+ }
285
+ /** Check whether the given token is an EOG (End Of Generation) token, like EOS or EOT. */
286
+ isEogToken(token) {
287
+ if (token == null)
288
+ return false;
289
+ return token === this.tokens.eos || token === this.tokens.eot || this._model.isEogToken(token);
290
+ }
291
+ async createContext(options = {}) {
292
+ if (this._vocabOnly)
293
+ throw new Error("Model is loaded in vocabOnly mode, so no context can be created");
294
+ return await withLock(this._llama._memoryLock, LlamaLocks.loadToMemory, options.createSignal, async () => {
295
+ const preventDisposalHandle = this._backendModelDisposeGuard.createPreventDisposalHandle();
296
+ try {
297
+ return await LlamaContext._create(options, { _model: this });
298
+ }
299
+ finally {
300
+ preventDisposalHandle.dispose();
301
+ }
302
+ });
303
+ }
304
+ async createEmbeddingContext(options = {}) {
305
+ if (this._vocabOnly)
306
+ throw new Error("Model is loaded in vocabOnly mode, so no context can be created");
307
+ return await withLock(this._llama._memoryLock, LlamaLocks.loadToMemory, options.createSignal, async () => {
308
+ const preventDisposalHandle = this._backendModelDisposeGuard.createPreventDisposalHandle();
309
+ try {
310
+ return await LlamaEmbeddingContext._create({ _model: this }, options);
311
+ }
312
+ finally {
313
+ preventDisposalHandle.dispose();
314
+ }
315
+ });
316
+ }
317
+ /**
318
+ * Get warnings about the model file that would affect its usage.
319
+ *
320
+ * These warnings include all the warnings generated by `GgufInsights`, but are more comprehensive.
321
+ */
322
+ getWarnings() {
323
+ this._ensureNotDisposed();
324
+ const warnings = this._fileInsights.getWarnings(this._modelPath);
325
+ const modelFilePathText = `("${getReadablePath(this._modelPath)}")`;
326
+ try {
327
+ const beforeTextNoSpecialTokens = "some test text here";
328
+ const afterTextNoSpecialTokens = this.detokenize(this.tokenize(beforeTextNoSpecialTokens, false, "trimLeadingSpace"), false);
329
+ if (beforeTextNoSpecialTokens !== afterTextNoSpecialTokens)
330
+ warnings.push(`Using this model ${modelFilePathText} to tokenize text and then detokenize it resulted in a different text. ` +
331
+ "There might be an issue with the model or the tokenizer implementation. " +
332
+ "Using this model may not work as intended");
333
+ }
334
+ catch (err) {
335
+ // do nothing
336
+ }
337
+ try {
338
+ if (this._defaultContextFlashAttentionOptionEnabled && !this._flashAttentionSupported) {
339
+ if (this.fileInfo.metadata?.general?.architecture === GgufArchitectureType.grok)
340
+ warnings.push("Flash attention is incompatible with Grok and thus was turned off");
341
+ else if (this.fileInfo.metadata?.general?.architecture === GgufArchitectureType.gemma2)
342
+ warnings.push("Flash attention is incompatible with Gemma2 and thus was turned off");
343
+ else {
344
+ const nHead = this.fileInfo.architectureMetadata?.attention?.head_count ?? 0;
345
+ const nEmbd = this.fileInfo.architectureMetadata?.embedding_length ?? 0;
346
+ const nEmbdHeadK = this.fileInfo.architectureMetadata?.attention?.key_length ?? ((nHead == 0) ? 0 : (nEmbd / nHead));
347
+ const nEmbdHeadV = this.fileInfo.architectureMetadata?.attention?.value_length ?? ((nHead == 0) ? 0 : nEmbd / nHead);
348
+ if (nEmbdHeadK !== nEmbdHeadV)
349
+ warnings.push("Flash attention is incompatible with this model and thus was turned off");
350
+ }
351
+ }
352
+ }
353
+ catch (err) {
354
+ // do nothing
355
+ }
356
+ return warnings;
357
+ }
358
+ /** @hidden `ModelTypeDescription` type alias is too long in the documentation */
359
+ get typeDescription() {
360
+ this._ensureNotDisposed();
361
+ if (this._typeDescription == null)
362
+ this._typeDescription = this._model.getModelDescription();
363
+ return this._typeDescription;
364
+ }
365
+ /** The context size the model was trained on */
366
+ get trainContextSize() {
367
+ this._ensureNotDisposed();
368
+ if (this._trainContextSize == null)
369
+ this._trainContextSize = this._model.getTrainContextSize();
370
+ return this._trainContextSize;
371
+ }
372
+ /** The size of an embedding vector the model can produce */
373
+ get embeddingVectorSize() {
374
+ this._ensureNotDisposed();
375
+ if (this._embeddingVectorSize == null)
376
+ this._embeddingVectorSize = this._model.getEmbeddingVectorSize();
377
+ return this._embeddingVectorSize;
378
+ }
379
+ get vocabularyType() {
380
+ this._ensureNotDisposed();
381
+ if (this._vocabularyType == null) {
382
+ const vocabType = this._model.getVocabularyType();
383
+ this._vocabularyType = LlamaVocabularyTypeValues[vocabType];
384
+ if (this._vocabularyType == null) {
385
+ console.warn(getConsoleLogPrefix() + "Unknown vocabulary type:", vocabType);
386
+ this._vocabularyType = LlamaVocabularyType.none;
387
+ }
388
+ }
389
+ return this._vocabularyType;
390
+ }
391
+ /** @internal */
392
+ _ensureNotDisposed() {
393
+ if (this._disposedState.disposed)
394
+ throw new DisposedError();
395
+ }
396
+ /** @internal */
397
+ async _getOrLoadLora(filePath) {
398
+ const resolvedPath = path.resolve(process.cwd(), filePath);
399
+ if (this._loraAdapters.has(resolvedPath))
400
+ return this._loraAdapters.get(resolvedPath);
401
+ return await withLock(this._loraAdapters, "modify", async () => {
402
+ if (this._loraAdapters.has(resolvedPath))
403
+ return this._loraAdapters.get(resolvedPath);
404
+ const lora = new this._llama._bindings.AddonModelLora(this._model, resolvedPath);
405
+ await this._model.loadLora(lora);
406
+ this._loraAdapters.set(resolvedPath, lora);
407
+ return lora;
408
+ });
409
+ }
410
+ /** @internal */
411
+ async _removeLoraUsage(loraAdapters) {
412
+ return await withLock(this._loraAdapters, "modify", async () => {
413
+ await Promise.all([...loraAdapters].map(async (lora) => {
414
+ lora.usages--;
415
+ if (lora.usages <= 0 && this._loraAdapters.get(lora.filePath) === lora) {
416
+ this._loraAdapters.delete(lora.filePath);
417
+ await lora.dispose();
418
+ }
419
+ }));
420
+ });
421
+ }
422
+ /** @internal */
423
+ static async _create(modelOptions, { _llama }) {
424
+ const { loadSignal, defaultContextFlashAttention } = modelOptions;
425
+ const useMmap = modelOptions.useMmap ?? defaultUseMmap;
426
+ const fileInfo = await readGgufFileInfo(modelOptions.modelPath, {
427
+ sourceType: "filesystem",
428
+ signal: loadSignal
429
+ });
430
+ applyGgufMetadataOverrides(fileInfo, modelOptions.metadataOverrides);
431
+ const ggufInsights = await GgufInsights.from(fileInfo, _llama);
432
+ const flashAttentionSupported = ggufInsights.flashAttentionSupported;
433
+ const resolvedDefaultContextFlashAttention = flashAttentionSupported
434
+ ? (defaultContextFlashAttention ?? defaultContextFlashAttentionEnabled)
435
+ : false;
436
+ const gpuLayers = await ggufInsights.configurationResolver.resolveModelGpuLayers(modelOptions.gpuLayers, {
437
+ ignoreMemorySafetyChecks: modelOptions.ignoreMemorySafetyChecks,
438
+ defaultContextFlashAttention: resolvedDefaultContextFlashAttention
439
+ });
440
+ const vramRequiredEstimate = ggufInsights.estimateModelResourceRequirements({ gpuLayers: gpuLayers }).gpuVram;
441
+ const model = new LlamaModel({ ...modelOptions, gpuLayers, useMmap }, {
442
+ _fileInfo: fileInfo,
443
+ _fileInsights: ggufInsights,
444
+ _llama,
445
+ _defaultContextFlashAttentionOptionEnabled: defaultContextFlashAttention ?? false,
446
+ _flashAttentionSupported: flashAttentionSupported,
447
+ _defaultContextFlashAttention: resolvedDefaultContextFlashAttention
448
+ });
449
+ const modelCreationMemoryReservation = modelOptions.ignoreMemorySafetyChecks
450
+ ? null
451
+ : _llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
452
+ const loggedWarnings = new Set();
453
+ function onAbort() {
454
+ model._model.abortActiveModelLoad();
455
+ loadSignal?.removeEventListener("abort", onAbort);
456
+ }
457
+ function logWarnings(warnings) {
458
+ for (const warning of warnings) {
459
+ if (loggedWarnings.has(warning))
460
+ continue;
461
+ _llama._log(LlamaLogLevel.warn, warning);
462
+ loggedWarnings.add(warning);
463
+ }
464
+ }
465
+ if (loadSignal != null) {
466
+ if (loadSignal.aborted)
467
+ throw loadSignal.reason;
468
+ loadSignal.addEventListener("abort", onAbort);
469
+ }
470
+ logWarnings(ggufInsights.getWarnings(modelOptions.modelPath));
471
+ try {
472
+ const modelLoaded = await model._model.init();
473
+ if (loadSignal?.aborted) {
474
+ if (modelLoaded)
475
+ await model._model.dispose();
476
+ throw loadSignal.reason;
477
+ }
478
+ else if (!modelLoaded)
479
+ throw new Error("Failed to load model");
480
+ loadSignal?.removeEventListener("abort", onAbort);
481
+ logWarnings(model.getWarnings());
482
+ return model;
483
+ }
484
+ finally {
485
+ loadSignal?.removeEventListener("abort", onAbort);
486
+ modelCreationMemoryReservation?.dispose?.();
487
+ }
488
+ }
489
+ }
490
/**
 * Lazy, cached accessors for a model's special tokens and their text representations.
 *
 * Every getter throws `DisposedError` once `disposedState.disposed` is set. Token values
 * are read from the underlying model object on first access and cached; a cached value of
 * `-1` is reported as `null`.
 */
export class LlamaModelTokens {
    /** @internal */ _model;
    /** @internal */ _disposedState;
    /** @internal */ _infillTokens;
    /** @internal */ _bosToken;
    /** @internal */ _eosToken;
    /** @internal */ _eotToken;
    /** @internal */ _nlToken;
    /** @internal */ _bosString;
    /** @internal */ _eosString;
    /** @internal */ _eotString;
    /** @internal */ _nlString;
    /** @internal */ _shouldPrependBosToken;
    constructor(model, disposedState) {
        this._model = model;
        this._disposedState = disposedState;
    }
    /**
     * @returns infill tokens
     */
    get infill() {
        this._ensureNotDisposed();
        this._infillTokens ??= LlamaModelInfillTokens._create(this._model, this._disposedState);
        return this._infillTokens;
    }
    /**
     * @returns The BOS (Beginning Of Sequence) token, or `null` when the model reports `-1`.
     */
    get bos() {
        this._ensureNotDisposed();
        this._bosToken ??= this._model.tokenBos();
        return this._bosToken === -1 ? null : this._bosToken;
    }
    /**
     * @returns The EOS (End Of Sequence) token, or `null` when the model reports `-1`.
     */
    get eos() {
        this._ensureNotDisposed();
        this._eosToken ??= this._model.tokenEos();
        return this._eosToken === -1 ? null : this._eosToken;
    }
    /**
     * @returns The EOT (End Of Turn) token, or `null` when the model reports `-1`.
     */
    get eot() {
        this._ensureNotDisposed();
        this._eotToken ??= this._model.eotToken();
        return this._eotToken === -1 ? null : this._eotToken;
    }
    /**
     * @returns The NL (New Line) token, or `null` when the model reports `-1`.
     */
    get nl() {
        this._ensureNotDisposed();
        this._nlToken ??= this._model.tokenNl();
        return this._nlToken === -1 ? null : this._nlToken;
    }
    /**
     * @returns The BOS (Beginning Of Sequence) token text representation, or `null` when there is no BOS token.
     */
    get bosString() {
        this._ensureNotDisposed();
        const token = this.bos;
        if (token == null)
            return null;
        this._bosString ??= this._model.getTokenString(token);
        return this._bosString;
    }
    /**
     * @returns The EOS (End Of Sequence) token text representation, or `null` when there is no EOS token.
     */
    get eosString() {
        this._ensureNotDisposed();
        const token = this.eos;
        if (token == null)
            return null;
        this._eosString ??= this._model.getTokenString(token);
        return this._eosString;
    }
    /**
     * @returns The EOT (End Of Turn) token text representation, or `null` when there is no EOT token.
     */
    get eotString() {
        this._ensureNotDisposed();
        const token = this.eot;
        if (token == null)
            return null;
        this._eotString ??= this._model.getTokenString(token);
        return this._eotString;
    }
    /**
     * @returns The NL (New Line) token text representation, or `null` when there is no NL token.
     */
    get nlString() {
        this._ensureNotDisposed();
        const token = this.nl;
        if (token == null)
            return null;
        this._nlString ??= this._model.getTokenString(token);
        return this._nlString;
    }
    /**
     * @returns Whether we should prepend a BOS (Beginning Of Sequence) token for evaluations with this model.
     */
    get shouldPrependBosToken() {
        this._ensureNotDisposed();
        // Without a BOS token there is nothing to prepend, so the model is not consulted.
        this._shouldPrependBosToken ??= this.bos != null && this._model.shouldPrependBosToken();
        return this._shouldPrependBosToken;
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposedState.disposed)
            throw new DisposedError();
    }
    /** @internal */
    static _create(model, disposedState) {
        return new LlamaModelTokens(model, disposedState);
    }
}
627
/**
 * Lazy, cached accessors for a model's infill (fill-in-the-middle) tokens and their text
 * representations.
 *
 * Every getter throws `DisposedError` once `disposedState.disposed` is set. When the model
 * does not report a token, a fallback text (`<fim_prefix>`, `<fim_middle>`, `<fim_suffix>`)
 * is tokenized and used if it maps to exactly one token.
 */
export class LlamaModelInfillTokens {
    /** @internal */ _model;
    /** @internal */ _disposedState;
    /** @internal */ _prefixToken;
    /** @internal */ _middleToken;
    /** @internal */ _suffixToken;
    /** @internal */ _prefixString;
    /** @internal */ _middleString;
    /** @internal */ _suffixString;
    constructor(model, disposedState) {
        this._model = model;
        this._disposedState = disposedState;
    }
    /**
     * @returns The beginning of infill prefix token, or `null` when none could be resolved.
     */
    get prefix() {
        this._ensureNotDisposed();
        this._prefixToken ??= this._resolveSpecialToken(this._model.prefixToken(), ["<fim_prefix>"]);
        return this._prefixToken === -1 ? null : this._prefixToken;
    }
    /**
     * @returns The beginning of infill middle token, or `null` when none could be resolved.
     */
    get middle() {
        this._ensureNotDisposed();
        this._middleToken ??= this._resolveSpecialToken(this._model.middleToken(), ["<fim_middle>"]);
        return this._middleToken === -1 ? null : this._middleToken;
    }
    /**
     * @returns The beginning of infill suffix token, or `null` when none could be resolved.
     */
    get suffix() {
        this._ensureNotDisposed();
        this._suffixToken ??= this._resolveSpecialToken(this._model.suffixToken(), ["<fim_suffix>"]);
        return this._suffixToken === -1 ? null : this._suffixToken;
    }
    /**
     * @returns The beginning of infill prefix token as a string, or `null` when there is no such token.
     */
    get prefixString() {
        this._ensureNotDisposed();
        const token = this.prefix;
        if (token == null)
            return null;
        this._prefixString ??= this._model.getTokenString(token);
        return this._prefixString;
    }
    /**
     * @returns The beginning of infill middle token as a string, or `null` when there is no such token.
     */
    get middleString() {
        this._ensureNotDisposed();
        const token = this.middle;
        if (token == null)
            return null;
        this._middleString ??= this._model.getTokenString(token);
        return this._middleString;
    }
    /**
     * @returns The beginning of infill suffix token as a string, or `null` when there is no such token.
     */
    get suffixString() {
        this._ensureNotDisposed();
        const token = this.suffix;
        if (token == null)
            return null;
        this._suffixString ??= this._model.getTokenString(token);
        return this._suffixString;
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposedState.disposed)
            throw new DisposedError();
    }
    /**
     * Returns `token` when it is usable; otherwise tokenizes each fallback text in order and
     * returns the first one that maps to exactly one token, or `-1` when none does.
     * @internal
     */
    _resolveSpecialToken(token, fallbackTexts) {
        const isMissing = token == null || token === -1;
        if (!isMissing)
            return token;
        for (const fallbackText of fallbackTexts) {
            const fallbackTokens = this._model.tokenize(fallbackText, true);
            if (fallbackTokens.length === 1)
                return fallbackTokens[0];
        }
        return -1;
    }
    /** @internal */
    static _create(model, disposedState) {
        return new LlamaModelInfillTokens(model, disposedState);
    }
}
731
/**
 * Merges `overrides` into `ggufFileInfo.metadata` in place.
 *
 * Plain nested objects are merged key by key. Arrays, primitives, and object values whose
 * target key does not currently hold an object replace the existing value wholesale.
 * Nothing happens when either side is `null`/`undefined`, when the target is an array or
 * not an object, or when the override is not an object.
 * @param ggufFileInfo - object whose `metadata` property is mutated
 * @param overrides - nested override values, may be `null`/`undefined`
 */
function applyGgufMetadataOverrides(ggufFileInfo, overrides) {
    function mergeInto(target, source) {
        if (target == null || source == null)
            return;
        const targetIsMergeable = !(target instanceof Array) && typeof target === "object";
        if (!targetIsMergeable || typeof source !== "object")
            return;
        for (const [key, value] of Object.entries(source)) {
            // Replace unless both the new value and the current value are objects
            // (and the new value is not an array), in which case merge recursively.
            const replaceWholesale = value instanceof Array ||
                typeof value !== "object" ||
                typeof target[key] !== "object";
            if (replaceWholesale) {
                target[key] = value;
                continue;
            }
            mergeInto(target[key], value);
        }
    }
    mergeInto(ggufFileInfo.metadata, overrides);
}
746
/**
 * Flattens a nested metadata overrides object into a list of `[key, value, type]` entries,
 * where `key` is the dot-joined path to a leaf value.
 *
 * `type` tags numeric leaves: `0` for integers (integral numbers and bigints), `1` for
 * non-integral numbers, and `undefined` for every other leaf kind. `null`/`undefined`
 * values and arrays are skipped.
 *
 * The bigint check used to be nested inside the `typeof object === "number"` branch, where
 * it could never be true, so bigint leaves were emitted without the integer tag.
 * @param overrides - nested override values, may be `null`/`undefined`
 * @returns list of `[key, value, type]` entries in object iteration order
 * @throws Error when a string value or a dot-joined key is longer than 127 characters
 */
function ggufMetadataOverridesToList(overrides) {
    const maxStringLength = 127;
    const maxKeyLength = 127;
    const res = [];
    function addItem(object, keyPath) {
        if (object == null || object instanceof Array)
            return;
        if (typeof object !== "object") {
            if (typeof object === "string" && object.length > maxStringLength)
                throw new Error(`Metadata key "${keyPath.join(".")}" override string value (${JSON.stringify(object)}) is longer than ${maxStringLength} characters`);
            const key = keyPath.join(".");
            if (key.length > maxKeyLength)
                throw new Error(`Metadata key "${key}" override path is longer than ${maxKeyLength} characters`);
            let type = undefined;
            // `Number.isInteger` is false for every non-number, so only integral numbers
            // and bigints get the integer tag.
            if (typeof object === "bigint" || Number.isInteger(object))
                type = 0;
            else if (typeof object === "number")
                type = 1;
            res.push([key, object, type]);
            return;
        }
        for (const [key, value] of Object.entries(object))
            addItem(value, [...keyPath, key]);
    }
    addItem(overrides ?? {}, []);
    return res;
}
775
/**
 * Disposes the model behind `modelRef` when the reference still yields one.
 *
 * The result of `dispose()` is deliberately discarded (`void`), so this function does not
 * wait for disposal to finish.
 * @param modelRef - object exposing `deref()`, which returns the model or `null`/`undefined`
 */
function disposeModelIfReferenced(modelRef) {
    void modelRef.deref()?.dispose();
}
780
+ //# sourceMappingURL=LlamaModel.js.map