node-llama-cpp 3.0.0-beta.9 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (684)
  1. package/README.md +42 -27
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-cuda.moved.txt +1 -0
  5. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  6. package/bins/_linux-x64.moved.txt +1 -0
  7. package/bins/_mac-arm64-metal.moved.txt +1 -0
  8. package/bins/_mac-x64.moved.txt +1 -0
  9. package/bins/_win-arm64.moved.txt +1 -0
  10. package/bins/_win-x64-cuda.moved.txt +1 -0
  11. package/bins/_win-x64-vulkan.moved.txt +1 -0
  12. package/bins/_win-x64.moved.txt +1 -0
  13. package/dist/ChatWrapper.d.ts +19 -39
  14. package/dist/ChatWrapper.js +129 -72
  15. package/dist/ChatWrapper.js.map +1 -1
  16. package/dist/apiDocsIndex.d.ts +1 -0
  17. package/dist/apiDocsIndex.js +7 -0
  18. package/dist/apiDocsIndex.js.map +1 -0
  19. package/dist/bindings/AddonTypes.d.ts +88 -20
  20. package/dist/bindings/Llama.d.ts +43 -3
  21. package/dist/bindings/Llama.js +193 -23
  22. package/dist/bindings/Llama.js.map +1 -1
  23. package/dist/bindings/consts.d.ts +2 -0
  24. package/dist/bindings/consts.js +13 -0
  25. package/dist/bindings/consts.js.map +1 -0
  26. package/dist/bindings/getLlama.d.ts +123 -18
  27. package/dist/bindings/getLlama.js +264 -75
  28. package/dist/bindings/getLlama.js.map +1 -1
  29. package/dist/bindings/types.d.ts +29 -5
  30. package/dist/bindings/types.js +51 -2
  31. package/dist/bindings/types.js.map +1 -1
  32. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  33. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  34. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  35. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  36. package/dist/bindings/utils/asyncEvery.js +15 -0
  37. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  38. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  39. package/dist/bindings/utils/asyncSome.js +27 -0
  40. package/dist/bindings/utils/asyncSome.js.map +1 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
  46. package/dist/bindings/utils/compileLLamaCpp.js +250 -81
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  52. package/dist/bindings/utils/detectGlibc.js +46 -0
  53. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  55. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  56. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
  58. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  59. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
  60. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  61. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  62. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  63. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  64. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  65. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  66. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  67. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  68. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  69. package/dist/bindings/utils/getPlatform.js.map +1 -1
  70. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  71. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  72. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  73. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  74. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  75. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  76. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  77. package/dist/bindings/utils/hasFileInPath.js +34 -0
  78. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  79. package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
  80. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  81. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  82. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  83. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  84. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  85. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  86. package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
  87. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  88. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  89. package/dist/bindings/utils/testBindingBinary.js +100 -0
  90. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  91. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  92. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  93. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  94. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  95. package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
  96. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  97. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
  98. package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
  99. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  100. package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
  101. package/dist/chatWrappers/FalconChatWrapper.js +39 -21
  102. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  103. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
  104. package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
  105. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  107. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  108. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  109. package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
  110. package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
  111. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  113. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
  114. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  115. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  116. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  117. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  118. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
  119. package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
  120. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  121. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  122. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  123. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  124. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
  125. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  127. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
  128. package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  130. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
  131. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  133. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
  134. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  136. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  137. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  138. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
  141. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  142. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  143. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  144. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  145. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
  146. package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
  147. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  148. package/dist/cli/cli.js +19 -11
  149. package/dist/cli/cli.js.map +1 -1
  150. package/dist/cli/commands/ChatCommand.d.ts +16 -7
  151. package/dist/cli/commands/ChatCommand.js +321 -190
  152. package/dist/cli/commands/ChatCommand.js.map +1 -1
  153. package/dist/cli/commands/CompleteCommand.d.ts +31 -0
  154. package/dist/cli/commands/CompleteCommand.js +402 -0
  155. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  156. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  157. package/dist/cli/commands/DebugCommand.js +54 -0
  158. package/dist/cli/commands/DebugCommand.js.map +1 -0
  159. package/dist/cli/commands/InfillCommand.d.ts +33 -0
  160. package/dist/cli/commands/InfillCommand.js +438 -0
  161. package/dist/cli/commands/InfillCommand.js.map +1 -0
  162. package/dist/cli/commands/InitCommand.d.ts +11 -0
  163. package/dist/cli/commands/InitCommand.js +195 -0
  164. package/dist/cli/commands/InitCommand.js.map +1 -0
  165. package/dist/cli/commands/OnPostInstallCommand.js +6 -2
  166. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  167. package/dist/cli/commands/PullCommand.d.ts +13 -0
  168. package/dist/cli/commands/PullCommand.js +158 -0
  169. package/dist/cli/commands/PullCommand.js.map +1 -0
  170. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  171. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  172. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  173. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  174. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  175. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  176. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  177. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  179. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  180. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  182. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
  183. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  185. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  186. package/dist/cli/commands/source/SourceCommand.js +19 -0
  187. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  188. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  189. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  190. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  191. package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
  192. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
  193. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  194. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
  195. package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
  196. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  197. package/dist/cli/projectTemplates.d.ts +7 -0
  198. package/dist/cli/projectTemplates.js +10 -0
  199. package/dist/cli/projectTemplates.js.map +1 -0
  200. package/dist/cli/recommendedModels.d.ts +2 -0
  201. package/dist/cli/recommendedModels.js +585 -0
  202. package/dist/cli/recommendedModels.js.map +1 -0
  203. package/dist/cli/startCreateCli.d.ts +2 -0
  204. package/dist/cli/startCreateCli.js +26 -0
  205. package/dist/cli/startCreateCli.js.map +1 -0
  206. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  207. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  208. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  209. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  210. package/dist/cli/utils/ConsoleTable.js +86 -0
  211. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  212. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  213. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  214. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  215. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  216. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  217. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  218. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  219. package/dist/cli/utils/getReadablePath.js +14 -0
  220. package/dist/cli/utils/getReadablePath.js.map +1 -0
  221. package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
  222. package/dist/cli/utils/interactivelyAskForModel.js +450 -0
  223. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  224. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  225. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  226. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  227. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  228. package/dist/cli/utils/printCommonInfoLines.js +82 -0
  229. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  230. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  231. package/dist/cli/utils/printInfoLine.js +54 -0
  232. package/dist/cli/utils/printInfoLine.js.map +1 -0
  233. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  234. package/dist/cli/utils/projectTemplates.js +47 -0
  235. package/dist/cli/utils/projectTemplates.js.map +1 -0
  236. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  237. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  238. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  239. package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
  240. package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
  241. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  242. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  243. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  244. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  245. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  246. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  247. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  248. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  249. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  250. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  251. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  252. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  253. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  254. package/dist/commands.d.ts +4 -3
  255. package/dist/commands.js +6 -3
  256. package/dist/commands.js.map +1 -1
  257. package/dist/config.d.ts +35 -4
  258. package/dist/config.js +58 -17
  259. package/dist/config.js.map +1 -1
  260. package/dist/consts.d.ts +4 -0
  261. package/dist/consts.js +11 -0
  262. package/dist/consts.js.map +1 -0
  263. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
  264. package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
  265. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  266. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  267. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  268. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  269. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  270. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  271. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  272. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  273. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  274. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  275. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
  276. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  277. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
  278. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
  279. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  280. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
  281. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
  282. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  283. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
  284. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  285. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  286. package/dist/evaluator/LlamaCompletion.d.ts +168 -0
  287. package/dist/evaluator/LlamaCompletion.js +470 -0
  288. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  289. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +62 -21
  290. package/dist/evaluator/LlamaContext/LlamaContext.js +501 -120
  291. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  292. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  293. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  294. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  295. package/dist/evaluator/LlamaContext/types.d.ts +177 -16
  296. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  297. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  298. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  299. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  301. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  302. package/dist/evaluator/LlamaEmbedding.js +53 -0
  303. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  304. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
  305. package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
  306. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  307. package/dist/evaluator/LlamaGrammar.d.ts +16 -13
  308. package/dist/evaluator/LlamaGrammar.js +17 -10
  309. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  310. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  311. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  312. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  313. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  314. package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
  315. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  316. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
  317. package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
  318. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  319. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  320. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  321. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  322. package/dist/evaluator/TokenBias.d.ts +34 -0
  323. package/dist/evaluator/TokenBias.js +65 -0
  324. package/dist/evaluator/TokenBias.js.map +1 -0
  325. package/dist/evaluator/TokenMeter.d.ts +45 -0
  326. package/dist/evaluator/TokenMeter.js +74 -0
  327. package/dist/evaluator/TokenMeter.js.map +1 -0
  328. package/dist/gguf/consts.d.ts +4 -0
  329. package/dist/gguf/consts.js +12 -0
  330. package/dist/gguf/consts.js.map +1 -0
  331. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  332. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  333. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  334. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  335. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  336. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  337. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  338. package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
  339. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  340. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  341. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  342. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  343. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
  344. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
  345. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  346. package/dist/gguf/insights/GgufInsights.d.ts +50 -0
  347. package/dist/gguf/insights/GgufInsights.js +401 -0
  348. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  349. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
  350. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
  351. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  352. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
  353. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
  354. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  355. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
  356. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
  357. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  358. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  359. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  360. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  361. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  362. package/dist/gguf/parser/GgufV2Parser.js +156 -0
  363. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  364. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  365. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  366. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  367. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  368. package/dist/gguf/parser/parseGguf.js +61 -0
  369. package/dist/gguf/parser/parseGguf.js.map +1 -0
  370. package/dist/gguf/readGgufFileInfo.d.ts +45 -0
  371. package/dist/gguf/readGgufFileInfo.js +71 -0
  372. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  373. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  374. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  375. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  376. package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
  377. package/dist/gguf/types/GgufMetadataTypes.js +114 -0
  378. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  379. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  380. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  381. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  382. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  383. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  384. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  385. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  386. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  387. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  388. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  389. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  390. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  391. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  392. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  393. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  394. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  395. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  396. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  397. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  398. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  399. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  400. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  401. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  402. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  403. package/dist/index.d.ts +39 -14
  404. package/dist/index.js +29 -8
  405. package/dist/index.js.map +1 -1
  406. package/dist/state.d.ts +2 -0
  407. package/dist/state.js +7 -0
  408. package/dist/state.js.map +1 -1
  409. package/dist/tsconfig.tsbuildinfo +1 -0
  410. package/dist/types.d.ts +131 -5
  411. package/dist/types.js.map +1 -1
  412. package/dist/utils/DisposeGuard.d.ts +13 -0
  413. package/dist/utils/DisposeGuard.js +120 -0
  414. package/dist/utils/DisposeGuard.js.map +1 -0
  415. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  416. package/dist/utils/InsufficientMemoryError.js +6 -0
  417. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  418. package/dist/utils/LlamaText.d.ts +73 -26
  419. package/dist/utils/LlamaText.js +475 -157
  420. package/dist/utils/LlamaText.js.map +1 -1
  421. package/dist/utils/LruCache.d.ts +12 -0
  422. package/dist/utils/LruCache.js +44 -0
  423. package/dist/utils/LruCache.js.map +1 -0
  424. package/dist/utils/OverridesObject.d.ts +7 -0
  425. package/dist/utils/OverridesObject.js +2 -0
  426. package/dist/utils/OverridesObject.js.map +1 -0
  427. package/dist/utils/ReplHistory.js +5 -1
  428. package/dist/utils/ReplHistory.js.map +1 -1
  429. package/dist/utils/StopGenerationDetector.d.ts +27 -8
  430. package/dist/utils/StopGenerationDetector.js +108 -22
  431. package/dist/utils/StopGenerationDetector.js.map +1 -1
  432. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  433. package/dist/utils/ThreadsSplitter.js +177 -0
  434. package/dist/utils/ThreadsSplitter.js.map +1 -0
  435. package/dist/utils/TokenStreamRegulator.d.ts +10 -4
  436. package/dist/utils/TokenStreamRegulator.js +102 -10
  437. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  438. package/dist/utils/UnsupportedError.d.ts +2 -0
  439. package/dist/utils/UnsupportedError.js +7 -0
  440. package/dist/utils/UnsupportedError.js.map +1 -0
  441. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  442. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  443. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  444. package/dist/utils/clearTempFolder.js.map +1 -1
  445. package/dist/utils/cmake.js +23 -10
  446. package/dist/utils/cmake.js.map +1 -1
  447. package/dist/utils/compareTokens.d.ts +1 -1
  448. package/dist/utils/compareTokens.js.map +1 -1
  449. package/dist/utils/createModelDownloader.d.ts +199 -0
  450. package/dist/utils/createModelDownloader.js +405 -0
  451. package/dist/utils/createModelDownloader.js.map +1 -0
  452. package/dist/utils/findBestOption.d.ts +4 -0
  453. package/dist/utils/findBestOption.js +15 -0
  454. package/dist/utils/findBestOption.js.map +1 -0
  455. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
  456. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
  457. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  458. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  459. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  460. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  461. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  462. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  463. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  464. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  465. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  466. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  467. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  468. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  469. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  470. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  471. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  472. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  473. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  474. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  475. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  476. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  477. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  478. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  479. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  480. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  481. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  482. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  483. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  484. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  485. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  486. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  487. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  488. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  489. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  490. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  491. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  492. package/dist/utils/gbnfJson/types.d.ts +3 -0
  493. package/dist/utils/gbnfJson/types.js.map +1 -1
  494. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  495. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  496. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  497. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  498. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  499. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  500. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
  501. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  502. package/dist/utils/getBuildDefaults.d.ts +1 -2
  503. package/dist/utils/getBuildDefaults.js +2 -3
  504. package/dist/utils/getBuildDefaults.js.map +1 -1
  505. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  506. package/dist/utils/getConsoleLogPrefix.js +5 -4
  507. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  508. package/dist/utils/getGrammarsFolder.js +1 -1
  509. package/dist/utils/getGrammarsFolder.js.map +1 -1
  510. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  511. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  512. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  513. package/dist/utils/getReadableContextSize.d.ts +1 -0
  514. package/dist/utils/getReadableContextSize.js +7 -0
  515. package/dist/utils/getReadableContextSize.js.map +1 -0
  516. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  517. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  518. package/dist/utils/gitReleaseBundles.js +68 -1
  519. package/dist/utils/gitReleaseBundles.js.map +1 -1
  520. package/dist/utils/isToken.d.ts +2 -0
  521. package/dist/utils/isToken.js +4 -0
  522. package/dist/utils/isToken.js.map +1 -0
  523. package/dist/utils/isUrl.d.ts +1 -0
  524. package/dist/utils/isUrl.js +15 -0
  525. package/dist/utils/isUrl.js.map +1 -0
  526. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  527. package/dist/utils/mergeUnionTypes.js +2 -0
  528. package/dist/utils/mergeUnionTypes.js.map +1 -0
  529. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  530. package/dist/utils/modelFileAccesTokens.js +40 -0
  531. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  532. package/dist/utils/parseModelFileName.d.ts +1 -0
  533. package/dist/utils/parseModelFileName.js +6 -1
  534. package/dist/utils/parseModelFileName.js.map +1 -1
  535. package/dist/utils/parseTextTemplate.d.ts +66 -0
  536. package/dist/utils/parseTextTemplate.js +116 -0
  537. package/dist/utils/parseTextTemplate.js.map +1 -0
  538. package/dist/utils/prettyPrintObject.d.ts +10 -1
  539. package/dist/utils/prettyPrintObject.js +61 -15
  540. package/dist/utils/prettyPrintObject.js.map +1 -1
  541. package/dist/utils/pushAll.d.ts +6 -0
  542. package/dist/utils/pushAll.js +11 -0
  543. package/dist/utils/pushAll.js.map +1 -0
  544. package/dist/utils/removeNullFields.d.ts +2 -2
  545. package/dist/utils/removeNullFields.js.map +1 -1
  546. package/dist/utils/resolveGithubRelease.d.ts +2 -2
  547. package/dist/utils/resolveGithubRelease.js.map +1 -1
  548. package/dist/utils/resolveLastTokens.d.ts +2 -0
  549. package/dist/utils/resolveLastTokens.js +12 -0
  550. package/dist/utils/resolveLastTokens.js.map +1 -0
  551. package/dist/utils/runtime.d.ts +4 -0
  552. package/dist/utils/runtime.js +8 -0
  553. package/dist/utils/runtime.js.map +1 -0
  554. package/dist/utils/safeEventCallback.d.ts +6 -0
  555. package/dist/utils/safeEventCallback.js +29 -0
  556. package/dist/utils/safeEventCallback.js.map +1 -0
  557. package/dist/utils/spawnCommand.d.ts +11 -2
  558. package/dist/utils/spawnCommand.js +55 -7
  559. package/dist/utils/spawnCommand.js.map +1 -1
  560. package/dist/utils/tokenizeInput.d.ts +1 -1
  561. package/dist/utils/tokenizeInput.js +6 -3
  562. package/dist/utils/tokenizeInput.js.map +1 -1
  563. package/dist/utils/transformPromisable.d.ts +40 -0
  564. package/dist/utils/transformPromisable.js +64 -0
  565. package/dist/utils/transformPromisable.js.map +1 -0
  566. package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
  567. package/dist/utils/truncateTextAndRoundToWords.js +32 -0
  568. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  569. package/dist/utils/utilTypes.d.ts +3 -0
  570. package/dist/utils/utilTypes.js +2 -0
  571. package/dist/utils/utilTypes.js.map +1 -0
  572. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  573. package/dist/utils/withLockfile.js.map +1 -1
  574. package/dist/utils/withOra.d.ts +2 -0
  575. package/dist/utils/withOra.js +16 -6
  576. package/dist/utils/withOra.js.map +1 -1
  577. package/dist/utils/withProgressLog.d.ts +22 -0
  578. package/dist/utils/withProgressLog.js +211 -0
  579. package/dist/utils/withProgressLog.js.map +1 -0
  580. package/dist/utils/withStatusLogs.js +1 -1
  581. package/dist/utils/withStatusLogs.js.map +1 -1
  582. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  583. package/dist/utils/wrapAbortSignal.js +9 -0
  584. package/dist/utils/wrapAbortSignal.js.map +1 -0
  585. package/llama/CMakeLists.txt +134 -5
  586. package/llama/addon/AddonContext.cpp +629 -0
  587. package/llama/addon/AddonContext.h +52 -0
  588. package/llama/addon/AddonGrammar.cpp +39 -0
  589. package/llama/addon/AddonGrammar.h +19 -0
  590. package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
  591. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  592. package/llama/addon/AddonModel.cpp +672 -0
  593. package/llama/addon/AddonModel.h +61 -0
  594. package/llama/addon/AddonModelData.cpp +25 -0
  595. package/llama/addon/AddonModelData.h +15 -0
  596. package/llama/addon/AddonModelLora.cpp +105 -0
  597. package/llama/addon/AddonModelLora.h +28 -0
  598. package/llama/addon/AddonSampler.cpp +513 -0
  599. package/llama/addon/AddonSampler.h +65 -0
  600. package/llama/addon/RingBuffer.h +109 -0
  601. package/llama/addon/addon.cpp +223 -0
  602. package/llama/addon/addonGlobals.cpp +22 -0
  603. package/llama/addon/addonGlobals.h +12 -0
  604. package/llama/addon/globals/addonLog.cpp +136 -0
  605. package/llama/addon/globals/addonLog.h +21 -0
  606. package/llama/addon/globals/addonProgress.cpp +15 -0
  607. package/llama/addon/globals/addonProgress.h +15 -0
  608. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  609. package/llama/addon/globals/getGpuInfo.h +6 -0
  610. package/llama/binariesGithubRelease.json +1 -1
  611. package/llama/gitRelease.bundle +0 -0
  612. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  613. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  614. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  615. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  616. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  617. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  618. package/llama/grammars/README.md +297 -6
  619. package/llama/grammars/json.gbnf +4 -4
  620. package/llama/grammars/json_arr.gbnf +4 -4
  621. package/llama/llama.cpp.info.json +1 -1
  622. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  623. package/package.json +109 -59
  624. package/templates/packed/electron-typescript-react.json +1 -0
  625. package/templates/packed/node-typescript.json +1 -0
  626. package/dist/AbortError.d.ts +0 -2
  627. package/dist/AbortError.js +0 -7
  628. package/dist/AbortError.js.map +0 -1
  629. package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
  630. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  631. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  632. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
  633. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  634. package/dist/cli/commands/BuildCommand.d.ts +0 -11
  635. package/dist/cli/commands/BuildCommand.js +0 -106
  636. package/dist/cli/commands/BuildCommand.js.map +0 -1
  637. package/dist/cli/commands/ClearCommand.js.map +0 -1
  638. package/dist/cli/commands/DownloadCommand.js +0 -169
  639. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  640. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
  641. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
  642. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  643. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  644. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  645. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  646. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  647. package/dist/evaluator/LlamaModel.d.ts +0 -120
  648. package/dist/evaluator/LlamaModel.js +0 -320
  649. package/dist/evaluator/LlamaModel.js.map +0 -1
  650. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  651. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  652. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  653. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  654. package/dist/utils/parseModelTypeDescription.js +0 -9
  655. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  656. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  657. package/dist/utils/resolveChatWrapper.js +0 -16
  658. package/dist/utils/resolveChatWrapper.js.map +0 -1
  659. package/llama/addon.cpp +0 -950
  660. package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
  661. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  662. package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
  663. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  664. package/llamaBins/linux-x64/.buildMetadata.json +0 -1
  665. package/llamaBins/linux-x64/llama-addon.node +0 -0
  666. package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
  667. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  668. package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
  669. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
  670. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  671. package/llamaBins/mac-x64/.buildMetadata.json +0 -1
  672. package/llamaBins/mac-x64/llama-addon.node +0 -0
  673. package/llamaBins/win-x64/.buildMetadata.json +0 -1
  674. package/llamaBins/win-x64/llama-addon.exp +0 -0
  675. package/llamaBins/win-x64/llama-addon.lib +0 -0
  676. package/llamaBins/win-x64/llama-addon.node +0 -0
  677. package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
  678. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  679. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  680. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  681. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  682. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  683. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  684. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -1,77 +1,70 @@
1
- import { DisposeAggregator, EventRelay, withLock } from "lifecycle-utils";
1
+ import { AsyncDisposeAggregator, EventRelay, withLock } from "lifecycle-utils";
2
2
  import { tokenizeInput } from "../utils/tokenizeInput.js";
3
- import { LlamaContext } from "./LlamaContext/LlamaContext.js";
3
+ import { LlamaEmbedding } from "./LlamaEmbedding.js";
4
4
  export class LlamaEmbeddingContext {
5
5
  /** @internal */ _llamaContext;
6
6
  /** @internal */ _sequence;
7
- /** @internal */ _disposeAggregator = new DisposeAggregator();
7
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
8
8
  onDispose = new EventRelay();
9
- constructor({ model, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6 }) {
10
- const resolvedContextSize = Math.min(contextSize, model.trainContextSize);
11
- const resolvedBatchSize = Math.min(batchSize, resolvedContextSize);
12
- this._llamaContext = new LlamaContext({
13
- model,
14
- contextSize: resolvedContextSize,
15
- batchSize: resolvedBatchSize,
16
- threads,
17
- _embedding: true,
18
- _noSeed: true
19
- });
9
+ constructor({ _llamaContext }) {
10
+ this._llamaContext = _llamaContext;
20
11
  this._sequence = this._llamaContext.getSequence();
21
12
  this._disposeAggregator.add(this._llamaContext.onDispose.createListener(() => {
22
- this._disposeAggregator.dispose();
13
+ void this._disposeAggregator.dispose();
23
14
  }));
24
15
  this._disposeAggregator.add(this.onDispose.dispatchEvent);
25
- this._disposeAggregator.add(() => {
26
- this._llamaContext.dispose();
16
+ this._disposeAggregator.add(async () => {
17
+ await this._llamaContext.dispose();
27
18
  });
28
19
  }
29
20
  async getEmbeddingFor(input) {
30
- const resolvedInput = tokenizeInput(input, this._llamaContext.model.tokenize);
21
+ const resolvedInput = tokenizeInput(input, this._llamaContext.model.tokenizer);
31
22
  if (resolvedInput.length > this._llamaContext.contextSize)
32
23
  throw new Error("Input is longer than the context size. " +
33
24
  "Try to increase the context size or use another model that supports longer contexts.");
34
25
  else if (resolvedInput.length === 0)
35
- return new LlamaEmbedding({ vector: [] });
26
+ return new LlamaEmbedding({
27
+ vector: []
28
+ });
36
29
  return await withLock(this, "evaluate", async () => {
37
30
  await this._sequence.eraseContextTokenRanges([{
38
31
  start: 0,
39
32
  end: this._sequence.nextTokenIndex
40
33
  }]);
41
- await this._sequence.evaluateWithoutGeneratingNewTokens(resolvedInput);
42
- const embedding = this._llamaContext._ctx.getEmbedding();
34
+ const iterator = this._sequence.evaluate(resolvedInput, { _noSampling: true });
35
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
36
+ for await (const token of iterator) {
37
+ break; // only generate one token to get embeddings
38
+ }
39
+ const embedding = this._llamaContext._ctx.getEmbedding(resolvedInput.length);
43
40
  const embeddingVector = Array.from(embedding);
44
- return new LlamaEmbedding({ vector: embeddingVector });
41
+ return new LlamaEmbedding({
42
+ vector: embeddingVector
43
+ });
45
44
  });
46
45
  }
47
- dispose() {
48
- this._disposeAggregator.dispose();
46
+ async dispose() {
47
+ await this._disposeAggregator.dispose();
49
48
  }
50
49
  /** @hidden */
51
- [Symbol.dispose]() {
50
+ [Symbol.asyncDispose]() {
52
51
  return this.dispose();
53
52
  }
54
53
  get disposed() {
55
54
  return this._llamaContext.disposed;
56
55
  }
57
- }
58
- export class LlamaEmbedding {
59
- vector;
60
- constructor({ vector }) {
61
- this.vector = vector;
62
- }
63
- toJSON() {
64
- return {
65
- type: "LlamaEmbedding",
66
- vector: this.vector
67
- };
68
- }
69
- static fromJSON(json) {
70
- if (json == null || json.type !== "LlamaEmbedding" || !(json.vector instanceof Array) ||
71
- json.vector.some(v => typeof v !== "number"))
72
- throw new Error("Invalid LlamaEmbedding JSON");
73
- return new LlamaEmbedding({
74
- vector: json.vector
56
+ /** @internal */
57
+ static async _create({ _model }, { contextSize, batchSize, threads = 6, createSignal, ignoreMemorySafetyChecks }) {
58
+ const llamaContext = await _model.createContext({
59
+ contextSize,
60
+ batchSize,
61
+ threads,
62
+ createSignal,
63
+ ignoreMemorySafetyChecks,
64
+ _embeddings: true
65
+ });
66
+ return new LlamaEmbeddingContext({
67
+ _llamaContext: llamaContext
75
68
  });
76
69
  }
77
70
  }
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaEmbeddingContext.js","sourceRoot":"","sources":["../../src/evaluator/LlamaEmbeddingContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,iBAAiB,EAAE,UAAU,EAAE,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AAGxE,OAAO,EAAC,aAAa,EAAC,MAAM,2BAA2B,CAAC;AAExD,OAAO,EAAC,YAAY,EAAuB,MAAM,gCAAgC,CAAC;AAkBlF,MAAM,OAAO,qBAAqB;IAC9B,gBAAgB,CAAkB,aAAa,CAAe;IAC9D,gBAAgB,CAAkB,SAAS,CAAuB;IAClE,gBAAgB,CAAkB,kBAAkB,GAAG,IAAI,iBAAiB,EAAE,CAAC;IAE/D,SAAS,GAAG,IAAI,UAAU,EAAQ,CAAC;IAEnD,YAAmB,EACf,KAAK,EACL,WAAW,GAAG,KAAK,CAAC,gBAAgB,EACpC,SAAS,GAAG,WAAW,EACvB,OAAO,GAAG,CAAC,EACgB;QAC3B,MAAM,mBAAmB,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC1E,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,mBAAmB,CAAC,CAAC;QAEnE,IAAI,CAAC,aAAa,GAAG,IAAI,YAAY,CAAC;YAClC,KAAK;YACL,WAAW,EAAE,mBAAmB;YAChC,SAAS,EAAE,iBAAiB;YAC5B,OAAO;YACP,UAAU,EAAE,IAAI;YAChB,OAAO,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC;QAElD,IAAI,CAAC,kBAAkB,CAAC,GAAG,CACvB,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,cAAc,CAAC,GAAG,EAAE;YAC7C,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,CAAC;QACtC,CAAC,CAAC,CACL,CAAC;QACF,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QAC1D,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,GAAG,EAAE;YAC7B,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;QACjC,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,eAAe,CAAC,KAAmC;QAC5D,MAAM,aAAa,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAE9E,IAAI,aAAa,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW;YACrD,MAAM,IAAI,KAAK,CACX,yCAAyC;gBACzC,sFAAsF,CACzF,CAAC;aACD,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;YAC/B,OAAO,IAAI,cAAc,CAAC,EAAC,MAAM,EAAE,EAAE,EAAC,CAAC,CAAC;QAE5C,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;oBAC1C,KAAK,EAAE,CAAC;oBACR,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc;iBACrC,CAAC,CAAC,CAAC;YAEJ,MAAM,IAAI,CAAC,SAAS,CAAC,kCAAkC,CAAC,aAAa,CAAC,CAAC;YAEvE,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACzD,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE
9C,OAAO,IAAI,cAAc,CAAC,EAAC,MAAM,EAAE,eAAe,EAAC,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,OAAO;QACV,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,CAAC;IACtC,CAAC;IAED,cAAc;IACP,CAAC,MAAM,CAAC,OAAO,CAAC;QACnB,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;IAED,IAAW,QAAQ;QACf,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;IACvC,CAAC;CACJ;AAOD,MAAM,OAAO,cAAc;IACP,MAAM,CAAW;IAEjC,YAAmB,EAAC,MAAM,EAAqB;QAC3C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACzB,CAAC;IAEM,MAAM;QACT,OAAO;YACH,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE,IAAI,CAAC,MAAM;SACtB,CAAC;IACN,CAAC;IAEM,MAAM,CAAC,QAAQ,CAAC,IAAwB;QAC3C,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,gBAAgB,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,YAAY,KAAK,CAAC;YACjF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YAE5C,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAEnD,OAAO,IAAI,cAAc,CAAC;YACtB,MAAM,EAAE,IAAI,CAAC,MAAM;SACtB,CAAC,CAAC;IACP,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaEmbeddingContext.js","sourceRoot":"","sources":["../../src/evaluator/LlamaEmbeddingContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,sBAAsB,EAAE,UAAU,EAAE,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AAG7E,OAAO,EAAC,aAAa,EAAC,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAC,cAAc,EAAC,MAAM,qBAAqB,CAAC;AA2CnD,MAAM,OAAO,qBAAqB;IAC9B,gBAAgB,CAAkB,aAAa,CAAe;IAC9D,gBAAgB,CAAkB,SAAS,CAAuB;IAClE,gBAAgB,CAAkB,kBAAkB,GAAG,IAAI,sBAAsB,EAAE,CAAC;IAEpE,SAAS,GAAG,IAAI,UAAU,EAAQ,CAAC;IAEnD,YAAoB,EAChB,aAAa,EAGhB;QACG,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC;QAElD,IAAI,CAAC,kBAAkB,CAAC,GAAG,CACvB,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,cAAc,CAAC,GAAG,EAAE;YAC7C,KAAK,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,CAAC;QAC3C,CAAC,CAAC,CACL,CAAC;QACF,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QAC1D,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE;YACnC,MAAM,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC;QACvC,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,eAAe,CAAC,KAAmC;QAC5D,MAAM,aAAa,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAE/E,IAAI,aAAa,CAAC,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW;YACrD,MAAM,IAAI,KAAK,CACX,yCAAyC;gBACzC,sFAAsF,CACzF,CAAC;aACD,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;YAC/B,OAAO,IAAI,cAAc,CAAC;gBACtB,MAAM,EAAE,EAAE;aACb,CAAC,CAAC;QAEP,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE,KAAK,IAAI,EAAE;YAC/C,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;oBAC1C,KAAK,EAAE,CAAC;oBACR,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc;iBACrC,CAAC,CAAC,CAAC;YAEJ,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,aAAa,EAAE,EAAC,WAAW,EAAE,IAAI,EAAC,CAAC,CAAC;YAC7E,6DAA6D;YAC7D,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBACjC,MAAM,CAAC,4CAA4C;YACvD,CAAC;YAED,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC7E,MAAM,eAAe,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE9C,OAAO,IAAI,cAAc,CAAC;gBACtB,MAAM,EAAE,eAAe;aAC1B,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,OAAO;QAChB,MAAM,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,CAAC;IAC5C,CAAC;IAED,cAAc;I
ACP,CAAC,MAAM,CAAC,YAAY,CAAC;QACxB,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;IAED,IAAW,QAAQ;QACf,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;IACvC,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EACxB,MAAM,EAGT,EAAE,EACC,WAAW,EACX,SAAS,EACT,OAAO,GAAG,CAAC,EACX,YAAY,EACZ,wBAAwB,EACG;QAC3B,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC;YAC5C,WAAW;YACX,SAAS;YACT,OAAO;YACP,YAAY;YACZ,wBAAwB;YACxB,WAAW,EAAE,IAAI;SACpB,CAAC,CAAC;QAEH,OAAO,IAAI,qBAAqB,CAAC;YAC7B,aAAa,EAAE,YAAY;SAC9B,CAAC,CAAC;IACP,CAAC;CACJ"}
@@ -1,30 +1,33 @@
1
1
  import { LlamaText } from "../utils/LlamaText.js";
2
- import { StopGenerationTrigger } from "../utils/StopGenerationDetector.js";
3
2
  import { Llama } from "../bindings/Llama.js";
3
+ import { Token } from "../types.js";
4
4
  export type LlamaGrammarOptions = {
5
- llama: Llama;
6
5
  /** GBNF grammar */
7
6
  grammar: string;
8
- /** print the grammar to stdout */
9
- printGrammar?: boolean;
10
7
  /** Consider any of these as EOS for the generated text. Only supported by `LlamaChat` and `LlamaChatSession` */
11
- stopGenerationTriggers?: readonly (StopGenerationTrigger | LlamaText)[];
8
+ stopGenerationTriggers?: readonly (LlamaText | string | readonly (string | Token)[])[];
12
9
  /** Trim whitespace from the end of the generated text. Only supported by `LlamaChat` and `LlamaChatSession` */
13
10
  trimWhitespaceSuffix?: boolean;
11
+ /**
12
+ * Root rule name.
13
+ *
14
+ * Defaults to `"root"`.
15
+ */
16
+ rootRuleName?: string;
14
17
  };
15
18
  export declare class LlamaGrammar {
16
- private readonly _stopGenerationTriggers;
17
- private readonly _trimWhitespaceSuffix;
18
- private readonly _grammarText;
19
19
  /**
20
20
  * > GBNF files are supported.
21
- * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
22
- * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
21
+ * > More info here: [
22
+ * github:ggerganov/llama.cpp:grammars/README.md
23
+ * ](https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
24
+ * @param llama
23
25
  * @param options
24
26
  */
25
- constructor({ llama, grammar, stopGenerationTriggers, trimWhitespaceSuffix, printGrammar }: LlamaGrammarOptions);
27
+ constructor(llama: Llama, { grammar, stopGenerationTriggers, trimWhitespaceSuffix, rootRuleName }: LlamaGrammarOptions);
26
28
  get grammar(): string;
27
- get stopGenerationTriggers(): readonly (StopGenerationTrigger | LlamaText)[];
29
+ get rootRuleName(): string;
30
+ get stopGenerationTriggers(): readonly (string | import("../utils/LlamaText.js")._LlamaText | readonly (string | Token)[])[];
28
31
  get trimWhitespaceSuffix(): boolean;
29
- static getFor(llama: Llama, type: "json" | "list" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
32
+ static getFor(llama: Llama, type: "json" | "json_arr" | "list" | "c" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
30
33
  }
@@ -5,27 +5,35 @@ import { LlamaText } from "../utils/LlamaText.js";
5
5
  export class LlamaGrammar {
6
6
  /** @internal */ _llama;
7
7
  /** @internal */ _grammar;
8
- _stopGenerationTriggers;
9
- _trimWhitespaceSuffix;
10
- _grammarText;
8
+ /** @internal */ _stopGenerationTriggers;
9
+ /** @internal */ _trimWhitespaceSuffix;
10
+ /** @internal */ _grammarText;
11
+ /** @internal */ _rootRuleName;
11
12
  /**
12
13
  * > GBNF files are supported.
13
- * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
14
- * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
14
+ * > More info here: [
15
+ * github:ggerganov/llama.cpp:grammars/README.md
16
+ * ](https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
17
+ * @param llama
15
18
  * @param options
16
19
  */
17
- constructor({ llama, grammar, stopGenerationTriggers = [], trimWhitespaceSuffix = false, printGrammar = false }) {
20
+ constructor(llama, { grammar, stopGenerationTriggers = [], trimWhitespaceSuffix = false, rootRuleName = "root" }) {
18
21
  this._llama = llama;
19
22
  this._grammar = new this._llama._bindings.AddonGrammar(grammar, {
20
- printGrammar
23
+ addonExports: this._llama._bindings,
24
+ rootRuleName
21
25
  });
22
26
  this._stopGenerationTriggers = stopGenerationTriggers ?? [];
23
27
  this._trimWhitespaceSuffix = trimWhitespaceSuffix;
24
28
  this._grammarText = grammar;
29
+ this._rootRuleName = rootRuleName;
25
30
  }
26
31
  get grammar() {
27
32
  return this._grammarText;
28
33
  }
34
+ get rootRuleName() {
35
+ return this._rootRuleName;
36
+ }
29
37
  get stopGenerationTriggers() {
30
38
  return this._stopGenerationTriggers;
31
39
  }
@@ -37,10 +45,9 @@ export class LlamaGrammar {
37
45
  const grammarFile = path.join(grammarsFolder, type + ".gbnf");
38
46
  if (await fs.pathExists(grammarFile)) {
39
47
  const grammar = await fs.readFile(grammarFile, "utf8");
40
- return new LlamaGrammar({
41
- llama,
48
+ return new LlamaGrammar(llama, {
42
49
  grammar,
43
- stopGenerationTriggers: [LlamaText(["\n".repeat(10)])],
50
+ stopGenerationTriggers: [LlamaText(["\n".repeat(10)])], // this is a workaround for the model not stopping to generate text,
44
51
  trimWhitespaceSuffix: true
45
52
  });
46
53
  }
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/evaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAsBhD,MAAM,OAAO,YAAY;IACrB,gBAAgB,CAAiB,MAAM,CAAQ;IAC/C,gBAAgB,CAAiB,QAAQ,CAAe;IACvC,uBAAuB,CAAiD;IACxE,qBAAqB,CAAU;IAC/B,YAAY,CAAS;IAEtC;;;;;OAKG;IACH,YAAmB,EACf,KAAK,EAAE,OAAO,EAAE,sBAAsB,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,KAAK,EAC7E;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE;YAC5D,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,uBAAuB,GAAG,sBAAsB,IAAI,EAAE,CAAC;QAC5D,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAChC,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,sBAAsB;QAC7B,OAAO,IAAI,CAAC,uBAAuB,CAAC;IACxC,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAY,EAAE,IAA2D;QAChG,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAEhE,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC;gBACpB,KAAK;gBACL,OAAO;gBACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBACtD,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;SACN;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/evaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAwBhD,MAAM,OAAO,YAAY;IACrB,gBAAgB,CAAiB,MAAM,CAAQ;IAC/C,gBAAgB,CAAiB,QAAQ,CAAe;IACxD,gBAAgB,CAAkB,uBAAuB,CAAgE;IACzH,gBAAgB,CAAkB,qBAAqB,CAAU;IACjE,gBAAgB,CAAkB,YAAY,CAAS;IACvD,gBAAgB,CAAkB,aAAa,CAAS;IAExD;;;;;;;OAOG;IACH,YAAmB,KAAY,EAAE,EAC7B,OAAO,EAAE,sBAAsB,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,MAAM,EACvE;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE;YAC5D,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YACnC,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,uBAAuB,GAAG,sBAAsB,IAAI,EAAE,CAAC;QAC5D,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;QAC5B,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED,IAAW,sBAAsB;QAC7B,OAAO,IAAI,CAAC,uBAAuB,CAAC;IACxC,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAY,EAAE,IAA8E;QACnH,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAEhE,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC,KAAK,EAAE;gBAC3B,OAAO;gBACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,oEAAoE;gBAC5H,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;QACP,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
@@ -1,15 +1,19 @@
1
- import { LlamaGrammar } from "./LlamaGrammar.js";
1
+ import type { LlamaGrammar } from "./LlamaGrammar.js";
2
+ import type { LlamaModel } from "./LlamaModel/LlamaModel.js";
2
3
  export type LlamaGrammarEvaluationStateOptions = {
4
+ model: LlamaModel;
3
5
  grammar: LlamaGrammar;
4
6
  };
5
7
  /**
6
8
  * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
9
+ *
7
10
  * Create a new grammar evaluation state for every response you generate with the model.
8
- * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
11
+ *
12
+ * This is only needed when using the `LlamaContext` class directly, since `LlamaChatSession` already handles this for you.
9
13
  */
10
14
  export declare class LlamaGrammarEvaluationState {
11
15
  /**
12
16
  * @param options
13
17
  */
14
- constructor({ grammar }: LlamaGrammarEvaluationStateOptions);
18
+ constructor({ model, grammar }: LlamaGrammarEvaluationStateOptions);
15
19
  }
@@ -1,7 +1,9 @@
1
1
  /**
2
2
  * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
3
+ *
3
4
  * Create a new grammar evaluation state for every response you generate with the model.
4
- * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
5
+ *
6
+ * This is only needed when using the `LlamaContext` class directly, since `LlamaChatSession` already handles this for you.
5
7
  */
6
8
  export class LlamaGrammarEvaluationState {
7
9
  /** @internal */ _llama;
@@ -9,9 +11,11 @@ export class LlamaGrammarEvaluationState {
9
11
  /**
10
12
  * @param options
11
13
  */
12
- constructor({ grammar }) {
13
- this._llama = grammar._llama;
14
- this._state = new grammar._llama._bindings.AddonGrammarEvaluationState(grammar._grammar);
14
+ constructor({ model, grammar }) {
15
+ this._llama = model._llama;
16
+ if (model._llama !== grammar._llama)
17
+ throw new Error("The given LlamaModel and LlamaGrammar must be from the same Llama instance");
18
+ this._state = new model._llama._bindings.AddonGrammarEvaluationState(model._model, grammar._grammar);
15
19
  }
16
20
  }
17
21
  //# sourceMappingURL=LlamaGrammarEvaluationState.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/evaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AASA;;;;GAIG;AACH,MAAM,OAAO,2BAA2B;IACpC,gBAAgB,CAAiB,MAAM,CAAQ;IAC/C,gBAAgB,CAAiB,MAAM,CAA8B;IAErE;;OAEG;IACH,YAAmB,EAAC,OAAO,EAAqC;QAC5D,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,IAAI,CAAC,MAAM,GAAG,IAAI,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,2BAA2B,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC7F,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/evaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AAWA;;;;;;GAMG;AACH,MAAM,OAAO,2BAA2B;IACpC,gBAAgB,CAAiB,MAAM,CAAQ;IAC/C,gBAAgB,CAAiB,MAAM,CAA8B;IAErE;;OAEG;IACH,YAAmB,EAAC,KAAK,EAAE,OAAO,EAAqC;QACnE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;QAE3B,IAAI,KAAK,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM;YAC/B,MAAM,IAAI,KAAK,CAAC,4EAA4E,CAAC,CAAC;QAElG,IAAI,CAAC,MAAM,GAAG,IAAI,KAAK,CAAC,MAAM,CAAC,SAAS,CAAC,2BAA2B,CAAC,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACzG,CAAC;CACJ"}
@@ -3,6 +3,9 @@ import { Llama } from "../bindings/Llama.js";
3
3
  import { LlamaGrammar } from "./LlamaGrammar.js";
4
4
  export declare class LlamaJsonSchemaGrammar<const T extends Readonly<GbnfJsonSchema>> extends LlamaGrammar {
5
5
  private readonly _schema;
6
+ /**
7
+ * Prefer to create a new instance of this class by using `llama.createGrammarForJsonSchema(...)`.
8
+ */
6
9
  constructor(llama: Llama, schema: T);
7
10
  parse(json: string): GbnfJsonSchemaToType<T>;
8
11
  }
@@ -1,13 +1,15 @@
1
- import { getGbnfGrammarForGbnfJsonSchema } from "../utils/getGbnfGrammarForGbnfJsonSchema.js";
1
+ import { getGbnfGrammarForGbnfJsonSchema } from "../utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js";
2
2
  import { validateObjectAgainstGbnfSchema } from "../utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js";
3
3
  import { LlamaText } from "../utils/LlamaText.js";
4
4
  import { LlamaGrammar } from "./LlamaGrammar.js";
5
5
  export class LlamaJsonSchemaGrammar extends LlamaGrammar {
6
6
  _schema;
7
+ /**
8
+ * Prefer to create a new instance of this class by using `llama.createGrammarForJsonSchema(...)`.
9
+ */
7
10
  constructor(llama, schema) {
8
11
  const grammar = getGbnfGrammarForGbnfJsonSchema(schema);
9
- super({
10
- llama,
12
+ super(llama, {
11
13
  grammar,
12
14
  stopGenerationTriggers: [LlamaText(["\n".repeat(4)])],
13
15
  trimWhitespaceSuffix: true
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaJsonSchemaGrammar.js","sourceRoot":"","sources":["../../src/evaluator/LlamaJsonSchemaGrammar.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,+BAA+B,EAAC,MAAM,6CAA6C,CAAC;AAC5F,OAAO,EAAC,+BAA+B,EAAC,MAAM,4DAA4D,CAAC;AAC3G,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAEhD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,OAAO,sBAAiE,SAAQ,YAAY;IAC7E,OAAO,CAAI;IAE5B,YAAmB,KAAY,EAAE,MAAS;QACtC,MAAM,OAAO,GAAG,+BAA+B,CAAC,MAAM,CAAC,CAAC;QAExD,KAAK,CAAC;YACF,KAAK;YACL,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IAC1B,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEpC,+BAA+B,CAAC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,OAAO,UAAU,CAAC;IACtB,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaJsonSchemaGrammar.js","sourceRoot":"","sources":["../../src/evaluator/LlamaJsonSchemaGrammar.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,+BAA+B,EAAC,MAAM,sDAAsD,CAAC;AACrG,OAAO,EAAC,+BAA+B,EAAC,MAAM,4DAA4D,CAAC;AAC3G,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAEhD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,OAAO,sBAAiE,SAAQ,YAAY;IAC7E,OAAO,CAAI;IAE5B;;OAEG;IACH,YAAmB,KAAY,EAAE,MAAS;QACtC,MAAM,OAAO,GAAG,+BAA+B,CAAC,MAAM,CAAC,CAAC;QAExD,KAAK,CAAC,KAAK,EAAE;YACT,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IAC1B,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEpC,+BAA+B,CAAC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,OAAO,UAAU,CAAC;IACtB,CAAC;CACJ"}
@@ -0,0 +1,255 @@
1
+ import { EventRelay } from "lifecycle-utils";
2
+ import { Token, Tokenizer } from "../../types.js";
3
+ import { ModelTypeDescription } from "../../bindings/AddonTypes.js";
4
+ import { LlamaVocabularyType } from "../../bindings/types.js";
5
+ import { GgufFileInfo } from "../../gguf/types/GgufFileInfoTypes.js";
6
+ import { GgufInsights } from "../../gguf/insights/GgufInsights.js";
7
+ import { LlamaContextOptions } from "../LlamaContext/types.js";
8
+ import { LlamaContext } from "../LlamaContext/LlamaContext.js";
9
+ import { LlamaEmbeddingContext, LlamaEmbeddingContextOptions } from "../LlamaEmbeddingContext.js";
10
+ import { GgufMetadata } from "../../gguf/types/GgufMetadataTypes.js";
11
+ import { OverridesObject } from "../../utils/OverridesObject.js";
12
+ import { TokenAttributes } from "./utils/TokenAttributes.js";
13
+ import type { BuiltinSpecialTokenValue } from "../../utils/LlamaText.js";
14
+ export type LlamaModelOptions = {
15
+ /** path to the model on the filesystem */
16
+ modelPath: string;
17
+ /**
18
+ * Number of layers to store in VRAM.
19
+ * - **`"auto"`** - adapt to the current VRAM state and try to fit as many layers as possible in it.
20
+ * Takes into account the VRAM required to create a context with a `contextSize` set to `"auto"`.
21
+ * - **`"max"`** - store all layers in VRAM. If there's not enough VRAM, an error will be thrown. Use with caution.
22
+ * - **`number`** - store the specified number of layers in VRAM. If there's not enough VRAM, an error will be thrown. Use with caution.
23
+ * - **`{min?: number, max?: number, fitContext?: {contextSize: number}}`** - adapt to the current VRAM state and try to fit as
24
+ * many layers as possible in it, but at least `min` and at most `max` layers. Set `fitContext` to the parameters of a context you
25
+ * intend to create with the model, so it'll take it into account in the calculations and leave enough memory for such a context.
26
+ *
27
+ * If GPU support is disabled, will be set to `0` automatically.
28
+ *
29
+ * Defaults to `"auto"`.
30
+ */
31
+ gpuLayers?: "auto" | "max" | number | {
32
+ min?: number;
33
+ max?: number;
34
+ fitContext?: {
35
+ contextSize?: number;
36
+ /**
37
+ * Defaults to `false`.
38
+ */
39
+ embeddingContext?: boolean;
40
+ };
41
+ };
42
+ /**
43
+ * Only load the vocabulary, not weight tensors.
44
+ *
45
+ * Useful when you only need the model's tokenizer and don't intend to use the model for evaluation.
46
+ *
47
+ * Defaults to `false`.
48
+ */
49
+ vocabOnly?: boolean;
50
+ /**
51
+ * Use mmap if possible.
52
+ *
53
+ * Defaults to `true`.
54
+ */
55
+ useMmap?: boolean;
56
+ /**
57
+ * Force the system to keep the model in the RAM/VRAM.
58
+ * Use with caution as this can crash your system if the available resources are insufficient.
59
+ */
60
+ useMlock?: boolean;
61
+ /**
62
+ * Check for tensor validity before actually loading the model.
63
+ * Using it increases the time it takes to load the model.
64
+ *
65
+ * Defaults to `false`.
66
+ */
67
+ checkTensors?: boolean;
68
+ /**
69
+ * Enable flash attention by default for contexts created with this model.
70
+ * Only works with models that support flash attention.
71
+ *
72
+ * Flash attention is an optimization in the attention mechanism that makes inference faster, more efficient and uses less memory.
73
+ *
74
+ * The support for flash attention is currently experimental and may not always work as expected.
75
+ * Use with caution.
76
+ *
77
+ * This option will be ignored if flash attention is not supported by the model.
78
+ *
79
+ * Enabling this affects the calculations of default values for the model and contexts created with it
80
+ * as flash attention reduces the amount of memory required,
81
+ * which allows for more layers to be offloaded to the GPU and for context sizes to be bigger.
82
+ *
83
+ * Defaults to `false`.
84
+ *
85
+ * Upon flash attention exiting the experimental status, the default value will become `true`.
86
+ */
87
+ defaultContextFlashAttention?: boolean;
88
+ /**
89
+ * Called with the load percentage when the model is being loaded.
90
+ * @param loadProgress - a number between 0 (exclusive) and 1 (inclusive).
91
+ */
92
+ onLoadProgress?(loadProgress: number): void;
93
+ /** An abort signal to abort the model load */
94
+ loadSignal?: AbortSignal;
95
+ /**
96
+ * Ignore insufficient memory errors and continue with the model load.
97
+ * Can cause the process to crash if there's not enough VRAM to fit the model.
98
+ *
99
+ * Defaults to `false`.
100
+ */
101
+ ignoreMemorySafetyChecks?: boolean;
102
+ /**
103
+ * Metadata overrides to load the model with.
104
+ *
105
+ * > **Note:** Most metadata value overrides aren't supported and overriding them will have no effect on `llama.cpp`.
106
+ * > Only use this for metadata values that are explicitly documented to be supported by `llama.cpp` to be overridden,
107
+ * > and only in cases when this is crucial, as this is not guaranteed to always work as expected.
108
+ */
109
+ metadataOverrides?: OverridesObject<GgufMetadata, number | bigint | boolean | string>;
110
+ };
111
+ export declare class LlamaModel {
112
+ readonly tokenizer: Tokenizer;
113
+ readonly onDispose: EventRelay<void>;
114
+ private constructor();
115
+ dispose(): Promise<void>;
116
+ /** @hidden */
117
+ [Symbol.asyncDispose](): Promise<void>;
118
+ get disposed(): boolean;
119
+ get tokens(): LlamaModelTokens;
120
+ get filename(): string | undefined;
121
+ get fileInfo(): GgufFileInfo;
122
+ get fileInsights(): GgufInsights;
123
+ /**
124
+ * Number of layers offloaded to the GPU.
125
+ * If GPU support is disabled, this will always be `0`.
126
+ */
127
+ get gpuLayers(): number;
128
+ /**
129
+ * Total model size in memory in bytes
130
+ */
131
+ get size(): number;
132
+ get flashAttentionSupported(): boolean;
133
+ get defaultContextFlashAttention(): boolean;
134
+ /**
135
+ * Transform text into tokens that can be fed to the model
136
+ * @param text - the text to tokenize
137
+ * @param [specialTokens] - if set to true, text that corresponds to special tokens will be tokenized to those tokens.
138
+ * For example, `<s>` will be tokenized to the BOS token if `specialTokens` is set to `true`,
139
+ * otherwise it will be tokenized to tokens that correspond to the plaintext `<s>` string.
140
+ * @param [options] - additional options for tokenization.
141
+ * If set to `"trimLeadingSpace"`, a leading space will be trimmed from the tokenized output if the output has an
142
+ * additional space at the beginning.
143
+ */
144
+ tokenize(text: string, specialTokens?: boolean, options?: "trimLeadingSpace"): Token[];
145
+ tokenize(text: BuiltinSpecialTokenValue, specialTokens: "builtin"): Token[];
146
+ /**
147
+ * Transform tokens into text
148
+ * @param tokens - the tokens to detokenize.
149
+ * @param [specialTokens] - if set to `true`, special tokens will be detokenized to their corresponding token text representation.
150
+ *
151
+ * Recommended for debugging purposes only.
152
+ *
153
+ * > **Note:** there may be additional spaces around special tokens that were not present in the original text - this is not a bug,
154
+ * this is [how the tokenizer is supposed to work](https://github.com/ggerganov/llama.cpp/pull/7697#issuecomment-2144003246).
155
+ *
156
+ * Defaults to `false`.
157
+ * @param [lastTokens] - the last few tokens that preceded the tokens to detokenize.
158
+ * If provided, the last few tokens will be used to determine whether a space has to be added before the current tokens or not,
159
+ * and apply other detokenizer-specific heuristics to provide the correct text continuation to the existing tokens.
160
+ *
161
+ * Using it may have no effect with some models, but it is still recommended.
162
+ */
163
+ detokenize(tokens: readonly Token[], specialTokens?: boolean, lastTokens?: readonly Token[]): string;
164
+ getTokenAttributes(token: Token): TokenAttributes;
165
+ /** Check whether the given token is a special token (a control-type token or a token with no normal text representation) */
166
+ isSpecialToken(token: Token | undefined): boolean;
167
+ iterateAllTokens(): Generator<Token, void, unknown>;
168
+ /** Check whether the given token is an EOG (End Of Generation) token, like EOS or EOT. */
169
+ isEogToken(token: Token | undefined): boolean;
170
+ createContext(options?: LlamaContextOptions): Promise<LlamaContext>;
171
+ createEmbeddingContext(options?: LlamaEmbeddingContextOptions): Promise<LlamaEmbeddingContext>;
172
+ /**
173
+ * Get warnings about the model file that would affect its usage.
174
+ *
175
+ * These warnings include all the warnings generated by `GgufInsights`, but are more comprehensive.
176
+ */
177
+ getWarnings(): string[];
178
+ /** @hidden `ModelTypeDescription` type alias is too long in the documentation */
179
+ get typeDescription(): ModelTypeDescription;
180
+ /** The context size the model was trained on */
181
+ get trainContextSize(): number;
182
+ /** The size of an embedding vector the model can produce */
183
+ get embeddingVectorSize(): number;
184
+ get vocabularyType(): LlamaVocabularyType;
185
+ }
186
+ export declare class LlamaModelTokens {
187
+ private constructor();
188
+ /**
189
+ * @returns infill tokens
190
+ */
191
+ get infill(): LlamaModelInfillTokens;
192
+ /**
193
+ * @returns The BOS (Beginning Of Sequence) token.
194
+ */
195
+ get bos(): Token | null;
196
+ /**
197
+ * @returns The EOS (End Of Sequence) token.
198
+ */
199
+ get eos(): Token | null;
200
+ /**
201
+ * @returns The EOT (End Of Turn) token.
202
+ */
203
+ get eot(): Token | null;
204
+ /**
205
+ * @returns The NL (New Line) token.
206
+ */
207
+ get nl(): Token | null;
208
+ /**
209
+ * @returns The BOS (Beginning Of Sequence) token text representation.
210
+ */
211
+ get bosString(): string | null;
212
+ /**
213
+ * @returns The EOS (End Of Sequence) token text representation.
214
+ */
215
+ get eosString(): string | null;
216
+ /**
217
+ * @returns The EOT (End Of Turn) token text representation.
218
+ */
219
+ get eotString(): string | null;
220
+ /**
221
+ * @returns The NL (New Line) token text representation.
222
+ */
223
+ get nlString(): string | null;
224
+ /**
225
+ * @returns Whether we should prepend a BOS (Beginning Of Sequence) token for evaluations with this model.
226
+ */
227
+ get shouldPrependBosToken(): boolean;
228
+ }
229
+ export declare class LlamaModelInfillTokens {
230
+ private constructor();
231
+ /**
232
+ * @returns The beginning of infill prefix token.
233
+ */
234
+ get prefix(): Token | null;
235
+ /**
236
+ * @returns The beginning of infill middle token.
237
+ */
238
+ get middle(): Token | null;
239
+ /**
240
+ * @returns The beginning of infill suffix token.
241
+ */
242
+ get suffix(): Token | null;
243
+ /**
244
+ * @returns The beginning of infill prefix token as a string.
245
+ */
246
+ get prefixString(): string | null;
247
+ /**
248
+ * @returns The beginning of infill middle token as a string.
249
+ */
250
+ get middleString(): string | null;
251
+ /**
252
+ * @returns The beginning of infill suffix token as a string.
253
+ */
254
+ get suffixString(): string | null;
255
+ }