node-llama-cpp 3.0.0-beta.43 → 3.0.0-beta.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. package/README.md +33 -21
  2. package/bins/_linux-arm64.moved.txt +1 -0
  3. package/bins/_linux-armv7l.moved.txt +1 -0
  4. package/bins/_linux-x64-vulkan.moved.txt +1 -0
  5. package/bins/_linux-x64.moved.txt +1 -0
  6. package/bins/_mac-arm64-metal.moved.txt +1 -0
  7. package/bins/_mac-x64.moved.txt +1 -0
  8. package/bins/_win-arm64.moved.txt +1 -0
  9. package/bins/_win-x64-vulkan.moved.txt +1 -0
  10. package/bins/_win-x64.moved.txt +1 -0
  11. package/dist/ChatWrapper.d.ts +11 -1
  12. package/dist/ChatWrapper.js +1 -1
  13. package/dist/ChatWrapper.js.map +1 -1
  14. package/dist/bindings/AddonTypes.d.ts +30 -19
  15. package/dist/bindings/Llama.d.ts +9 -0
  16. package/dist/bindings/Llama.js +33 -6
  17. package/dist/bindings/Llama.js.map +1 -1
  18. package/dist/bindings/consts.d.ts +1 -1
  19. package/dist/bindings/consts.js +2 -0
  20. package/dist/bindings/consts.js.map +1 -1
  21. package/dist/bindings/getLlama.d.ts +33 -5
  22. package/dist/bindings/getLlama.js +14 -3
  23. package/dist/bindings/getLlama.js.map +1 -1
  24. package/dist/bindings/types.d.ts +2 -2
  25. package/dist/bindings/types.js +2 -0
  26. package/dist/bindings/types.js.map +1 -1
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  28. package/dist/bindings/utils/compileLLamaCpp.d.ts +0 -1
  29. package/dist/bindings/utils/compileLLamaCpp.js +45 -7
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  31. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +0 -1
  32. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +2 -2
  33. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  34. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +0 -1
  35. package/dist/bindings/utils/testCmakeBinary.d.ts +0 -1
  36. package/dist/chatWrappers/AlpacaChatWrapper.js +4 -3
  37. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  38. package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
  39. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  40. package/dist/chatWrappers/FalconChatWrapper.js +5 -4
  41. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  42. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +2 -2
  43. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -12
  44. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  45. package/dist/chatWrappers/GemmaChatWrapper.js +1 -1
  46. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  47. package/dist/chatWrappers/GeneralChatWrapper.js +5 -4
  48. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  49. package/dist/chatWrappers/Llama2ChatWrapper.js +5 -6
  50. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
  51. package/dist/chatWrappers/Llama3ChatWrapper.js +1 -1
  52. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
  53. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +13 -9
  54. package/dist/chatWrappers/Llama3_1ChatWrapper.js +92 -38
  55. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
  56. package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
  57. package/dist/chatWrappers/MistralChatWrapper.js +169 -0
  58. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  59. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +25 -1
  60. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +50 -12
  61. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  62. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +22 -16
  63. package/dist/chatWrappers/generic/TemplateChatWrapper.js +28 -24
  64. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
  65. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +1 -1
  66. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  67. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  68. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  69. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +37 -26
  70. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
  71. package/dist/chatWrappers/utils/jsonDumps.d.ts +1 -1
  72. package/dist/chatWrappers/utils/jsonDumps.js +2 -2
  73. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -1
  74. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +30 -6
  75. package/dist/chatWrappers/utils/resolveChatWrapper.js +71 -25
  76. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  77. package/dist/cli/cli.js +2 -6
  78. package/dist/cli/cli.js.map +1 -1
  79. package/dist/cli/commands/ChatCommand.d.ts +2 -1
  80. package/dist/cli/commands/ChatCommand.js +83 -53
  81. package/dist/cli/commands/ChatCommand.js.map +1 -1
  82. package/dist/cli/commands/CompleteCommand.d.ts +2 -1
  83. package/dist/cli/commands/CompleteCommand.js +58 -30
  84. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  85. package/dist/cli/commands/DebugCommand.js +1 -1
  86. package/dist/cli/commands/DebugCommand.js.map +1 -1
  87. package/dist/cli/commands/InfillCommand.d.ts +2 -1
  88. package/dist/cli/commands/InfillCommand.js +58 -30
  89. package/dist/cli/commands/InfillCommand.js.map +1 -1
  90. package/dist/cli/commands/InitCommand.js +1 -1
  91. package/dist/cli/commands/PullCommand.d.ts +2 -1
  92. package/dist/cli/commands/PullCommand.js +85 -44
  93. package/dist/cli/commands/PullCommand.js.map +1 -1
  94. package/dist/cli/commands/inspect/InspectCommand.js +5 -3
  95. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -1
  96. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
  97. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
  98. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  99. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +17 -4
  100. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
  101. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +31 -9
  102. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
  103. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +7 -4
  104. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  105. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  106. package/dist/cli/commands/source/SourceCommand.js +19 -0
  107. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  108. package/dist/cli/commands/{BuildCommand.d.ts → source/commands/BuildCommand.d.ts} +1 -2
  109. package/dist/cli/commands/{BuildCommand.js → source/commands/BuildCommand.js} +21 -19
  110. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  111. package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +6 -6
  112. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  113. package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +1 -2
  114. package/dist/cli/commands/{DownloadCommand.js → source/commands/DownloadCommand.js} +26 -22
  115. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  116. package/dist/cli/recommendedModels.js +192 -23
  117. package/dist/cli/recommendedModels.js.map +1 -1
  118. package/dist/cli/utils/ConsoleInteraction.d.ts +0 -1
  119. package/dist/cli/utils/ConsoleTable.js.map +1 -1
  120. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -1
  121. package/dist/cli/utils/interactivelyAskForModel.js +6 -17
  122. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
  123. package/dist/cli/utils/printCommonInfoLines.js +3 -0
  124. package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
  125. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  126. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  127. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  128. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
  129. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -1
  130. package/dist/commands.d.ts +3 -3
  131. package/dist/commands.js +3 -3
  132. package/dist/commands.js.map +1 -1
  133. package/dist/config.d.ts +7 -3
  134. package/dist/config.js +10 -6
  135. package/dist/config.js.map +1 -1
  136. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +17 -2
  137. package/dist/evaluator/LlamaChat/LlamaChat.js +24 -12
  138. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  139. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +3 -1
  140. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
  141. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +21 -13
  142. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +15 -14
  143. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  144. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
  145. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  146. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  147. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  148. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  149. package/dist/evaluator/LlamaCompletion.d.ts +18 -4
  150. package/dist/evaluator/LlamaCompletion.js +51 -22
  151. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  152. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +21 -0
  153. package/dist/evaluator/LlamaContext/LlamaContext.js +256 -133
  154. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  155. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  156. package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
  157. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  158. package/dist/evaluator/LlamaContext/types.d.ts +71 -9
  159. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -1
  160. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js +1 -1
  161. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -1
  162. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  163. package/dist/evaluator/LlamaEmbedding.js +53 -0
  164. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  165. package/dist/evaluator/LlamaEmbeddingContext.d.ts +1 -5
  166. package/dist/evaluator/LlamaEmbeddingContext.js +6 -8
  167. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  168. package/dist/evaluator/LlamaGrammar.d.ts +9 -10
  169. package/dist/evaluator/LlamaGrammar.js +10 -5
  170. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  171. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
  172. package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
  173. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
  174. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
  175. package/dist/evaluator/LlamaJsonSchemaGrammar.js +3 -0
  176. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  177. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +28 -15
  178. package/dist/evaluator/LlamaModel/LlamaModel.js +66 -51
  179. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  180. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +10 -10
  181. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +10 -10
  182. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -1
  183. package/dist/evaluator/TokenBias.d.ts +20 -8
  184. package/dist/evaluator/TokenBias.js +44 -12
  185. package/dist/evaluator/TokenBias.js.map +1 -1
  186. package/dist/evaluator/TokenMeter.d.ts +3 -12
  187. package/dist/evaluator/TokenMeter.js +4 -16
  188. package/dist/evaluator/TokenMeter.js.map +1 -1
  189. package/dist/gguf/fileReaders/GgufFileReader.d.ts +0 -1
  190. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -1
  191. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +0 -2
  192. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +5 -3
  193. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +26 -13
  194. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
  195. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +57 -1
  196. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +86 -4
  197. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
  198. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -1
  199. package/dist/gguf/readGgufFileInfo.d.ts +18 -6
  200. package/dist/gguf/readGgufFileInfo.js +8 -3
  201. package/dist/gguf/readGgufFileInfo.js.map +1 -1
  202. package/dist/gguf/types/GgufMetadataTypes.d.ts +18 -2
  203. package/dist/gguf/types/GgufMetadataTypes.js +16 -1
  204. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  205. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +2 -0
  206. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
  207. package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
  208. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +1 -1
  209. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -1
  210. package/dist/index.d.ts +8 -4
  211. package/dist/index.js +5 -3
  212. package/dist/index.js.map +1 -1
  213. package/dist/tsconfig.tsbuildinfo +1 -0
  214. package/dist/types.d.ts +1 -0
  215. package/dist/types.js.map +1 -1
  216. package/dist/utils/LlamaText.d.ts +3 -0
  217. package/dist/utils/LlamaText.js +7 -4
  218. package/dist/utils/LlamaText.js.map +1 -1
  219. package/dist/utils/LruCache.d.ts +2 -2
  220. package/dist/utils/LruCache.js.map +1 -1
  221. package/dist/utils/OverridesObject.d.ts +7 -0
  222. package/dist/utils/OverridesObject.js +2 -0
  223. package/dist/utils/OverridesObject.js.map +1 -0
  224. package/dist/utils/StopGenerationDetector.js.map +1 -1
  225. package/dist/utils/ThreadsSplitter.d.ts +26 -0
  226. package/dist/utils/ThreadsSplitter.js +164 -0
  227. package/dist/utils/ThreadsSplitter.js.map +1 -0
  228. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  229. package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
  230. package/dist/utils/appendUserMessageToChatHistory.js +4 -0
  231. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
  232. package/dist/utils/compareTokens.d.ts +1 -1
  233. package/dist/utils/compareTokens.js.map +1 -1
  234. package/dist/utils/createModelDownloader.d.ts +94 -6
  235. package/dist/utils/createModelDownloader.js +174 -46
  236. package/dist/utils/createModelDownloader.js.map +1 -1
  237. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  238. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +1 -1
  239. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  240. package/dist/utils/getGrammarsFolder.js +1 -1
  241. package/dist/utils/getGrammarsFolder.js.map +1 -1
  242. package/dist/utils/gitReleaseBundles.js.map +1 -1
  243. package/dist/utils/modelFileAccesTokens.d.ts +4 -0
  244. package/dist/utils/modelFileAccesTokens.js +40 -0
  245. package/dist/utils/modelFileAccesTokens.js.map +1 -0
  246. package/dist/utils/parseModelFileName.js.map +1 -1
  247. package/dist/utils/parseTextTemplate.js.map +1 -1
  248. package/dist/utils/resolveGithubRelease.d.ts +1 -1
  249. package/dist/utils/resolveLastTokens.js.map +1 -1
  250. package/dist/utils/spawnCommand.d.ts +0 -1
  251. package/dist/utils/truncateTextAndRoundToWords.js +3 -1
  252. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
  253. package/dist/utils/withOra.js +1 -1
  254. package/dist/utils/withOra.js.map +1 -1
  255. package/dist/utils/withProgressLog.d.ts +0 -1
  256. package/dist/utils/wrapAbortSignal.d.ts +0 -1
  257. package/llama/CMakeLists.txt +20 -12
  258. package/llama/addon/AddonContext.cpp +69 -202
  259. package/llama/addon/AddonContext.h +4 -5
  260. package/llama/addon/AddonGrammar.cpp +8 -11
  261. package/llama/addon/AddonGrammar.h +4 -3
  262. package/llama/addon/AddonGrammarEvaluationState.cpp +9 -10
  263. package/llama/addon/AddonGrammarEvaluationState.h +3 -1
  264. package/llama/addon/AddonModel.cpp +6 -5
  265. package/llama/addon/AddonSampler.cpp +513 -0
  266. package/llama/addon/AddonSampler.h +65 -0
  267. package/llama/addon/RingBuffer.h +109 -0
  268. package/llama/addon/addon.cpp +7 -0
  269. package/llama/addon/globals/addonLog.cpp +2 -1
  270. package/llama/binariesGithubRelease.json +1 -1
  271. package/llama/gitRelease.bundle +0 -0
  272. package/llama/grammars/README.md +1 -1
  273. package/llama/llama.cpp.info.json +1 -1
  274. package/package.json +71 -46
  275. package/templates/packed/electron-typescript-react.json +1 -1
  276. package/templates/packed/node-typescript.json +1 -1
  277. package/bins/linux-arm64/_nlcBuildMetadata.json +0 -1
  278. package/bins/linux-arm64/libggml.so +0 -0
  279. package/bins/linux-arm64/libllama.so +0 -0
  280. package/bins/linux-arm64/llama-addon.node +0 -0
  281. package/bins/linux-armv7l/_nlcBuildMetadata.json +0 -1
  282. package/bins/linux-armv7l/libggml.so +0 -0
  283. package/bins/linux-armv7l/libllama.so +0 -0
  284. package/bins/linux-armv7l/llama-addon.node +0 -0
  285. package/bins/linux-x64/_nlcBuildMetadata.json +0 -1
  286. package/bins/linux-x64/libggml.so +0 -0
  287. package/bins/linux-x64/libllama.so +0 -0
  288. package/bins/linux-x64/llama-addon.node +0 -0
  289. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +0 -1
  290. package/bins/linux-x64-vulkan/libggml.so +0 -0
  291. package/bins/linux-x64-vulkan/libllama.so +0 -0
  292. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  293. package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
  294. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +0 -1
  295. package/bins/mac-arm64-metal/ggml-common.h +0 -1833
  296. package/bins/mac-arm64-metal/ggml-metal.metal +0 -6168
  297. package/bins/mac-arm64-metal/libggml.dylib +0 -0
  298. package/bins/mac-arm64-metal/libllama.dylib +0 -0
  299. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  300. package/bins/mac-x64/_nlcBuildMetadata.json +0 -1
  301. package/bins/mac-x64/libggml.dylib +0 -0
  302. package/bins/mac-x64/libllama.dylib +0 -0
  303. package/bins/mac-x64/llama-addon.node +0 -0
  304. package/bins/win-arm64/_nlcBuildMetadata.json +0 -1
  305. package/bins/win-arm64/ggml.dll +0 -0
  306. package/bins/win-arm64/llama-addon.exp +0 -0
  307. package/bins/win-arm64/llama-addon.lib +0 -0
  308. package/bins/win-arm64/llama-addon.node +0 -0
  309. package/bins/win-arm64/llama.dll +0 -0
  310. package/bins/win-x64/_nlcBuildMetadata.json +0 -1
  311. package/bins/win-x64/ggml.dll +0 -0
  312. package/bins/win-x64/llama-addon.exp +0 -0
  313. package/bins/win-x64/llama-addon.lib +0 -0
  314. package/bins/win-x64/llama-addon.node +0 -0
  315. package/bins/win-x64/llama.dll +0 -0
  316. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +0 -1
  317. package/bins/win-x64-vulkan/ggml.dll +0 -0
  318. package/bins/win-x64-vulkan/llama-addon.exp +0 -0
  319. package/bins/win-x64-vulkan/llama-addon.lib +0 -0
  320. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  321. package/bins/win-x64-vulkan/llama.dll +0 -0
  322. package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
  323. package/dist/cli/commands/BuildCommand.js.map +0 -1
  324. package/dist/cli/commands/ClearCommand.js.map +0 -1
  325. package/dist/cli/commands/DownloadCommand.js.map +0 -1
  326. package/dist/utils/DeepPartialObject.d.ts +0 -3
  327. package/dist/utils/DeepPartialObject.js +0 -2
  328. package/dist/utils/DeepPartialObject.js.map +0 -1
  329. /package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +0 -0
@@ -29,6 +29,10 @@ include_directories("gpuInfo")
29
29
  include_directories("llama.cpp")
30
30
  include_directories("./llama.cpp/common")
31
31
 
32
+ unset(GPU_INFO_HEADERS)
33
+ unset(GPU_INFO_SOURCES)
34
+ unset(GPU_INFO_EXTRA_LIBS)
35
+
32
36
  if (GGML_CUDA)
33
37
  cmake_minimum_required(VERSION 3.17)
34
38
 
@@ -38,18 +42,18 @@ if (GGML_CUDA)
38
42
 
39
43
  enable_language(CUDA)
40
44
 
41
- set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
42
- set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)
45
+ list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h)
46
+ list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu)
43
47
 
44
48
  add_compile_definitions(GPU_INFO_USE_CUDA)
45
49
 
46
50
  if (GGML_STATIC)
47
- set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
51
+ list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static)
48
52
  else()
49
- set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
53
+ list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart)
50
54
  endif()
51
55
 
52
- set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cuda_driver)
56
+ list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver)
53
57
 
54
58
  if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
55
59
  # copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
@@ -73,12 +77,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE)
73
77
  message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
74
78
  endif()
75
79
 
76
- set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h)
77
- set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp)
80
+ list(APPEND GPU_INFO_HEADERS gpuInfo/vulkan-gpu-info.h)
81
+ list(APPEND GPU_INFO_SOURCES gpuInfo/vulkan-gpu-info.cpp)
78
82
 
79
83
  add_compile_definitions(GPU_INFO_USE_VULKAN)
80
84
 
81
- set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} Vulkan::Vulkan)
85
+ list(APPEND GPU_INFO_EXTRA_LIBS Vulkan::Vulkan)
82
86
  else()
83
87
  message(FATAL_ERROR "Vulkan was not found")
84
88
  endif()
@@ -105,7 +109,7 @@ if (GGML_HIPBLAS)
105
109
  set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
106
110
  target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
107
111
 
108
- set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} gpu-info-rocm)
112
+ list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm)
109
113
  else()
110
114
  message(FATAL_ERROR "hipBLAS or HIP was not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
111
115
  endif()
@@ -117,18 +121,22 @@ if (GGML_METAL)
117
121
  find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
118
122
 
119
123
  message(STATUS "Using Metal for GPU info")
120
- set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/metal-gpu-info.h)
121
- set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/metal-gpu-info.mm)
124
+ list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h)
125
+ list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm)
122
126
 
123
127
  add_compile_definitions(GPU_INFO_USE_METAL)
124
128
 
125
- set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS}
129
+ list(APPEND GPU_INFO_EXTRA_LIBS
126
130
  ${FOUNDATION_LIBRARY}
127
131
  ${METAL_FRAMEWORK}
128
132
  ${METALKIT_FRAMEWORK}
129
133
  )
130
134
  endif()
131
135
 
136
+ list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
137
+ list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
138
+ list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)
139
+
132
140
  file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})
133
141
 
134
142
  if(APPLE)
@@ -1,6 +1,7 @@
1
1
  #include <thread>
2
2
  #include <algorithm>
3
- #include "common.h"
3
+ #include "common/common.h"
4
+ #include "llama-grammar.h"
4
5
  #include "llama.h"
5
6
 
6
7
  #include "addonGlobals.h"
@@ -188,21 +189,10 @@ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
188
189
  class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
189
190
  public:
190
191
  AddonContext* ctx;
191
- AddonGrammarEvaluationState* grammar_evaluation_state;
192
+ AddonSampler* sampler;
192
193
  int32_t batchLogitIndex;
193
- bool use_grammar = false;
194
194
  llama_token result;
195
- float temperature = 0.0f;
196
- float min_p = 0;
197
- int32_t top_k = 40;
198
- float top_p = 0.95f;
199
- float repeat_penalty = 1.10f; // 1.0 = disabled
200
- float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
201
- float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
202
- std::vector<llama_token> repeat_penalty_tokens;
203
- std::unordered_map<llama_token, float> tokenBiases;
204
- bool useTokenBiases = false;
205
- bool use_repeat_penalty = false;
195
+ bool no_output = false;
206
196
 
207
197
  AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
208
198
  : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
@@ -211,77 +201,12 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
211
201
  ctx->Ref();
212
202
 
213
203
  batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
214
-
215
- if (info.Length() > 1 && info[1].IsObject()) {
216
- Napi::Object options = info[1].As<Napi::Object>();
217
-
218
- if (options.Has("temperature")) {
219
- temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
220
- }
221
-
222
- if (options.Has("minP")) {
223
- min_p = options.Get("minP").As<Napi::Number>().FloatValue();
224
- }
225
-
226
- if (options.Has("topK")) {
227
- top_k = options.Get("topK").As<Napi::Number>().Int32Value();
228
- }
229
-
230
- if (options.Has("topP")) {
231
- top_p = options.Get("topP").As<Napi::Number>().FloatValue();
232
- }
233
-
234
- if (options.Has("repeatPenalty")) {
235
- repeat_penalty = options.Get("repeatPenalty").As<Napi::Number>().FloatValue();
236
- }
237
-
238
- if (options.Has("repeatPenaltyTokens")) {
239
- Napi::Uint32Array repeat_penalty_tokens_uint32_array = options.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
240
-
241
- repeat_penalty_tokens.reserve(repeat_penalty_tokens_uint32_array.ElementLength());
242
- for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
243
- repeat_penalty_tokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
244
- }
245
-
246
- use_repeat_penalty = true;
247
- }
248
-
249
- if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
250
- Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
251
- Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
252
-
253
- if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
254
- for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
255
- tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
256
- }
257
-
258
- useTokenBiases = true;
259
- }
260
- }
261
-
262
- if (options.Has("repeatPenaltyPresencePenalty")) {
263
- repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
264
- }
265
-
266
- if (options.Has("repeatPenaltyFrequencyPenalty")) {
267
- repeat_penalty_frequency_penalty = options.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue();
268
- }
269
-
270
- if (options.Has("grammarEvaluationState")) {
271
- grammar_evaluation_state =
272
- Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(options.Get("grammarEvaluationState").As<Napi::Object>());
273
- grammar_evaluation_state->Ref();
274
- use_grammar = true;
275
- }
276
- }
204
+ sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
205
+ sampler->Ref();
277
206
  }
278
207
  ~AddonContextSampleTokenWorker() {
279
208
  ctx->Unref();
280
-
281
- if (use_grammar) {
282
- grammar_evaluation_state->Unref();
283
- use_grammar = false;
284
- }
209
+ sampler->Unref();
285
210
  }
286
211
 
287
212
  Napi::Promise GetPromise() {
@@ -302,93 +227,46 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
302
227
  }
303
228
 
304
229
  void SampleToken() {
305
- llama_token new_token_id = 0;
306
-
307
- // Select the best prediction.
308
230
  if (llama_get_logits(ctx->ctx) == nullptr) {
309
231
  SetError("This model does not support token generation");
310
232
  return;
311
233
  }
312
234
 
313
- auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
314
- auto n_vocab = llama_n_vocab(ctx->model->model);
235
+ sampler->rebuildChainIfNeeded();
315
236
 
316
- std::vector<llama_token_data> candidates;
317
- candidates.reserve(n_vocab);
237
+ const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
238
+ const int n_vocab = llama_n_vocab(ctx->model->model);
318
239
 
240
+ auto & candidates = sampler->tokenCandidates;
319
241
  for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
320
- auto logit = logits[token_id];
321
-
322
- if (useTokenBiases) {
323
- bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
324
- if (hasTokenBias) {
325
- auto logitBias = tokenBiases.at(token_id);
326
- if (logitBias == -INFINITY || logitBias < -INFINITY) {
327
- if (!llama_token_is_eog(ctx->model->model, token_id)) {
328
- logit = -INFINITY;
329
- }
330
- } else {
331
- logit += logitBias;
332
- }
333
- }
334
- }
335
-
336
- candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
242
+ candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};;
337
243
  }
338
244
 
339
- llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
340
-
341
- if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
342
- llama_sample_repetition_penalties(
343
- ctx->ctx,
344
- &candidates_p,
345
- repeat_penalty_tokens.data(),
346
- repeat_penalty_tokens.size(),
347
- repeat_penalty,
348
- repeat_penalty_frequency_penalty,
349
- repeat_penalty_presence_penalty
350
- );
351
- }
352
-
353
- if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
354
- llama_grammar_sample((grammar_evaluation_state)->grammar, ctx->ctx, &candidates_p);
355
-
356
- if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
357
- // logit biases caused grammar sampling to fail, so sampling again without logit biases
358
- useTokenBiases = false;
359
- SampleToken();
360
- return;
361
- }
362
- }
245
+ llama_token_data_array cur_p = {
246
+ /* .data = */ candidates.data(),
247
+ /* .size = */ candidates.size(),
248
+ /* .selected = */ -1,
249
+ /* .sorted = */ false,
250
+ };
363
251
 
364
- if (temperature <= 0) {
365
- new_token_id = llama_sample_token_greedy(ctx->ctx, &candidates_p);
366
- } else {
367
- const int32_t resolved_top_k =
368
- top_k <= 0 ? llama_n_vocab(ctx->model->model) : std::min(top_k, llama_n_vocab(ctx->model->model));
369
- const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
370
- const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
371
- const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
372
- const float resolved_top_p = top_p; // Top p sampling - 1.0 = disabled
373
-
374
- // Temperature sampling
375
- size_t min_keep = std::max(1, n_probs);
376
- llama_sample_top_k(ctx->ctx, &candidates_p, resolved_top_k, min_keep);
377
- llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
378
- llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
379
- llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
380
- llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
381
- llama_sample_temp(ctx->ctx, &candidates_p, temperature);
382
- new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
383
- }
252
+ llama_sampler_apply(sampler->chain, &cur_p);
384
253
 
385
- if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
386
- llama_grammar_accept_token((grammar_evaluation_state)->grammar, ctx->ctx, new_token_id);
254
+ if (!(cur_p.selected >= 0 && cur_p.selected < (int32_t)cur_p.size)) {
255
+ no_output = true;
256
+ return;
387
257
  }
388
258
 
259
+ auto new_token_id = cur_p.data[cur_p.selected].id;
260
+ sampler->acceptToken(new_token_id);
389
261
  result = new_token_id;
390
262
  }
391
263
  void OnOK() {
264
+ if (no_output) {
265
+ Napi::Number resultValue = Napi::Number::New(Env(), -1);
266
+ deferred.Resolve(resultValue);
267
+ return;
268
+ }
269
+
392
270
  Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
393
271
  deferred.Resolve(resultValue);
394
272
  }
@@ -402,20 +280,14 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
402
280
  model->Ref();
403
281
 
404
282
  context_params = llama_context_default_params();
405
- context_params.seed = -1;
406
283
  context_params.n_ctx = 4096;
407
- context_params.n_threads = 6;
284
+ context_params.n_threads = std::max(cpu_get_num_math(), 1);
408
285
  context_params.n_threads_batch = context_params.n_threads;
286
+ context_params.no_perf = true;
409
287
 
410
288
  if (info.Length() > 1 && info[1].IsObject()) {
411
289
  Napi::Object options = info[1].As<Napi::Object>();
412
290
 
413
- if (options.Has("noSeed")) {
414
- context_params.seed = time(NULL);
415
- } else if (options.Has("seed")) {
416
- context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
417
- }
418
-
419
291
  if (options.Has("contextSize")) {
420
292
  context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
421
293
  }
@@ -438,12 +310,16 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
438
310
  }
439
311
 
440
312
  if (options.Has("threads")) {
441
- const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
442
- const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
313
+ const auto n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
314
+ const auto resolved_n_threads = n_threads == 0 ? std::max((int32_t)std::thread::hardware_concurrency(), context_params.n_threads) : n_threads;
443
315
 
444
316
  context_params.n_threads = resolved_n_threads;
445
317
  context_params.n_threads_batch = resolved_n_threads;
446
318
  }
319
+
320
+ if (options.Has("performanceTracking")) {
321
+ context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
322
+ }
447
323
  }
448
324
  }
449
325
  AddonContext::~AddonContext() {
@@ -641,42 +517,6 @@ Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
641
517
  return worker->GetPromise();
642
518
  }
643
519
 
644
- Napi::Value AddonContext::AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
645
- AddonGrammarEvaluationState* grammar_evaluation_state =
646
- Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
647
- llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
648
-
649
- if ((grammar_evaluation_state)->grammar != nullptr) {
650
- llama_grammar_accept_token((grammar_evaluation_state)->grammar, ctx, tokenId);
651
- }
652
-
653
- return info.Env().Undefined();
654
- }
655
-
656
- Napi::Value AddonContext::CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
657
- AddonGrammarEvaluationState* grammar_evaluation_state =
658
- Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
659
- llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
660
-
661
- if ((grammar_evaluation_state)->grammar != nullptr) {
662
- std::vector<llama_token_data> candidates;
663
- candidates.reserve(1);
664
- candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
665
-
666
- llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
667
-
668
- llama_grammar_sample((grammar_evaluation_state)->grammar, ctx, &candidates_p);
669
-
670
- if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
671
- return Napi::Boolean::New(info.Env(), false);
672
- }
673
-
674
- return Napi::Boolean::New(info.Env(), true);
675
- }
676
-
677
- return Napi::Boolean::New(info.Env(), false);
678
- }
679
-
680
520
  Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
681
521
  if (disposed) {
682
522
  Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
@@ -718,9 +558,36 @@ Napi::Value AddonContext::GetStateSize(const Napi::CallbackInfo& info) {
718
558
  return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
719
559
  }
720
560
 
561
+ Napi::Value AddonContext::GetThreads(const Napi::CallbackInfo& info) {
562
+ if (disposed) {
563
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
564
+ return info.Env().Undefined();
565
+ }
566
+
567
+ return Napi::Number::From(info.Env(), llama_n_threads(ctx));
568
+ }
569
+
570
+ Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
571
+ if (disposed) {
572
+ Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
573
+ return info.Env().Undefined();
574
+ }
575
+
576
+ const auto threads = info[0].As<Napi::Number>().Int32Value();
577
+ const auto resolvedThreads = threads == 0
578
+ ? std::max((int32_t)std::thread::hardware_concurrency(), std::max(cpu_get_num_math(), 1))
579
+ : threads;
580
+
581
+ if (llama_n_threads(ctx) != resolvedThreads) {
582
+ llama_set_n_threads(ctx, resolvedThreads, resolvedThreads);
583
+ }
584
+
585
+ return info.Env().Undefined();
586
+ }
587
+
721
588
  Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
722
- llama_print_timings(ctx);
723
- llama_reset_timings(ctx);
589
+ llama_perf_context_print(ctx);
590
+ llama_perf_context_reset(ctx);
724
591
  return info.Env().Undefined();
725
592
  }
726
593
 
@@ -749,10 +616,10 @@ void AddonContext::init(Napi::Object exports) {
749
616
  InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
750
617
  InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
751
618
  InstanceMethod("sampleToken", &AddonContext::SampleToken),
752
- InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
753
- InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
754
619
  InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
755
620
  InstanceMethod("getStateSize", &AddonContext::GetStateSize),
621
+ InstanceMethod("getThreads", &AddonContext::GetThreads),
622
+ InstanceMethod("setThreads", &AddonContext::SetThreads),
756
623
  InstanceMethod("printTimings", &AddonContext::PrintTimings),
757
624
  InstanceMethod("setLora", &AddonContext::SetLora),
758
625
  InstanceMethod("dispose", &AddonContext::Dispose),
@@ -2,6 +2,7 @@
2
2
  #include "llama.h"
3
3
  #include "napi.h"
4
4
  #include "addonGlobals.h"
5
+ #include "AddonSampler.h"
5
6
 
6
7
  class AddonContext : public Napi::ObjectWrap<AddonContext> {
7
8
  public:
@@ -38,16 +39,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
38
39
  Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
39
40
  Napi::Value SampleToken(const Napi::CallbackInfo& info);
40
41
 
41
- Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info);
42
-
43
- Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info);
44
-
45
42
  Napi::Value GetEmbedding(const Napi::CallbackInfo& info);
46
43
  Napi::Value GetStateSize(const Napi::CallbackInfo& info);
44
+ Napi::Value GetThreads(const Napi::CallbackInfo& info);
45
+ Napi::Value SetThreads(const Napi::CallbackInfo& info);
47
46
 
48
47
  Napi::Value PrintTimings(const Napi::CallbackInfo& info);
49
48
 
50
49
  Napi::Value SetLora(const Napi::CallbackInfo& info);
51
50
 
52
51
  static void init(Napi::Object exports);
53
- };
52
+ };
@@ -2,9 +2,7 @@
2
2
  #include "AddonGrammar.h"
3
3
 
4
4
  AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
5
- // Get the model path
6
- std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
7
- bool should_print_grammar = false;
5
+ grammarCode = info[0].As<Napi::String>().Utf8Value();
8
6
 
9
7
  if (info.Length() > 1 && info[1].IsObject()) {
10
8
  Napi::Object options = info[1].As<Napi::Object>();
@@ -14,21 +12,20 @@ AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
14
12
  hasAddonExportsRef = true;
15
13
  }
16
14
 
17
- if (options.Has("debugPrintGrammar")) {
18
- should_print_grammar = options.Get("debugPrintGrammar").As<Napi::Boolean>().Value();
15
+ if (options.Has("rootRuleName")) {
16
+ rootRuleName = options.Get("rootRuleName").As<Napi::String>().Utf8Value();
19
17
  }
20
18
  }
21
19
 
22
- parsed_grammar = grammar_parser::parse(grammarCode.c_str());
23
- // will be empty (default) if there are parse errors
24
- if (parsed_grammar.rules.empty()) {
20
+ auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str());
21
+
22
+ // will be empty if there are parse errors
23
+ if (parsed_grammar == nullptr) {
25
24
  Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
26
25
  return;
27
26
  }
28
27
 
29
- if (should_print_grammar) {
30
- grammar_parser::print_grammar(stderr, parsed_grammar);
31
- }
28
+ llama_grammar_free_impl(parsed_grammar);
32
29
  }
33
30
  AddonGrammar::~AddonGrammar() {
34
31
  if (hasAddonExportsRef) {
@@ -1,13 +1,14 @@
1
1
  #pragma once
2
2
  #include "llama.h"
3
- #include "common.h"
4
- #include "common/grammar-parser.h"
3
+ #include "common/common.h"
4
+ #include "llama-grammar.h"
5
5
  #include "napi.h"
6
6
  #include "addonGlobals.h"
7
7
 
8
8
  class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
9
9
  public:
10
- grammar_parser::parse_state parsed_grammar;
10
+ std::string grammarCode = "";
11
+ std::string rootRuleName = "root";
11
12
  Napi::Reference<Napi::Object> addonExportsRef;
12
13
  bool hasAddonExportsRef = false;
13
14
 
@@ -1,26 +1,25 @@
1
1
  #include <sstream>
2
2
  #include "addonGlobals.h"
3
- #include "common.h"
3
+ #include "common/common.h"
4
4
  #include "llama.h"
5
5
  #include "AddonGrammarEvaluationState.h"
6
6
  #include "AddonGrammar.h"
7
7
 
8
8
  AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
9
- grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[0].As<Napi::Object>());
9
+ model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
10
+ model->Ref();
11
+
12
+ grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
10
13
  grammarDef->Ref();
11
14
 
12
- std::vector<const llama_grammar_element*> grammar_rules(grammarDef->parsed_grammar.c_rules());
13
- grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(), grammarDef->parsed_grammar.symbol_ids.at("root"));
15
+ sampler = llama_sampler_init_grammar(model->model, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
14
16
  }
15
17
  AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
18
+ llama_sampler_free(sampler);
16
19
  grammarDef->Unref();
17
-
18
- if (grammar != nullptr) {
19
- llama_grammar_free(grammar);
20
- grammar = nullptr;
21
- }
20
+ model->Unref();
22
21
  }
23
22
 
24
23
  void AddonGrammarEvaluationState::init(Napi::Object exports) {
25
24
  exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
26
- }
25
+ }
@@ -2,11 +2,13 @@
2
2
  #include "llama.h"
3
3
  #include "napi.h"
4
4
  #include "addonGlobals.h"
5
+ #include "AddonModel.h"
5
6
 
6
7
  class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
7
8
  public:
9
+ AddonModel* model;
8
10
  AddonGrammar* grammarDef;
9
- llama_grammar* grammar = nullptr;
11
+ llama_sampler * sampler = nullptr;
10
12
 
11
13
  AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
12
14
  ~AddonGrammarEvaluationState();
@@ -1,7 +1,8 @@
1
1
  #include <sstream>
2
2
  #include "addonGlobals.h"
3
3
  #include "globals/addonLog.h"
4
- #include "common.h"
4
+ #include "globals/addonProgress.h"
5
+ #include "common/common.h"
5
6
  #include "llama.h"
6
7
  #include "AddonModel.h"
7
8
  #include "AddonModelData.h"
@@ -538,7 +539,7 @@ Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
538
539
  return info.Env().Undefined();
539
540
  }
540
541
 
541
- return getNapiControlToken(info, model, llama_token_prefix(model));
542
+ return getNapiToken(info, model, llama_token_prefix(model));
542
543
  }
543
544
  Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
544
545
  if (disposed) {
@@ -546,7 +547,7 @@ Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
546
547
  return info.Env().Undefined();
547
548
  }
548
549
 
549
- return getNapiControlToken(info, model, llama_token_middle(model));
550
+ return getNapiToken(info, model, llama_token_middle(model));
550
551
  }
551
552
  Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
552
553
  if (disposed) {
@@ -554,7 +555,7 @@ Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
554
555
  return info.Env().Undefined();
555
556
  }
556
557
 
557
- return getNapiControlToken(info, model, llama_token_suffix(model));
558
+ return getNapiToken(info, model, llama_token_suffix(model));
558
559
  }
559
560
  Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
560
561
  if (disposed) {
@@ -562,7 +563,7 @@ Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
562
563
  return info.Env().Undefined();
563
564
  }
564
565
 
565
- return getNapiControlToken(info, model, llama_token_eot(model));
566
+ return getNapiToken(info, model, llama_token_eot(model));
566
567
  }
567
568
  Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
568
569
  if (disposed) {