@realtimex/node-llama-cpp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (876) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +108 -0
  3. package/dist/ChatWrapper.d.ts +27 -0
  4. package/dist/ChatWrapper.js +233 -0
  5. package/dist/ChatWrapper.js.map +1 -0
  6. package/dist/apiDocsIndex.d.ts +1 -0
  7. package/dist/apiDocsIndex.js +7 -0
  8. package/dist/apiDocsIndex.js.map +1 -0
  9. package/dist/bindings/AddonTypes.d.ts +203 -0
  10. package/dist/bindings/AddonTypes.js +2 -0
  11. package/dist/bindings/AddonTypes.js.map +1 -0
  12. package/dist/bindings/Llama.d.ts +104 -0
  13. package/dist/bindings/Llama.js +570 -0
  14. package/dist/bindings/Llama.js.map +1 -0
  15. package/dist/bindings/consts.d.ts +2 -0
  16. package/dist/bindings/consts.js +13 -0
  17. package/dist/bindings/consts.js.map +1 -0
  18. package/dist/bindings/getLlama.d.ts +297 -0
  19. package/dist/bindings/getLlama.js +569 -0
  20. package/dist/bindings/getLlama.js.map +1 -0
  21. package/dist/bindings/types.d.ts +72 -0
  22. package/dist/bindings/types.js +105 -0
  23. package/dist/bindings/types.js.map +1 -0
  24. package/dist/bindings/utils/MemoryOrchestrator.d.ts +23 -0
  25. package/dist/bindings/utils/MemoryOrchestrator.js +50 -0
  26. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  27. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  28. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  29. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  30. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  31. package/dist/bindings/utils/asyncEvery.js +15 -0
  32. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  33. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  34. package/dist/bindings/utils/asyncSome.js +29 -0
  35. package/dist/bindings/utils/asyncSome.js.map +1 -0
  36. package/dist/bindings/utils/binariesGithubRelease.d.ts +6 -0
  37. package/dist/bindings/utils/binariesGithubRelease.js +15 -0
  38. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  39. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  40. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  41. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  42. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  43. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  44. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  45. package/dist/bindings/utils/compileLLamaCpp.d.ts +22 -0
  46. package/dist/bindings/utils/compileLLamaCpp.js +526 -0
  47. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  48. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +18 -0
  49. package/dist/bindings/utils/detectAvailableComputeLayers.js +311 -0
  50. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  51. package/dist/bindings/utils/detectBuildTools.d.ts +14 -0
  52. package/dist/bindings/utils/detectBuildTools.js +149 -0
  53. package/dist/bindings/utils/detectBuildTools.js.map +1 -0
  54. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  55. package/dist/bindings/utils/detectGlibc.js +74 -0
  56. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  57. package/dist/bindings/utils/disposeBeforeExit.d.ts +8 -0
  58. package/dist/bindings/utils/disposeBeforeExit.js +36 -0
  59. package/dist/bindings/utils/disposeBeforeExit.js.map +1 -0
  60. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  61. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  62. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  63. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +6 -0
  64. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +105 -0
  65. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  66. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  67. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  68. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  69. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  70. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  71. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  72. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
  73. package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
  74. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  75. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  76. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  77. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  78. package/dist/bindings/utils/getLlamaGpuTypes.d.ts +13 -0
  79. package/dist/bindings/utils/getLlamaGpuTypes.js +36 -0
  80. package/dist/bindings/utils/getLlamaGpuTypes.js.map +1 -0
  81. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  82. package/dist/bindings/utils/getLlamaWithoutBackend.js +40 -0
  83. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  84. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  85. package/dist/bindings/utils/getPlatform.js +15 -0
  86. package/dist/bindings/utils/getPlatform.js.map +1 -0
  87. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  88. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  89. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  90. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  91. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  92. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  93. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  94. package/dist/bindings/utils/hasFileInPath.js +34 -0
  95. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  96. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  97. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  98. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  99. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  100. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  101. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  102. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  103. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  104. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  105. package/dist/bindings/utils/resolveActualBindingBinaryPath.d.ts +1 -0
  106. package/dist/bindings/utils/resolveActualBindingBinaryPath.js +18 -0
  107. package/dist/bindings/utils/resolveActualBindingBinaryPath.js.map +1 -0
  108. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  109. package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
  110. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  111. package/dist/bindings/utils/testBindingBinary.d.ts +2 -0
  112. package/dist/bindings/utils/testBindingBinary.js +269 -0
  113. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  114. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  115. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  116. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  117. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +17 -0
  118. package/dist/chatWrappers/AlpacaChatWrapper.js +33 -0
  119. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  120. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +6 -0
  121. package/dist/chatWrappers/ChatMLChatWrapper.js +85 -0
  122. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  123. package/dist/chatWrappers/DeepSeekChatWrapper.d.ts +37 -0
  124. package/dist/chatWrappers/DeepSeekChatWrapper.js +294 -0
  125. package/dist/chatWrappers/DeepSeekChatWrapper.js.map +1 -0
  126. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  127. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  128. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  129. package/dist/chatWrappers/FalconChatWrapper.d.ts +19 -0
  130. package/dist/chatWrappers/FalconChatWrapper.js +126 -0
  131. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  132. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +17 -0
  133. package/dist/chatWrappers/FunctionaryChatWrapper.js +622 -0
  134. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  135. package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
  136. package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
  137. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  138. package/dist/chatWrappers/GeneralChatWrapper.d.ts +19 -0
  139. package/dist/chatWrappers/GeneralChatWrapper.js +140 -0
  140. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  141. package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
  142. package/dist/chatWrappers/HarmonyChatWrapper.js +539 -0
  143. package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
  144. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
  145. package/dist/chatWrappers/Llama2ChatWrapper.js +95 -0
  146. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  147. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
  148. package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
  149. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  150. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +32 -0
  151. package/dist/chatWrappers/Llama3_1ChatWrapper.js +290 -0
  152. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  153. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.d.ts +35 -0
  154. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +264 -0
  155. package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -0
  156. package/dist/chatWrappers/MistralChatWrapper.d.ts +16 -0
  157. package/dist/chatWrappers/MistralChatWrapper.js +180 -0
  158. package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
  159. package/dist/chatWrappers/QwenChatWrapper.d.ts +36 -0
  160. package/dist/chatWrappers/QwenChatWrapper.js +344 -0
  161. package/dist/chatWrappers/QwenChatWrapper.js.map +1 -0
  162. package/dist/chatWrappers/SeedChatWrapper.d.ts +25 -0
  163. package/dist/chatWrappers/SeedChatWrapper.js +183 -0
  164. package/dist/chatWrappers/SeedChatWrapper.js.map +1 -0
  165. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +138 -0
  166. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +665 -0
  167. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  168. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +76 -0
  169. package/dist/chatWrappers/generic/TemplateChatWrapper.js +212 -0
  170. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  171. package/dist/chatWrappers/generic/utils/UniqueIdGenerator.d.ts +7 -0
  172. package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js +30 -0
  173. package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js.map +1 -0
  174. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +24 -0
  175. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  176. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  177. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.d.ts +25 -0
  178. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +690 -0
  179. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -0
  180. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.d.ts +2 -0
  181. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +66 -0
  182. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -0
  183. package/dist/chatWrappers/generic/utils/getFirstValidResult.d.ts +6 -0
  184. package/dist/chatWrappers/generic/utils/getFirstValidResult.js +19 -0
  185. package/dist/chatWrappers/generic/utils/getFirstValidResult.js.map +1 -0
  186. package/dist/chatWrappers/generic/utils/squashChatHistoryItems.d.ts +2 -0
  187. package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js +35 -0
  188. package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js.map +1 -0
  189. package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.d.ts +22 -0
  190. package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js +28 -0
  191. package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js.map +1 -0
  192. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +76 -0
  193. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +177 -0
  194. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  195. package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
  196. package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
  197. package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
  198. package/dist/chatWrappers/utils/getModelLinageNames.d.ts +2 -0
  199. package/dist/chatWrappers/utils/getModelLinageNames.js +18 -0
  200. package/dist/chatWrappers/utils/getModelLinageNames.js.map +1 -0
  201. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  202. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +394 -0
  203. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  204. package/dist/chatWrappers/utils/isLlama3_2LightweightModel.d.ts +2 -0
  205. package/dist/chatWrappers/utils/isLlama3_2LightweightModel.js +9 -0
  206. package/dist/chatWrappers/utils/isLlama3_2LightweightModel.js.map +1 -0
  207. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  208. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  209. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  210. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +148 -0
  211. package/dist/chatWrappers/utils/resolveChatWrapper.js +325 -0
  212. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  213. package/dist/cli/cli.d.ts +2 -0
  214. package/dist/cli/cli.js +45 -0
  215. package/dist/cli/cli.js.map +1 -0
  216. package/dist/cli/commands/ChatCommand.d.ts +59 -0
  217. package/dist/cli/commands/ChatCommand.js +856 -0
  218. package/dist/cli/commands/ChatCommand.js.map +1 -0
  219. package/dist/cli/commands/CompleteCommand.d.ts +47 -0
  220. package/dist/cli/commands/CompleteCommand.js +658 -0
  221. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  222. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  223. package/dist/cli/commands/DebugCommand.js +55 -0
  224. package/dist/cli/commands/DebugCommand.js.map +1 -0
  225. package/dist/cli/commands/InfillCommand.d.ts +49 -0
  226. package/dist/cli/commands/InfillCommand.js +693 -0
  227. package/dist/cli/commands/InfillCommand.js.map +1 -0
  228. package/dist/cli/commands/InitCommand.d.ts +12 -0
  229. package/dist/cli/commands/InitCommand.js +230 -0
  230. package/dist/cli/commands/InitCommand.js.map +1 -0
  231. package/dist/cli/commands/OnPostInstallCommand.d.ts +4 -0
  232. package/dist/cli/commands/OnPostInstallCommand.js +62 -0
  233. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -0
  234. package/dist/cli/commands/PullCommand.d.ts +13 -0
  235. package/dist/cli/commands/PullCommand.js +158 -0
  236. package/dist/cli/commands/PullCommand.js.map +1 -0
  237. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  238. package/dist/cli/commands/inspect/InspectCommand.js +21 -0
  239. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  240. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +17 -0
  241. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +275 -0
  242. package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
  243. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +13 -0
  244. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +230 -0
  245. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  246. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  247. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +296 -0
  248. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  249. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +26 -0
  250. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +869 -0
  251. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  252. package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
  253. package/dist/cli/commands/source/SourceCommand.js +19 -0
  254. package/dist/cli/commands/source/SourceCommand.js.map +1 -0
  255. package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
  256. package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
  257. package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
  258. package/dist/cli/commands/source/commands/ClearCommand.d.ts +7 -0
  259. package/dist/cli/commands/source/commands/ClearCommand.js +54 -0
  260. package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
  261. package/dist/cli/commands/source/commands/DownloadCommand.d.ts +16 -0
  262. package/dist/cli/commands/source/commands/DownloadCommand.js +219 -0
  263. package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
  264. package/dist/cli/projectTemplates.d.ts +7 -0
  265. package/dist/cli/projectTemplates.js +10 -0
  266. package/dist/cli/projectTemplates.js.map +1 -0
  267. package/dist/cli/recommendedModels.d.ts +2 -0
  268. package/dist/cli/recommendedModels.js +428 -0
  269. package/dist/cli/recommendedModels.js.map +1 -0
  270. package/dist/cli/startCreateCli.d.ts +2 -0
  271. package/dist/cli/startCreateCli.js +26 -0
  272. package/dist/cli/startCreateCli.js.map +1 -0
  273. package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
  274. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  275. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  276. package/dist/cli/utils/ConsoleTable.d.ts +24 -0
  277. package/dist/cli/utils/ConsoleTable.js +90 -0
  278. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  279. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  280. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  281. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  282. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  283. package/dist/cli/utils/consolePromptQuestion.js +81 -0
  284. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  285. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  286. package/dist/cli/utils/getReadablePath.js +14 -0
  287. package/dist/cli/utils/getReadablePath.js.map +1 -0
  288. package/dist/cli/utils/interactivelyAskForModel.d.ts +13 -0
  289. package/dist/cli/utils/interactivelyAskForModel.js +485 -0
  290. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  291. package/dist/cli/utils/isRunningUnderRosetta.d.ts +1 -0
  292. package/dist/cli/utils/isRunningUnderRosetta.js +20 -0
  293. package/dist/cli/utils/isRunningUnderRosetta.js.map +1 -0
  294. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  295. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  296. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  297. package/dist/cli/utils/packageJsonConfig.d.ts +6 -0
  298. package/dist/cli/utils/packageJsonConfig.js +51 -0
  299. package/dist/cli/utils/packageJsonConfig.js.map +1 -0
  300. package/dist/cli/utils/packageManager.d.ts +1 -0
  301. package/dist/cli/utils/packageManager.js +15 -0
  302. package/dist/cli/utils/packageManager.js.map +1 -0
  303. package/dist/cli/utils/parseXtcArg.d.ts +5 -0
  304. package/dist/cli/utils/parseXtcArg.js +16 -0
  305. package/dist/cli/utils/parseXtcArg.js.map +1 -0
  306. package/dist/cli/utils/printCommonInfoLines.d.ts +12 -0
  307. package/dist/cli/utils/printCommonInfoLines.js +163 -0
  308. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  309. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  310. package/dist/cli/utils/printInfoLine.js +54 -0
  311. package/dist/cli/utils/printInfoLine.js.map +1 -0
  312. package/dist/cli/utils/printModelDestination.d.ts +2 -0
  313. package/dist/cli/utils/printModelDestination.js +11 -0
  314. package/dist/cli/utils/printModelDestination.js.map +1 -0
  315. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  316. package/dist/cli/utils/projectTemplates.js +47 -0
  317. package/dist/cli/utils/projectTemplates.js.map +1 -0
  318. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
  319. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
  320. package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
  321. package/dist/cli/utils/resolveCommandGgufPath.d.ts +19 -0
  322. package/dist/cli/utils/resolveCommandGgufPath.js +123 -0
  323. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  324. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  325. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  326. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  327. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +14 -0
  328. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +12 -0
  329. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  330. package/dist/cli/utils/resolveNpmrcConfig.d.ts +18 -0
  331. package/dist/cli/utils/resolveNpmrcConfig.js +129 -0
  332. package/dist/cli/utils/resolveNpmrcConfig.js.map +1 -0
  333. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  334. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  335. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  336. package/dist/cli/utils/toBytes.d.ts +1 -0
  337. package/dist/cli/utils/toBytes.js +5 -0
  338. package/dist/cli/utils/toBytes.js.map +1 -0
  339. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  340. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  341. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  342. package/dist/commands.d.ts +6 -0
  343. package/dist/commands.js +9 -0
  344. package/dist/commands.js.map +1 -0
  345. package/dist/config.d.ts +79 -0
  346. package/dist/config.js +127 -0
  347. package/dist/config.js.map +1 -0
  348. package/dist/consts.d.ts +4 -0
  349. package/dist/consts.js +11 -0
  350. package/dist/consts.js.map +1 -0
  351. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +500 -0
  352. package/dist/evaluator/LlamaChat/LlamaChat.js +2696 -0
  353. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  354. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
  355. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
  356. package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
  357. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
  358. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
  359. package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
  360. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
  361. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
  362. package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
  363. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  364. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +260 -0
  365. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  366. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +520 -0
  367. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +628 -0
  368. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  369. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +45 -0
  370. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +191 -0
  371. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
  372. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +15 -0
  373. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +16 -0
  374. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  375. package/dist/evaluator/LlamaCompletion.d.ts +219 -0
  376. package/dist/evaluator/LlamaCompletion.js +498 -0
  377. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  378. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +336 -0
  379. package/dist/evaluator/LlamaContext/LlamaContext.js +1919 -0
  380. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  381. package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.d.ts +27 -0
  382. package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.js +130 -0
  383. package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.js.map +1 -0
  384. package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
  385. package/dist/evaluator/LlamaContext/LlamaSampler.js +39 -0
  386. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
  387. package/dist/evaluator/LlamaContext/TokenPredictor.d.ts +55 -0
  388. package/dist/evaluator/LlamaContext/TokenPredictor.js +20 -0
  389. package/dist/evaluator/LlamaContext/TokenPredictor.js.map +1 -0
  390. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +56 -0
  391. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +266 -0
  392. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -0
  393. package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.d.ts +58 -0
  394. package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js +138 -0
  395. package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js.map +1 -0
  396. package/dist/evaluator/LlamaContext/types.d.ts +602 -0
  397. package/dist/evaluator/LlamaContext/types.js +2 -0
  398. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  399. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.d.ts +5 -0
  400. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js +16 -0
  401. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  402. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.d.ts +5 -0
  403. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js +42 -0
  404. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  405. package/dist/evaluator/LlamaContext/utils/padSafeContextSize.d.ts +1 -0
  406. package/dist/evaluator/LlamaContext/utils/padSafeContextSize.js +18 -0
  407. package/dist/evaluator/LlamaContext/utils/padSafeContextSize.js.map +1 -0
  408. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  409. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js +13 -0
  410. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  411. package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
  412. package/dist/evaluator/LlamaEmbedding.js +53 -0
  413. package/dist/evaluator/LlamaEmbedding.js.map +1 -0
  414. package/dist/evaluator/LlamaEmbeddingContext.d.ts +52 -0
  415. package/dist/evaluator/LlamaEmbeddingContext.js +86 -0
  416. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  417. package/dist/evaluator/LlamaGrammar.d.ts +39 -0
  418. package/dist/evaluator/LlamaGrammar.js +72 -0
  419. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  420. package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +19 -0
  421. package/dist/evaluator/LlamaGrammarEvaluationState.js +29 -0
  422. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  423. package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +17 -0
  424. package/dist/evaluator/LlamaJsonSchemaGrammar.js +35 -0
  425. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  426. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +344 -0
  427. package/dist/evaluator/LlamaModel/LlamaModel.js +853 -0
  428. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
  429. package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
  430. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
  431. package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
  432. package/dist/evaluator/LlamaRankingContext.d.ts +91 -0
  433. package/dist/evaluator/LlamaRankingContext.js +178 -0
  434. package/dist/evaluator/LlamaRankingContext.js.map +1 -0
  435. package/dist/evaluator/TokenBias.d.ts +37 -0
  436. package/dist/evaluator/TokenBias.js +68 -0
  437. package/dist/evaluator/TokenBias.js.map +1 -0
  438. package/dist/evaluator/TokenMeter.d.ts +45 -0
  439. package/dist/evaluator/TokenMeter.js +74 -0
  440. package/dist/evaluator/TokenMeter.js.map +1 -0
  441. package/dist/evaluator/utils/chunkDocument.d.ts +86 -0
  442. package/dist/evaluator/utils/chunkDocument.js +212 -0
  443. package/dist/evaluator/utils/chunkDocument.js.map +1 -0
  444. package/dist/gguf/consts.d.ts +4 -0
  445. package/dist/gguf/consts.js +12 -0
  446. package/dist/gguf/consts.js.map +1 -0
  447. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  448. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  449. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  450. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  451. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  452. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  453. package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
  454. package/dist/gguf/fileReaders/GgufFileReader.js +106 -0
  455. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  456. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
  457. package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
  458. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  459. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +28 -0
  460. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +94 -0
  461. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  462. package/dist/gguf/insights/GgufInsights.d.ts +78 -0
  463. package/dist/gguf/insights/GgufInsights.js +854 -0
  464. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  465. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +203 -0
  466. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +284 -0
  467. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  468. package/dist/gguf/insights/GgufInsightsTokens.d.ts +5 -0
  469. package/dist/gguf/insights/GgufInsightsTokens.js +40 -0
  470. package/dist/gguf/insights/GgufInsightsTokens.js.map +1 -0
  471. package/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.d.ts +5 -0
  472. package/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.js +7 -0
  473. package/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.js.map +1 -0
  474. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +33 -0
  475. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +117 -0
  476. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  477. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +20 -0
  478. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +251 -0
  479. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  480. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  481. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  482. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  483. package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
  484. package/dist/gguf/parser/GgufV2Parser.js +184 -0
  485. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  486. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  487. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  488. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  489. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  490. package/dist/gguf/parser/parseGguf.js +61 -0
  491. package/dist/gguf/parser/parseGguf.js.map +1 -0
  492. package/dist/gguf/readGgufFileInfo.d.ts +54 -0
  493. package/dist/gguf/readGgufFileInfo.js +82 -0
  494. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  495. package/dist/gguf/types/GgufFileInfoTypes.d.ts +85 -0
  496. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  497. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  498. package/dist/gguf/types/GgufMetadataTypes.d.ts +480 -0
  499. package/dist/gguf/types/GgufMetadataTypes.js +194 -0
  500. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  501. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +63 -0
  502. package/dist/gguf/types/GgufTensorInfoTypes.js +54 -0
  503. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  504. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  505. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  506. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  507. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
  508. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
  509. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  510. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  511. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  512. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  513. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  514. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  515. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  516. package/dist/gguf/utils/getGgufMetadataKeyValue.d.ts +1 -0
  517. package/dist/gguf/utils/getGgufMetadataKeyValue.js +27 -0
  518. package/dist/gguf/utils/getGgufMetadataKeyValue.js.map +1 -0
  519. package/dist/gguf/utils/ggufQuantNames.d.ts +2 -0
  520. package/dist/gguf/utils/ggufQuantNames.js +42 -0
  521. package/dist/gguf/utils/ggufQuantNames.js.map +1 -0
  522. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +2 -0
  523. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +18 -0
  524. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  525. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  526. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +38 -0
  527. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  528. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  529. package/dist/gguf/utils/resolveSplitGgufParts.js +64 -0
  530. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  531. package/dist/index.d.ts +71 -0
  532. package/dist/index.js +64 -0
  533. package/dist/index.js.map +1 -0
  534. package/dist/state.d.ts +6 -0
  535. package/dist/state.js +22 -0
  536. package/dist/state.js.map +1 -0
  537. package/dist/tsconfig.tsbuildinfo +1 -0
  538. package/dist/types.d.ts +408 -0
  539. package/dist/types.js +13 -0
  540. package/dist/types.js.map +1 -0
  541. package/dist/utils/DisposeGuard.d.ts +13 -0
  542. package/dist/utils/DisposeGuard.js +120 -0
  543. package/dist/utils/DisposeGuard.js.map +1 -0
  544. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  545. package/dist/utils/InsufficientMemoryError.js +6 -0
  546. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  547. package/dist/utils/LlamaText.d.ts +92 -0
  548. package/dist/utils/LlamaText.js +527 -0
  549. package/dist/utils/LlamaText.js.map +1 -0
  550. package/dist/utils/LruCache.d.ts +12 -0
  551. package/dist/utils/LruCache.js +44 -0
  552. package/dist/utils/LruCache.js.map +1 -0
  553. package/dist/utils/OpenAIFormat.d.ts +177 -0
  554. package/dist/utils/OpenAIFormat.js +488 -0
  555. package/dist/utils/OpenAIFormat.js.map +1 -0
  556. package/dist/utils/OverridesObject.d.ts +7 -0
  557. package/dist/utils/OverridesObject.js +2 -0
  558. package/dist/utils/OverridesObject.js.map +1 -0
  559. package/dist/utils/ReplHistory.d.ts +9 -0
  560. package/dist/utils/ReplHistory.js +72 -0
  561. package/dist/utils/ReplHistory.js.map +1 -0
  562. package/dist/utils/StopGenerationDetector.d.ts +47 -0
  563. package/dist/utils/StopGenerationDetector.js +291 -0
  564. package/dist/utils/StopGenerationDetector.js.map +1 -0
  565. package/dist/utils/ThreadsSplitter.d.ts +32 -0
  566. package/dist/utils/ThreadsSplitter.js +177 -0
  567. package/dist/utils/ThreadsSplitter.js.map +1 -0
  568. package/dist/utils/TokenStreamRegulator.d.ts +38 -0
  569. package/dist/utils/TokenStreamRegulator.js +200 -0
  570. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  571. package/dist/utils/UnsupportedError.d.ts +2 -0
  572. package/dist/utils/UnsupportedError.js +7 -0
  573. package/dist/utils/UnsupportedError.js.map +1 -0
  574. package/dist/utils/appendUserMessageToChatHistory.d.ts +6 -0
  575. package/dist/utils/appendUserMessageToChatHistory.js +22 -0
  576. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  577. package/dist/utils/clearTempFolder.d.ts +1 -0
  578. package/dist/utils/clearTempFolder.js +16 -0
  579. package/dist/utils/clearTempFolder.js.map +1 -0
  580. package/dist/utils/cmake.d.ts +10 -0
  581. package/dist/utils/cmake.js +146 -0
  582. package/dist/utils/cmake.js.map +1 -0
  583. package/dist/utils/compareTokens.d.ts +2 -0
  584. package/dist/utils/compareTokens.js +4 -0
  585. package/dist/utils/compareTokens.js.map +1 -0
  586. package/dist/utils/createModelDownloader.d.ts +262 -0
  587. package/dist/utils/createModelDownloader.js +486 -0
  588. package/dist/utils/createModelDownloader.js.map +1 -0
  589. package/dist/utils/findBestOption.d.ts +4 -0
  590. package/dist/utils/findBestOption.js +15 -0
  591. package/dist/utils/findBestOption.js.map +1 -0
  592. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +20 -0
  593. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +85 -0
  594. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  595. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +19 -0
  596. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +60 -0
  597. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -0
  598. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +11 -0
  599. package/dist/utils/gbnfJson/GbnfTerminal.js +54 -0
  600. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -0
  601. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  602. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  603. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  604. package/dist/utils/gbnfJson/terminals/GbnfAnyJson.d.ts +9 -0
  605. package/dist/utils/gbnfJson/terminals/GbnfAnyJson.js +53 -0
  606. package/dist/utils/gbnfJson/terminals/GbnfAnyJson.js.map +1 -0
  607. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +18 -0
  608. package/dist/utils/gbnfJson/terminals/GbnfArray.js +83 -0
  609. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -0
  610. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +7 -0
  611. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js +22 -0
  612. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -0
  613. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.d.ts +7 -0
  614. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js +17 -0
  615. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -0
  616. package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.d.ts +11 -0
  617. package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.js +28 -0
  618. package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.js.map +1 -0
  619. package/dist/utils/gbnfJson/terminals/GbnfFormatString.d.ts +11 -0
  620. package/dist/utils/gbnfJson/terminals/GbnfFormatString.js +90 -0
  621. package/dist/utils/gbnfJson/terminals/GbnfFormatString.js.map +1 -0
  622. package/dist/utils/gbnfJson/terminals/GbnfGrammar.d.ts +9 -0
  623. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js +23 -0
  624. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -0
  625. package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.d.ts +5 -0
  626. package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.js +24 -0
  627. package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.js.map +1 -0
  628. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +5 -0
  629. package/dist/utils/gbnfJson/terminals/GbnfNull.js +11 -0
  630. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -0
  631. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +9 -0
  632. package/dist/utils/gbnfJson/terminals/GbnfNumber.js +22 -0
  633. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -0
  634. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.d.ts +9 -0
  635. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js +21 -0
  636. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -0
  637. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +28 -0
  638. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +88 -0
  639. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -0
  640. package/dist/utils/gbnfJson/terminals/GbnfOr.d.ts +9 -0
  641. package/dist/utils/gbnfJson/terminals/GbnfOr.js +34 -0
  642. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -0
  643. package/dist/utils/gbnfJson/terminals/GbnfRef.d.ts +15 -0
  644. package/dist/utils/gbnfJson/terminals/GbnfRef.js +34 -0
  645. package/dist/utils/gbnfJson/terminals/GbnfRef.js.map +1 -0
  646. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +15 -0
  647. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +86 -0
  648. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  649. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +12 -0
  650. package/dist/utils/gbnfJson/terminals/GbnfString.js +43 -0
  651. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -0
  652. package/dist/utils/gbnfJson/terminals/GbnfStringValue.d.ts +8 -0
  653. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js +26 -0
  654. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -0
  655. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  656. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  657. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  658. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +13 -0
  659. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +67 -0
  660. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -0
  661. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +30 -0
  662. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +72 -0
  663. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -0
  664. package/dist/utils/gbnfJson/types.d.ts +213 -0
  665. package/dist/utils/gbnfJson/types.js +30 -0
  666. package/dist/utils/gbnfJson/types.js.map +1 -0
  667. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  668. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  669. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  670. package/dist/utils/gbnfJson/utils/defsScope.d.ts +7 -0
  671. package/dist/utils/gbnfJson/utils/defsScope.js +17 -0
  672. package/dist/utils/gbnfJson/utils/defsScope.js.map +1 -0
  673. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +5 -0
  674. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +143 -0
  675. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -0
  676. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForLiteral.d.ts +5 -0
  677. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForLiteral.js +16 -0
  678. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForLiteral.js.map +1 -0
  679. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +8 -0
  680. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +242 -0
  681. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -0
  682. package/dist/utils/getBuildDefaults.d.ts +5 -0
  683. package/dist/utils/getBuildDefaults.js +9 -0
  684. package/dist/utils/getBuildDefaults.js.map +1 -0
  685. package/dist/utils/getChatWrapperSegmentDefinition.d.ts +2 -0
  686. package/dist/utils/getChatWrapperSegmentDefinition.js +9 -0
  687. package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -0
  688. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  689. package/dist/utils/getConsoleLogPrefix.js +10 -0
  690. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  691. package/dist/utils/getFirstWritableDir.d.ts +8 -0
  692. package/dist/utils/getFirstWritableDir.js +60 -0
  693. package/dist/utils/getFirstWritableDir.js.map +1 -0
  694. package/dist/utils/getGrammarsFolder.d.ts +2 -0
  695. package/dist/utils/getGrammarsFolder.js +19 -0
  696. package/dist/utils/getGrammarsFolder.js.map +1 -0
  697. package/dist/utils/getLlamaClasses.d.ts +9 -0
  698. package/dist/utils/getLlamaClasses.js +14 -0
  699. package/dist/utils/getLlamaClasses.js.map +1 -0
  700. package/dist/utils/getModuleVersion.d.ts +1 -0
  701. package/dist/utils/getModuleVersion.js +13 -0
  702. package/dist/utils/getModuleVersion.js.map +1 -0
  703. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  704. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  705. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  706. package/dist/utils/getReadableContextSize.d.ts +1 -0
  707. package/dist/utils/getReadableContextSize.js +7 -0
  708. package/dist/utils/getReadableContextSize.js.map +1 -0
  709. package/dist/utils/getTempDir.d.ts +10 -0
  710. package/dist/utils/getTempDir.js +121 -0
  711. package/dist/utils/getTempDir.js.map +1 -0
  712. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  713. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +205 -0
  714. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  715. package/dist/utils/gitReleaseBundles.d.ts +2 -0
  716. package/dist/utils/gitReleaseBundles.js +132 -0
  717. package/dist/utils/gitReleaseBundles.js.map +1 -0
  718. package/dist/utils/hashString.d.ts +1 -0
  719. package/dist/utils/hashString.js +8 -0
  720. package/dist/utils/hashString.js.map +1 -0
  721. package/dist/utils/includesText.d.ts +1 -0
  722. package/dist/utils/includesText.js +12 -0
  723. package/dist/utils/includesText.js.map +1 -0
  724. package/dist/utils/isLockfileActive.d.ts +4 -0
  725. package/dist/utils/isLockfileActive.js +12 -0
  726. package/dist/utils/isLockfileActive.js.map +1 -0
  727. package/dist/utils/isToken.d.ts +2 -0
  728. package/dist/utils/isToken.js +4 -0
  729. package/dist/utils/isToken.js.map +1 -0
  730. package/dist/utils/isUrl.d.ts +1 -0
  731. package/dist/utils/isUrl.js +15 -0
  732. package/dist/utils/isUrl.js.map +1 -0
  733. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  734. package/dist/utils/mergeUnionTypes.js +2 -0
  735. package/dist/utils/mergeUnionTypes.js.map +1 -0
  736. package/dist/utils/modelDownloadEndpoints.d.ts +13 -0
  737. package/dist/utils/modelDownloadEndpoints.js +27 -0
  738. package/dist/utils/modelDownloadEndpoints.js.map +1 -0
  739. package/dist/utils/modelFileAccessTokens.d.ts +5 -0
  740. package/dist/utils/modelFileAccessTokens.js +41 -0
  741. package/dist/utils/modelFileAccessTokens.js.map +1 -0
  742. package/dist/utils/optionsMatrix.d.ts +58 -0
  743. package/dist/utils/optionsMatrix.js +97 -0
  744. package/dist/utils/optionsMatrix.js.map +1 -0
  745. package/dist/utils/parseModelFileName.d.ts +15 -0
  746. package/dist/utils/parseModelFileName.js +132 -0
  747. package/dist/utils/parseModelFileName.js.map +1 -0
  748. package/dist/utils/parseModelUri.d.ts +40 -0
  749. package/dist/utils/parseModelUri.js +346 -0
  750. package/dist/utils/parseModelUri.js.map +1 -0
  751. package/dist/utils/parseTextTemplate.d.ts +66 -0
  752. package/dist/utils/parseTextTemplate.js +116 -0
  753. package/dist/utils/parseTextTemplate.js.map +1 -0
  754. package/dist/utils/prettyPrintObject.d.ts +10 -0
  755. package/dist/utils/prettyPrintObject.js +84 -0
  756. package/dist/utils/prettyPrintObject.js.map +1 -0
  757. package/dist/utils/pushAll.d.ts +6 -0
  758. package/dist/utils/pushAll.js +11 -0
  759. package/dist/utils/pushAll.js.map +1 -0
  760. package/dist/utils/removeNullFields.d.ts +2 -0
  761. package/dist/utils/removeNullFields.js +17 -0
  762. package/dist/utils/removeNullFields.js.map +1 -0
  763. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  764. package/dist/utils/resolveGithubRelease.js +77 -0
  765. package/dist/utils/resolveGithubRelease.js.map +1 -0
  766. package/dist/utils/resolveLastTokens.d.ts +2 -0
  767. package/dist/utils/resolveLastTokens.js +12 -0
  768. package/dist/utils/resolveLastTokens.js.map +1 -0
  769. package/dist/utils/resolveModelDestination.d.ts +16 -0
  770. package/dist/utils/resolveModelDestination.js +54 -0
  771. package/dist/utils/resolveModelDestination.js.map +1 -0
  772. package/dist/utils/resolveModelFile.d.ts +142 -0
  773. package/dist/utils/resolveModelFile.js +201 -0
  774. package/dist/utils/resolveModelFile.js.map +1 -0
  775. package/dist/utils/runtime.d.ts +4 -0
  776. package/dist/utils/runtime.js +8 -0
  777. package/dist/utils/runtime.js.map +1 -0
  778. package/dist/utils/safeEventCallback.d.ts +6 -0
  779. package/dist/utils/safeEventCallback.js +29 -0
  780. package/dist/utils/safeEventCallback.js.map +1 -0
  781. package/dist/utils/signalSleep.d.ts +1 -0
  782. package/dist/utils/signalSleep.js +20 -0
  783. package/dist/utils/signalSleep.js.map +1 -0
  784. package/dist/utils/spawnCommand.d.ts +11 -0
  785. package/dist/utils/spawnCommand.js +89 -0
  786. package/dist/utils/spawnCommand.js.map +1 -0
  787. package/dist/utils/tokenizeInput.d.ts +3 -0
  788. package/dist/utils/tokenizeInput.js +14 -0
  789. package/dist/utils/tokenizeInput.js.map +1 -0
  790. package/dist/utils/tokenizerUtils.d.ts +12 -0
  791. package/dist/utils/tokenizerUtils.js +32 -0
  792. package/dist/utils/tokenizerUtils.js.map +1 -0
  793. package/dist/utils/transformPromisable.d.ts +54 -0
  794. package/dist/utils/transformPromisable.js +95 -0
  795. package/dist/utils/transformPromisable.js.map +1 -0
  796. package/dist/utils/truncateTextAndRoundToWords.d.ts +11 -0
  797. package/dist/utils/truncateTextAndRoundToWords.js +110 -0
  798. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  799. package/dist/utils/utilTypes.d.ts +18 -0
  800. package/dist/utils/utilTypes.js +2 -0
  801. package/dist/utils/utilTypes.js.map +1 -0
  802. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  803. package/dist/utils/waitForLockfileRelease.js +19 -0
  804. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  805. package/dist/utils/withLockfile.d.ts +7 -0
  806. package/dist/utils/withLockfile.js +44 -0
  807. package/dist/utils/withLockfile.js.map +1 -0
  808. package/dist/utils/withOra.d.ts +7 -0
  809. package/dist/utils/withOra.js +37 -0
  810. package/dist/utils/withOra.js.map +1 -0
  811. package/dist/utils/withProgressLog.d.ts +22 -0
  812. package/dist/utils/withProgressLog.js +211 -0
  813. package/dist/utils/withProgressLog.js.map +1 -0
  814. package/dist/utils/withStatusLogs.d.ts +6 -0
  815. package/dist/utils/withStatusLogs.js +25 -0
  816. package/dist/utils/withStatusLogs.js.map +1 -0
  817. package/dist/utils/wrapAbortSignal.d.ts +1 -0
  818. package/dist/utils/wrapAbortSignal.js +14 -0
  819. package/dist/utils/wrapAbortSignal.js.map +1 -0
  820. package/llama/.clang-format +46 -0
  821. package/llama/CMakeLists.txt +141 -0
  822. package/llama/addon/AddonContext.cpp +1181 -0
  823. package/llama/addon/AddonContext.h +85 -0
  824. package/llama/addon/AddonGrammar.cpp +92 -0
  825. package/llama/addon/AddonGrammar.h +22 -0
  826. package/llama/addon/AddonGrammarEvaluationState.cpp +36 -0
  827. package/llama/addon/AddonGrammarEvaluationState.h +17 -0
  828. package/llama/addon/AddonModel.cpp +691 -0
  829. package/llama/addon/AddonModel.h +64 -0
  830. package/llama/addon/AddonModelData.cpp +25 -0
  831. package/llama/addon/AddonModelData.h +15 -0
  832. package/llama/addon/AddonModelLora.cpp +103 -0
  833. package/llama/addon/AddonModelLora.h +28 -0
  834. package/llama/addon/AddonSampler.cpp +669 -0
  835. package/llama/addon/AddonSampler.h +75 -0
  836. package/llama/addon/RingBuffer.h +109 -0
  837. package/llama/addon/addon.cpp +330 -0
  838. package/llama/addon/addonGlobals.cpp +22 -0
  839. package/llama/addon/addonGlobals.h +12 -0
  840. package/llama/addon/globals/addonLog.cpp +143 -0
  841. package/llama/addon/globals/addonLog.h +24 -0
  842. package/llama/addon/globals/addonProgress.cpp +15 -0
  843. package/llama/addon/globals/addonProgress.h +15 -0
  844. package/llama/addon/globals/getGpuInfo.cpp +146 -0
  845. package/llama/addon/globals/getGpuInfo.h +11 -0
  846. package/llama/addon/globals/getMemoryInfo.cpp +63 -0
  847. package/llama/addon/globals/getMemoryInfo.h +4 -0
  848. package/llama/addon/globals/getSwapInfo.cpp +69 -0
  849. package/llama/addon/globals/getSwapInfo.h +4 -0
  850. package/llama/binariesGithubRelease.json +3 -0
  851. package/llama/cmake/addVariantSuffix.cmake +21 -0
  852. package/llama/cmake/win32.ensureNinjaPath.cmake +68 -0
  853. package/llama/cmake/win32.ensureNodeLib.cmake +34 -0
  854. package/llama/cmake/win32.llvmApplyGnuModeAdaptations.cmake +12 -0
  855. package/llama/cmake/win32.llvmEnsureCmakeAr.cmake +37 -0
  856. package/llama/cmake/win32.llvmUseGnuModeCompilers.cmake +87 -0
  857. package/llama/cmake/win32.programFilesPaths.cmake +35 -0
  858. package/llama/gpuInfo/vulkan-gpu-info.cpp +207 -0
  859. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  860. package/llama/package.json +5 -0
  861. package/llama/profiles/llvm.win32.host-arm64.target-arm64.cmake +14 -0
  862. package/llama/profiles/llvm.win32.host-x64.target-arm64.cmake +14 -0
  863. package/llama/profiles/llvm.win32.host-x64.target-x64.cmake +14 -0
  864. package/llama/toolchains/darwin.host-x64.target-arm64.cmake +8 -0
  865. package/llama/toolchains/linux.host-arm64.target-x64.cmake +5 -0
  866. package/llama/toolchains/linux.host-x64.target-arm64.cmake +5 -0
  867. package/llama/toolchains/linux.host-x64.target-arm71.cmake +5 -0
  868. package/llama/toolchains/llvm.win32.host-x64.target-x64.cmake +20 -0
  869. package/llama/toolchains/win32.host-arm64.target-arm64.cmake +21 -0
  870. package/llama/toolchains/win32.host-x64.target-arm64.cmake +21 -0
  871. package/llama/xpack/package.json +10 -0
  872. package/package.json +241 -0
  873. package/templates/README.md +6 -0
  874. package/templates/package.json +10 -0
  875. package/templates/packed/electron-typescript-react.json +1 -0
  876. package/templates/packed/node-typescript.json +1 -0
@@ -0,0 +1,1919 @@
1
+ import path from "path";
2
+ import { acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
3
+ import { removeNullFields } from "../../utils/removeNullFields.js";
4
+ import { compareTokens } from "../../utils/compareTokens.js";
5
+ import { DisposeGuard } from "../../utils/DisposeGuard.js";
6
+ import { TokenMeter } from "../TokenMeter.js";
7
+ import { UnsupportedError } from "../../utils/UnsupportedError.js";
8
+ import { pushAll } from "../../utils/pushAll.js";
9
+ import { safeEventCallback } from "../../utils/safeEventCallback.js";
10
+ import { GgufArchitectureType } from "../../gguf/types/GgufMetadataTypes.js";
11
+ import { LlamaLogLevel } from "../../bindings/types.js";
12
+ import { resolveGgmlTypeOption } from "../../gguf/types/GgufTensorInfoTypes.js";
13
+ import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
14
+ import { LlamaSampler } from "./LlamaSampler.js";
15
+ import { padSafeContextSize } from "./utils/padSafeContextSize.js";
16
+ import { LlamaContextSequenceCheckpoints } from "./LlamaContextSequenceCheckpoints.js";
17
// Default scale applied to a LoRA adapter when no explicit scale is given.
const defaultLoraScale = 1;

// When context creation fails and is retried with a shrunken context size,
// never shrink below this many tokens.
const shrinkRetriesMinContextSize = 4096;

// Default cap on recent tokens considered when applying token penalties
// (assumed from the name — TODO confirm against the penalty implementation).
const defaultMaxPunishTokens = 64;

// Default remedy for failed context creation: retry up to `retries` times,
// shrinking the requested context size by 16% on each attempt.
const defaultFailedCreationRemedy = {
    retries: 16,
    autoContextSizeShrink: 0.16
};

// Default priority assigned to batch evaluation items when none is specified.
const defaultEvaluationPriority = 5;

// Default sequence breakers for the DRY repeat penalty.
// NOTE(review): "Penality" is a typo for "Penalty", but the identifier is
// kept as-is because it may be referenced elsewhere in this file.
const defaultDryRepeatPenalitySequenceBreakers = ["\n", ":", '"', "*"];

// Default context-sequence checkpoint options: keep at most `max` checkpoints,
// create one every `interval` tokens, with no memory cap (`maxMemory: null`).
const defaultCheckpointOptions = {
    max: 32,
    interval: 8192,
    maxMemory: null
};

// Named internal checkpoint groups, each with its own checkpoint cap.
export const internalCheckpoints = {
    speculative: {
        name: "speculative",
        maxCheckpoints: 2
    },
    chatSequenceStart: {
        name: "sequenceStart",
        maxCheckpoints: 1
    },
    chatGrammarEnd: {
        name: "grammarEnd",
        maxCheckpoints: 1
    }
};

// Shared lock objects for working around synchronous-decode issues
// (presumably used to serialize decodes on Vulkan — verify against usage).
const decodeSyncWorkaround = {
    vulkanLock: {}
};
48
+ export class LlamaContext {
49
+ /** @internal */ _llama;
50
+ /** @internal */ _ctx;
51
+ /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
52
+ /** @internal */ _backendContextDisposeGuard;
53
+ /** @internal */ _model;
54
+ /** @internal */ _contextSize;
55
+ /** @internal */ _batchSize;
56
+ /** @internal */ _flashAttention;
57
+ /** @internal */ _idealThreads;
58
+ /** @internal */ _minThreads;
59
+ /** @internal */ _performanceTracking;
60
+ /** @internal */ _kvCacheKeyType;
61
+ /** @internal */ _kvCacheValueType;
62
+ /** @internal */ _totalSequences;
63
+ /** @internal */ _unusedSequenceIds = [];
64
+ /** @internal */ _batchingOptions;
65
+ /** @internal */ _swaFullCache = false;
66
+ /** @internal */ _queuedDecodeSequenceIds = new Set();
67
+ /** @internal */ _queuedDecodes = [];
68
+ /** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
69
+ /** @internal */ _modelPreventDisposalHandle;
70
+ /** @internal */ _loraAdapters = new Set();
71
+ /** @internal */ _nextGeneratedSequenceId = 0;
72
+ /** @internal */ _dispatchDecodeScheduled = false;
73
+ /** @internal */ _batchDispatchPending = false;
74
+ /** @internal */ _threadSplitterConsumer;
75
+ /** @internal */ _freeReservedThreadsTimeout;
76
+ /** @internal */ _currentDispatchBatchHandle = {};
77
+ /** @internal */ _allocatedContextSize;
78
+ /** @internal */ _disposed = false;
79
+ onDispose = new EventRelay();
80
+ constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextCycle", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, swaFullCache = _model.defaultContextSwaFullCache, performanceTracking = false, experimentalKvCacheKeyType, experimentalKvCacheValueType, _embeddings, _ranking }) {
81
+ if (_model.disposed)
82
+ throw new DisposedError();
83
+ this._llama = _model._llama;
84
+ this._model = _model;
85
+ this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
86
+ this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
87
+ this._totalSequences = sequences;
88
+ this._contextSize = contextSize;
89
+ this._batchSize = Math.max(batchSize, this._totalSequences);
90
+ this._flashAttention = flashAttention;
91
+ this._idealThreads = typeof threads === "number"
92
+ ? this._llama._threadsSplitter.normalizeThreadsValue(threads)
93
+ : this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? (this._llama.maxThreads === 0
94
+ ? this._llama.cpuMathCores
95
+ : this._llama.maxThreads));
96
+ this._minThreads = Math.max(1, typeof threads === "number"
97
+ ? 1
98
+ : this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1));
99
+ this._performanceTracking = !!performanceTracking;
100
+ this._kvCacheKeyType = experimentalKvCacheKeyType;
101
+ this._kvCacheValueType = experimentalKvCacheValueType;
102
+ this._swaFullCache = !!swaFullCache;
103
+ this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
104
+ contextSize: padSafeContextSize(this._contextSize * this._totalSequences, "up"), // each sequence needs its own <contextSize> of cells
105
+ batchSize: this._batchSize + ((!this._swaFullCache && this.model.fileInsights.swaSize != null && this.model.fileInsights.swaSize > 0)
106
+ ? 1 // +1 to handle edge cases with SWA KV cache
107
+ : 0),
108
+ sequences: this._totalSequences,
109
+ flashAttention: this._flashAttention,
110
+ threads: this._idealThreads,
111
+ embeddings: _embeddings,
112
+ ranking: _ranking,
113
+ performanceTracking: this._performanceTracking,
114
+ kvCacheKeyType: this._kvCacheKeyType,
115
+ kvCacheValueType: this._kvCacheValueType,
116
+ swaFullCache: this._swaFullCache
117
+ }));
118
+ this._batchingOptions = {
119
+ dispatchSchedule: batchingDispatchSchedule,
120
+ itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
121
+ };
122
+ this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
123
+ this._freeReservedThreads = this._freeReservedThreads.bind(this);
124
+ this._disposeAggregator.add(() => {
125
+ this._disposed = true;
126
+ });
127
+ this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
128
+ this._disposeAggregator.add(this.onDispose.dispatchEvent);
129
+ this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
130
+ this._disposeAggregator.add(async () => {
131
+ await this._backendContextDisposeGuard.acquireDisposeLock();
132
+ await this._ctx.dispose();
133
+ this._modelPreventDisposalHandle.dispose();
134
+ });
135
+ }
136
+ async dispose() {
137
+ if (this._disposed)
138
+ return;
139
+ this._disposed = true;
140
+ await this._disposeAggregator.dispose();
141
+ }
142
+ /** @hidden */
143
+ [Symbol.asyncDispose]() {
144
+ return this.dispose();
145
+ }
146
+ get disposed() {
147
+ return this._disposed;
148
+ }
149
+ get model() {
150
+ return this._model;
151
+ }
152
+ get contextSize() {
153
+ return this._contextSize;
154
+ }
155
+ get batchSize() {
156
+ return this._batchSize;
157
+ }
158
/** Whether flash attention is enabled for this context */
get flashAttention() {
    return this._flashAttention;
}
161
/** The GGML type used for the keys of the KV cache */
get kvCacheKeyType() {
    return this._kvCacheKeyType;
}
164
/** The GGML type used for the values of the KV cache */
get kvCacheValueType() {
    return this._kvCacheValueType;
}
167
/**
 * The actual size of the state in the memory in bytes.
 * This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
 * @throws {DisposedError} if the context was already disposed
 */
get stateSize() {
    this._ensureNotDisposed();
    return this._ctx.getStateSize();
}
175
/** The number of threads currently used to evaluate tokens (read live from the bindings) */
get currentThreads() {
    this._ensureNotDisposed();
    return this._ctx.getThreads();
}
180
/**
 * The number of threads that are preferred to be used to evaluate tokens.
 *
 * The actual number of threads used may be lower when other evaluations are running in parallel.
 */
get idealThreads() {
    return this._idealThreads;
}
188
+ getAllocatedContextSize() {
189
+ this._ensureNotDisposed();
190
+ if (this._allocatedContextSize == null)
191
+ this._allocatedContextSize = this._ctx.getContextSize();
192
+ return this._allocatedContextSize;
193
+ }
194
/** The total number of sequences this context was created with */
get totalSequences() {
    return this._totalSequences;
}
197
/** The number of sequences that can still be obtained via `getSequence` (never-used ids plus reclaimed ids) */
get sequencesLeft() {
    return this._totalSequences - this._nextGeneratedSequenceId + this._unusedSequenceIds.length;
}
200
/**
 * Obtain a new sequence on this context.
 *
 * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
 * When there are no sequences left, this method will throw an error.
 *
 * The default context-shift size is 100 tokens or half the context size, whichever is smaller;
 * the default shift strategy erases tokens from the beginning of the state.
 * @throws {DisposedError} if the context was disposed
 * @throws {Error} when no sequences are left
 */
getSequence(options = {}) {
    const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, tokenPredictor, checkpoints, _tokenMeter } = options;
    this._ensureNotDisposed();
    // prefer a reclaimed sequence id; fall back to generating a new one
    const nextSequenceId = this._popSequenceId();
    if (nextSequenceId == null)
        throw new Error("No sequences left");
    return LlamaContextSequence._create({
        sequenceId: nextSequenceId,
        context: this,
        tokenMeter: _tokenMeter,
        contextShift: {
            size: contextShiftSize,
            strategy: contextShiftStrategy
        },
        tokenPredictor,
        checkpoints
    });
}
222
/**
 * Dispatch all queued decode requests by packing them into one or more native batches
 * and running them under the context lock.
 *
 * Re-entrancy is guarded by `_batchDispatchPending`; the batch handle object identity
 * (`_currentDispatchBatchHandle`) is used to invalidate previously scheduled dispatch callbacks.
 * Errors at any stage reject the affected queued decodes and remove them from the queue.
 */
dispatchPendingBatch() {
    // replacing the handle invalidates any dispatch callback scheduled earlier
    this._currentDispatchBatchHandle = {};
    this._dispatchDecodeScheduled = false;
    if (this._batchDispatchPending)
        return;
    this._batchDispatchPending = true;
    // intentionally not awaited; all work happens under the context lock
    void withLock([this, "context"], async () => {
        this._currentDispatchBatchHandle = {};
        this._dispatchDecodeScheduled = false;
        this._batchDispatchPending = false;
        let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
        // accumulates mapped logit values per queued decode across multiple batch iterations
        const queuedDecodeToMappedLogits = new Map();
        // resolve the user-provided (or default) prioritization strategy;
        // on failure, reject everything queued and return null
        const resolvePrioritizationStrategy = () => {
            try {
                this._ensureNotDisposed();
                return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
            }
            catch (err) {
                this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
            }
            return null;
        };
        // run the prioritization strategy over the queue and map its output
        // back to the queued decodes; returns null when the strategy throws
        const getOrderedQueuedDecodes = (prioritizationStrategy) => {
            const batchItemToQueuedDecodeMap = new Map();
            const batchItemsList = [];
            for (const queuedDecode of this._queuedDecodes) {
                const batchItem = {
                    tokens: queuedDecode.tokens,
                    logits: queuedDecode.logits,
                    evaluationPriority: queuedDecode.evaluationPriority
                };
                batchItemToQueuedDecodeMap.set(batchItem, queuedDecode);
                batchItemsList.push(batchItem);
            }
            let prioritizedItems;
            try {
                prioritizedItems = prioritizationStrategy({
                    items: batchItemsList,
                    size: this._batchSize
                });
            }
            catch (err) {
                this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
                return null;
            }
            return prioritizedItems.map((prioritizedItem) => {
                // custom strategies must preserve the original item object identity
                const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
                if (queuedDecode == null)
                    throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
                        "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
                return {
                    queuedDecode,
                    processAmount: prioritizedItem.processAmount
                };
            });
        };
        // greedily fill a batch up to `batchSize` token slots from the prioritized decodes
        const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
            const currentBatchItems = [];
            let currentBatchSize = 0;
            let batchTokenSlotsLeft = batchSize;
            for (const { queuedDecode, processAmount } of queuedDecodes) {
                // a non-positive processAmount still gets at least 1 token if slots remain
                const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
                if (resolvedProcessAmount <= 0) {
                    if (batchTokenSlotsLeft === 0)
                        break;
                    continue;
                }
                batchTokenSlotsLeft -= resolvedProcessAmount;
                currentBatchSize += resolvedProcessAmount;
                currentBatchItems.push({
                    queuedDecode,
                    processAmount: resolvedProcessAmount
                });
            }
            return {
                currentBatchItems,
                currentBatchSize
            };
        };
        // feed the fitted items into the native batch, decode it,
        // then map logits and resolve/advance the queued decodes
        const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
            const afterDecodeActions = [];
            const queuedDecodesToDelete = new Set();
            const currentQueuedDecodeItems = new Set();
            if (currentBatchSize !== 0)
                this._ctx.initBatch(currentBatchSize);
            for (const { queuedDecode, processAmount } of batchItems) {
                let batchLogitIndexes;
                const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
                // indexes (relative to this slice) of tokens whose logits were requested
                const tokenIndexesWithLogitsToProcess = queuedDecode.logits.slice(0, processAmount)
                    .map((logit, index) => (logit ? index : undefined))
                    .filter((index) => index != undefined);
                const numberOfOutputTokens = tokenIndexesWithLogitsToProcess.length;
                TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
                TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
                try {
                    batchLogitIndexes = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), Uint32Array.from(tokenIndexesWithLogitsToProcess));
                }
                catch (err) {
                    // only this decode failed to be added; reject it and keep going
                    this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
                    continue;
                }
                currentQueuedDecodeItems.add(queuedDecode);
                if (queuedDecode.tokens.length === processAmount) {
                    // fully consumed - resolve its promise after this batch decodes
                    queuedDecodesToDelete.add(queuedDecode);
                    afterDecodeActions.push({
                        queuedDecode,
                        batchLogitIndexes,
                        batchLogitTokenIndexes: tokenIndexesWithLogitsToProcess,
                        firstTokenIndex: queuedDecode.firstTokenSequenceIndex,
                        sequenceStateLength: queuedDecode.firstTokenSequenceIndex + processAmount + 1,
                        returnResults: true
                    });
                }
                else {
                    if (batchLogitIndexes.length > 0 || queuedDecode.afterBatchAction != null)
                        afterDecodeActions.push({
                            queuedDecode,
                            batchLogitIndexes,
                            batchLogitTokenIndexes: tokenIndexesWithLogitsToProcess,
                            firstTokenIndex: queuedDecode.firstTokenSequenceIndex,
                            sequenceStateLength: queuedDecode.firstTokenSequenceIndex + processAmount + 1
                        });
                    // partially consumed - advance the decode for the next batch iteration
                    queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
                    queuedDecode.logits = queuedDecode.logits.slice(processAmount);
                    queuedDecode.firstTokenSequenceIndex += processAmount;
                }
            }
            // remove fully-consumed decodes from the queue (in-place splice keeps the array reference)
            for (let i = 0; i < this._queuedDecodes.length; i++) {
                const queuedDecode = this._queuedDecodes[i];
                if (queuedDecodesToDelete.has(queuedDecode)) {
                    this._queuedDecodes.splice(i, 1);
                    this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
                    i--;
                }
            }
            if (currentBatchSize !== 0) {
                // the thread splitter may hand back an allocation synchronously or as a promise
                const allocationResult = this._threadSplitterConsumer?.getAllocationToConsume();
                const [threadsToUse, consumerHandle] = allocationResult instanceof Promise
                    ? await allocationResult ?? []
                    : allocationResult ?? [];
                try {
                    if (threadsToUse != null)
                        this._ctx.setThreads(threadsToUse);
                    await this._ctx.decodeBatch();
                    consumerHandle?.dispose();
                }
                catch (err) {
                    consumerHandle?.dispose();
                    // a failed decode rejects every decode that was part of this batch
                    this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
                    return;
                }
            }
            // accumulate mapped logits per decode and resolve its promise once fully consumed
            function finishAfterDecodeAction(action, mappedLogitValues) {
                if (mappedLogitValues != null && mappedLogitValues.length > 0) {
                    if (queuedDecodeToMappedLogits.has(action.queuedDecode))
                        pushAll(queuedDecodeToMappedLogits.get(action.queuedDecode), mappedLogitValues);
                    else
                        queuedDecodeToMappedLogits.set(action.queuedDecode, mappedLogitValues);
                }
                if (action.returnResults != null) {
                    const [accept] = action.queuedDecode.response;
                    const mappedLogits = queuedDecodeToMappedLogits.get(action.queuedDecode) ?? [];
                    queuedDecodeToMappedLogits.delete(action.queuedDecode);
                    accept(mappedLogits);
                }
            }
            const afterDecodeActionResults = afterDecodeActions.map((action) => {
                if (action.batchLogitIndexes.length === 0) {
                    finishAfterDecodeAction(action);
                    return undefined;
                }
                const mappedLogitValues = [];
                let promiseChain = undefined;
                const batchLogitIndexes = action.batchLogitIndexes;
                const batchLogitTokenIndexes = action.batchLogitTokenIndexes;
                for (let i = 0; i < batchLogitIndexes.length; i++) {
                    const tokenIndex = batchLogitTokenIndexes[i];
                    // once one mapper call returns a promise, chain the rest to preserve order
                    const mappedValue = promiseChain != null
                        ? promiseChain
                            .then(() => action.queuedDecode.logitDataMapper(batchLogitIndexes[i], tokenIndex + action.firstTokenIndex))
                        : action.queuedDecode.logitDataMapper(batchLogitIndexes[i], tokenIndex + action.firstTokenIndex);
                    if (mappedValue instanceof Promise) {
                        promiseChain = mappedValue;
                        mappedLogitValues.push(mappedValue
                            .then((value) => [tokenIndex + action.firstTokenIndex, value]));
                    }
                    else
                        mappedLogitValues.push([tokenIndex + action.firstTokenIndex, mappedValue]);
                }
                if (promiseChain != null)
                    return Promise.all(mappedLogitValues)
                        .then((resolvedMappedLogitValues) => finishAfterDecodeAction(action, resolvedMappedLogitValues));
                finishAfterDecodeAction(action, mappedLogitValues);
                return undefined;
            });
            await Promise.all(afterDecodeActionResults);
            // run per-decode post-batch hooks sequentially
            for (const action of afterDecodeActions) {
                const resPromise = action.queuedDecode.afterBatchAction?.(action.sequenceStateLength);
                if (resPromise instanceof Promise)
                    await resPromise;
            }
        };
        const prioritizationStrategy = resolvePrioritizationStrategy();
        if (prioritizationStrategy == null)
            return; // all queued items are rejected and dequeued when we get here
        this._reserveThreads();
        try {
            // keep decoding batches until the queue is drained
            while (shouldHaveAnotherLoop) {
                const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
                if (orderedQueuedDecodes == null)
                    return; // all queued items are rejected and dequeued when we get here
                const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
                let preventDisposalHandle;
                try {
                    preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
                }
                catch (err) {
                    this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
                    return;
                }
                let decodeLock;
                // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel
                if (this._llama.gpu === "vulkan")
                    decodeLock = await acquireLock([decodeSyncWorkaround.vulkanLock, "decode"]);
                try {
                    await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
                    shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
                }
                finally {
                    decodeLock?.dispose();
                    preventDisposalHandle.dispose();
                }
            }
        }
        finally {
            this._scheduleToFreeReservedThreads();
        }
    });
}
461
/**
 * Print the timings of token evaluation since that last print for this context.
 *
 * Requires the `performanceTracking` option to be enabled.
 *
 * > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
 * it won't print anything.
 * @throws {DisposedError} if the context was disposed
 * @throws {UnsupportedError} when `performanceTracking` was not enabled for this context
 */
async printTimings() {
    this._ensureNotDisposed();
    if (!this._performanceTracking)
        throw new UnsupportedError("Performance tracking is not enabled");
    this._ctx.printTimings();
    await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
}
476
/**
 * @internal
 * Queue tokens of a sequence for decoding and resolve with the mapped logit values
 * once the decode completes.
 *
 * The returned promise settles from within `dispatchPendingBatch` via the stored
 * `response` tuple (`[accept, reject]`).
 */
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, logits, evaluationPriority = defaultEvaluationPriority, tokenMeter, afterBatchAction }, logitDataMapper) {
    return await new Promise((accept, reject) => {
        this._queuedDecodes.push({
            sequenceId,
            tokens,
            logits,
            firstTokenSequenceIndex,
            evaluationPriority,
            tokenMeter,
            response: [accept, reject],
            logitDataMapper,
            afterBatchAction
        });
        this._queuedDecodeSequenceIds.add(sequenceId);
        this._scheduleDecode();
    });
}
494
/**
 * @internal
 * Return a sequence id to the pool of reusable ids and dispose its native sequence state.
 * Used both by explicit sequence disposal and by the GC `FinalizationRegistry` of sequences.
 */
_reclaimUnusedSequenceId(sequenceId) {
    if (this._disposed)
        return;
    // fire-and-forget under the context lock; re-check disposal once the lock is held
    void withLock([this, "context"], async () => {
        if (this._disposed)
            return;
        this._ctx.disposeSequence(sequenceId);
        this._unusedSequenceIds.push(sequenceId);
        this._onReclaimUnusedSequenceId.dispatchEvent();
    });
}
506
+ /** @internal */
507
+ _popSequenceId() {
508
+ if (this._unusedSequenceIds.length > 0)
509
+ return this._unusedSequenceIds.shift();
510
+ if (this._nextGeneratedSequenceId < this._totalSequences) {
511
+ const sequenceId = this._nextGeneratedSequenceId;
512
+ this._nextGeneratedSequenceId++;
513
+ return sequenceId;
514
+ }
515
+ return null;
516
+ }
517
+ /** @internal */
518
+ _scheduleDecode() {
519
+ if (this._dispatchDecodeScheduled || this._batchDispatchPending)
520
+ return;
521
+ this._dispatchDecodeScheduled = true;
522
+ const currentPendingBatchHandle = this._currentDispatchBatchHandle;
523
+ const dispatch = () => {
524
+ if (this._currentDispatchBatchHandle !== currentPendingBatchHandle)
525
+ return;
526
+ this.dispatchPendingBatch();
527
+ };
528
+ const dispatchSchedule = this._batchingOptions.dispatchSchedule;
529
+ if (this._queuedDecodeSequenceIds.size === this._totalSequences)
530
+ dispatch();
531
+ if (dispatchSchedule === "nextCycle") {
532
+ if (typeof setImmediate === "function")
533
+ setImmediate(dispatch);
534
+ else
535
+ setTimeout(dispatch, 0);
536
+ }
537
+ else if (typeof dispatchSchedule === "function")
538
+ dispatchSchedule(dispatch);
539
+ else {
540
+ if (typeof setImmediate === "function")
541
+ setImmediate(dispatch);
542
+ else
543
+ setTimeout(dispatch, 0);
544
+ }
545
+ }
546
+ /** @internal */
547
+ _dispatchErrorForQueuedDecodesAndDequeue(queuedDecodes, err) {
548
+ for (const pendingDecode of queuedDecodes) {
549
+ const [, reject] = pendingDecode.response;
550
+ reject(err);
551
+ }
552
+ for (let i = 0; i < this._queuedDecodes.length; i++) {
553
+ const item = this._queuedDecodes[i];
554
+ if (queuedDecodes.has(item)) {
555
+ this._queuedDecodes.splice(i, 1);
556
+ this._queuedDecodeSequenceIds.delete(item.sequenceId);
557
+ i--;
558
+ }
559
+ }
560
+ }
561
/**
 * @internal
 * Guard helper: throw if this context was already disposed.
 * @throws {DisposedError}
 */
_ensureNotDisposed() {
    if (this._disposed)
        throw new DisposedError();
}
566
/**
 * @internal
 * Load and apply the given LoRA adapters to this context.
 *
 * Each entry's `scale` defaults to `defaultLoraScale` when omitted.
 * Newly-applied adapters are tracked in `_loraAdapters` and their usage count is incremented
 * so the model knows they are still referenced.
 */
async _setLoras(loras) {
    const addonLoras = [];
    const addonScales = [];
    for (const { filePath, scale } of loras) {
        const lora = await this._model._getOrLoadLora(filePath);
        addonLoras.push(lora);
        addonScales.push(scale ?? defaultLoraScale);
    }
    this._ctx.setLoras(addonLoras, addonScales);
    for (const addonLora of addonLoras) {
        if (!this._loraAdapters.has(addonLora)) {
            this._loraAdapters.add(addonLora);
            addonLora.usages++;
        }
    }
}
583
/**
 * @internal
 * Reserve evaluation threads from the shared thread splitter for an upcoming decode,
 * cancelling any pending scheduled release. No-op if a consumer is already held.
 */
_reserveThreads() {
    clearTimeout(this._freeReservedThreadsTimeout);
    delete this._freeReservedThreadsTimeout;
    if (this._threadSplitterConsumer != null)
        return;
    this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
}
591
/**
 * @internal
 * Release the reserved thread-splitter consumer (if any) and cancel a pending scheduled release.
 * Bound to `this` in the constructor so it can be passed directly to `setTimeout`.
 */
_freeReservedThreads() {
    clearTimeout(this._freeReservedThreadsTimeout);
    delete this._freeReservedThreadsTimeout;
    if (this._threadSplitterConsumer == null)
        return;
    this._threadSplitterConsumer.dispose();
    delete this._threadSplitterConsumer;
}
600
/**
 * @internal
 * Schedule releasing the reserved threads on the next tick instead of immediately,
 * so a decode that follows right away can keep the reservation (see `_reserveThreads`).
 */
_scheduleToFreeReservedThreads() {
    if (this._threadSplitterConsumer == null)
        return;
    clearTimeout(this._freeReservedThreadsTimeout);
    this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
}
607
/**
 * @internal
 * Resolve all context options (sequences, flash attention, KV cache types, context size,
 * LoRA adapters), then create the native context - retrying with a shrunk context size
 * on failure according to the `failedCreationRemedy` option.
 *
 * @throws the creation error once retries are exhausted, shrinking makes no progress,
 * or `createSignal` aborts.
 */
static async _create(options, { _model }) {
    const kvUnified = false;
    const sequences = Math.max(1, Math.floor(options.sequences ?? getDefaultContextSequences()));
    const flashAttention = _model.flashAttentionSupported
        ? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
        : false;
    // "currentQuant" mirrors the model's dominant tensor quantization for the KV cache
    const kvCacheKeyType = options.experimentalKvCacheKeyType === "currentQuant"
        ? _model.fileInsights.dominantTensorType ?? _model.defaultContextKvCacheKeyType
        : resolveGgmlTypeOption(options.experimentalKvCacheKeyType) ?? _model.defaultContextKvCacheKeyType;
    const kvCacheValueType = options.experimentalKvCacheValueType === "currentQuant"
        ? _model.fileInsights.dominantTensorType ?? _model.defaultContextKvCacheValueType
        : resolveGgmlTypeOption(options.experimentalKvCacheValueType) ?? _model.defaultContextKvCacheValueType;
    const swaFullCache = options.swaFullCache ?? _model.defaultContextSwaFullCache;
    // a plain string is shorthand for a single adapter at default scale
    const loraOptions = typeof options.lora === "string"
        ? { adapters: [{ filePath: options.lora }] }
        : options.lora;
    let failedCreationRetries = options.failedCreationRemedy === false
        ? 0
        : Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
    const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
        ? 0
        : options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
    let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
        batchSize: options.batchSize,
        sequences: sequences,
        modelGpuLayers: _model.gpuLayers,
        modelTrainContextSize: _model.trainContextSize,
        flashAttention,
        kvCacheKeyType,
        kvCacheValueType,
        swaFullCache,
        getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
        llamaGpu: _model._llama.gpu,
        ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
        isEmbeddingContext: options._embeddings
    });
    // the floor below which shrink-retries will not reduce the context size
    const minContextSize = options.contextSize === "auto"
        ? shrinkRetriesMinContextSize
        : (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
            ? options.contextSize.min
            : typeof options.contextSize === "number"
                ? options.contextSize
                : shrinkRetriesMinContextSize;
    const { createSignal } = options;
    const paddedContextSize = kvUnified
        ? Math.floor(padSafeContextSize(Math.max(2, contextSize) * sequences, "up") / sequences)
        : padSafeContextSize(Math.max(2, contextSize), "up");
    // adopt the padded size only while staying under the model's train context size
    if (contextSize <= _model.trainContextSize && paddedContextSize < _model.trainContextSize)
        contextSize = paddedContextSize;
    // one creation attempt: estimate resources, reserve memory, init the native context,
    // then apply LoRA adapters; reservations are always released in the finally block
    async function createContext(contextSize) {
        const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
        const resourceRequirementsEstimation = _model.fileInsights.estimateContextResourceRequirements({
            contextSize,
            sequences,
            isEmbeddingContext: options._embeddings,
            modelGpuLayers: _model.gpuLayers,
            batchSize,
            flashAttention,
            kvCacheKeyType,
            kvCacheValueType,
            swaFullCache
        });
        const context = new LlamaContext({ _model }, {
            ...options,
            contextSize,
            batchSize,
            sequences,
            flashAttention,
            experimentalKvCacheKeyType: kvCacheKeyType,
            experimentalKvCacheValueType: kvCacheValueType,
            swaFullCache
        });
        const contextCreationVramReservation = options.ignoreMemorySafetyChecks
            ? null
            : _model._llama._vramOrchestrator.reserveMemory(resourceRequirementsEstimation.gpuVram);
        const contextCreationRamReservation = options.ignoreMemorySafetyChecks
            ? null
            : _model._llama._vramOrchestrator.reserveMemory(resourceRequirementsEstimation.cpuRam);
        try {
            if (createSignal?.aborted)
                throw createSignal.reason;
            const contextLoaded = await context._ctx.init();
            if (createSignal?.aborted) {
                // aborted during init - clean up the native context before rethrowing
                if (contextLoaded)
                    await context._ctx.dispose();
                throw createSignal.reason;
            }
            else if (!contextLoaded)
                throw new Error("Failed to create context");
            contextCreationVramReservation?.dispose?.();
            contextCreationRamReservation?.dispose?.();
            if (loraOptions != null && loraOptions.adapters.length > 0) {
                try {
                    await context._setLoras(loraOptions.adapters);
                    try {
                        loraOptions.onLoadProgress?.(1);
                    }
                    catch (err) {
                        // a throwing user progress callback must not fail creation
                        console.error(err);
                    }
                }
                catch (err) {
                    await context.dispose();
                    throw err;
                }
                if (createSignal?.aborted) {
                    await context.dispose();
                    throw createSignal.reason;
                }
            }
            else if (loraOptions?.onLoadProgress != null) {
                // no adapters to load - still report completion to the callback
                try {
                    loraOptions.onLoadProgress(1);
                }
                catch (err) {
                    console.error(err);
                }
            }
            return context;
        }
        finally {
            contextCreationVramReservation?.dispose?.();
            contextCreationRamReservation?.dispose?.();
        }
    }
    while (failedCreationRetries >= 0) {
        try {
            return await createContext(contextSize);
        }
        catch (err) {
            if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
                throw err;
            failedCreationRetries--;
            // shrink by a fixed fraction or via a user-supplied shrink function
            let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
                ? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
                : Math.floor(failedCreationAutoContextSizeShrink(contextSize));
            if (!Number.isFinite(newContextSize))
                throw err;
            if (newContextSize < minContextSize)
                newContextSize = minContextSize;
            // no progress possible - give up with the original error
            if (newContextSize >= contextSize)
                throw err;
            contextSize = newContextSize;
        }
    }
    throw new Error("Failed to create context");
}
755
+ }
756
export class LlamaContextSequence {
    /** @internal */ _sequenceId; // id of this sequence within the parent context
    /** @internal */ _gcRegistry; // reclaims the sequence id when this object is GCed without being disposed
    /** @internal */ _context; // the parent LlamaContext
    /** @internal */ _contextShift; // {size, strategy} used when the state overflows
    /** @internal */ _tokenPredictor; // optional speculative token predictor
    /** @internal */ _checkpoints = new LlamaContextSequenceCheckpoints();
    /** @internal */ _checkpointOptions;
    /** @internal */ _tokenMeter; // tracks input/output token usage
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _lock = {};
    /** @internal */ _resetTokenPredictor = false;
    /** @internal */ _tokenPredictorOwner = {};
    /** @internal */ _contextTokens = []; // tokens currently in the sequence state (including loaded predictions)
    /** @internal */ _nextTokenIndex = 0; // index of the next state cell (including loaded predictions)
    /** @internal */ _loadedTokenPredictions = []; // predicted tokens currently loaded into the state
    /** @internal */ _usedTokenPredictions = 0;
    /** @internal */ _unusedTokenPredictions = 0;
    /** @internal */ _validatedTokenPredictions = 0;
    /** @internal */ _refutedTokenPredictions = 0;
    /** @internal */ _disposed = false;
    onDispose = new EventRelay();
778
/**
 * Wire up the sequence's state, checkpoint options, GC-based id reclamation,
 * and disposal chain. Use `LlamaContextSequence._create` instead of calling directly.
 */
constructor({ sequenceId, context, tokenMeter, contextShift, tokenPredictor, checkpoints }) {
    this._sequenceId = sequenceId;
    this._context = context;
    this._tokenMeter = tokenMeter ?? new TokenMeter();
    this._contextShift = contextShift;
    this._tokenPredictor = tokenPredictor;
    this._checkpointOptions = {
        max: checkpoints?.max ?? defaultCheckpointOptions.max,
        interval: checkpoints?.interval ?? defaultCheckpointOptions.interval,
        maxMemory: checkpoints?.maxMemory ?? defaultCheckpointOptions.maxMemory
    };
    // if this sequence is GCed without being disposed, its id is reclaimed by the context
    this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
    this._gcRegistry.register(this, sequenceId);
    this._disposeAggregator.add(() => this._gcRegistry.unregister(this));
    this._disposeAggregator.add(this.onDispose.dispatchEvent);
    // dispose this sequence when the model is disposed (weakly referenced to allow GC)
    this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextSequenceIfReferenced.bind(null, new WeakRef(this))));
    this._disposeAggregator.add(() => {
        this._checkpoints.clearAllCheckpoints();
        this._context._reclaimUnusedSequenceId(this._sequenceId);
    });
    if (this._tokenPredictor != null)
        this._disposeAggregator.add(this._tokenPredictor);
    // bound so it can be used directly as an after-batch callback
    this._takeIntervalCheckpointIfNeededAfterBatch = this._takeIntervalCheckpointIfNeededAfterBatch.bind(this);
}
802
/**
 * Dispose the sequence, clear its state tokens and reclaim its id back to the context.
 * Idempotent: repeated calls are no-ops.
 */
dispose() {
    if (this._disposed)
        return;
    this._disposeAggregator.dispose();
    this._contextTokens.length = 0;
    this._disposed = true;
}
809
/** @hidden Support for `using` (explicit resource management); delegates to {@link dispose}. */
[Symbol.dispose]() {
    return this.dispose();
}
813
/** Whether this sequence has been disposed */
get disposed() {
    return this._disposed;
}
816
/** The context this sequence belongs to */
get context() {
    return this._context;
}
819
/** The model of the context this sequence belongs to */
get model() {
    return this._context.model;
}
822
/** The maximum number of tokens that the sequence state can hold */
get contextSize() {
    return this._context.contextSize;
}
826
/** The index where the next evaluated token will be placed in the context (excluding loaded token predictions) */
get nextTokenIndex() {
    return this._nextTokenIndex - this._loadedTokenPredictions.length;
}
830
+ /** The current context state tokens */
831
+ get contextTokens() {
832
+ if (this._loadedTokenPredictions.length === 0)
833
+ return this._contextTokens.slice();
834
+ return this._contextTokens.slice(0, -this._loadedTokenPredictions.length);
835
+ }
836
/** The token meter tracking this sequence's input/output token usage */
get tokenMeter() {
    return this._tokenMeter;
}
839
/**
 * The token predictor used when creating this sequence, or `undefined` if none was provided.
 */
get tokenPredictor() {
    return this._tokenPredictor;
}
845
/**
 * Get the index of the first token in the KV cache.
 *
 * If you remove any tokens from the state that come before this index,
 * no cached prefix tokens evaluation state will be used for the next evaluation.
 *
 * For example, if `stateCellsStartIndex` is `10` and you remove the range `{start: 11, end: 16}`
 * then the cached state for range `0-10` will be used in the next evaluation,
 * but if you remove the range `{start: 10, end: 16}` (or `{start: 9, end: 16}`) then the cached state will not be used at all
 * and will be re-evaluated in the next evaluation.
 *
 * This index can be greater than `0` only when SWA (Sliding Window Attention) is used (only on supported models).
 *
 * When SWA is used, this index will usually be `Math.max(-1, .nextTokenIndex - .model.fileInsights.swaSize)` or larger.
 *
 * When the KV cache is empty, this index will be `-1`.
 *
 * You can disable SWA by setting the `swaFullCache` option to `true` when creating a context.
 * @throws {DisposedError} if the sequence was disposed
 */
get stateCellsStartIndex() {
    this._ensureNotDisposed();
    return this._context._ctx.getSequenceKvCacheMinPosition(this._sequenceId);
}
868
/**
 * Statistics of token predictions using the sequence's `tokenPredictor`.
 *
 * The statistics change only when token prediction is used in this sequence.
 *
 * `validated` + `refuted` = total number of evaluated predictions.
 *
 * Prefer using `validated` and `refuted` to evaluate the effectiveness of token prediction.
 */
get tokenPredictions() {
    return {
        used: this._usedTokenPredictions,
        unused: this._unusedTokenPredictions,
        validated: this._validatedTokenPredictions,
        refuted: this._refutedTokenPredictions
    };
}
885
/** Whether the sequence state is still loaded in memory (i.e. not disposed) */
get isLoadedToMemory() {
    return !this._disposed;
}
888
+ compareContextTokens(tokens) {
889
+ for (let i = 0; i < this._contextTokens.length - this._loadedTokenPredictions.length; i++) {
890
+ if (compareTokens(this._contextTokens[i], tokens[i]))
891
+ continue;
892
+ return {
893
+ firstDifferentIndex: i
894
+ };
895
+ }
896
+ return {
897
+ firstDifferentIndex: this._contextTokens.length - this._loadedTokenPredictions.length
898
+ };
899
+ }
900
/**
 * Erase parts of the context state to align it with the given tokens.
 *
 * If the given tokens do not align with the current context state, the context state will be erased to align with the given tokens.
 *
 * To find the first different token index between the context state and the given tokens, access the `nextTokenIndex` property.
 *
 * If `allowShift` is `true` (the default), shifting tokens may happen to align the context state with the given tokens,
 * which incurs token evaluation of the shifted tokens.
 */
async adaptStateToTokens(tokens, allowShift = true) {
    // recurrent models and deepseek2 cannot have their state tokens shifted
    const modelSupportsShifting = !this.model.fileInsights.isRecurrent &&
        this.model.fileInfo.metadata?.general?.architecture !== GgufArchitectureType.deepseek2;
    if (!modelSupportsShifting || !allowShift) {
        // no shifting: erase everything from the first mismatch onward
        const { firstDifferentIndex } = this.compareContextTokens(tokens);
        if (firstDifferentIndex < this.nextTokenIndex)
            await this._eraseContextTokenRanges([{
                start: firstDifferentIndex,
                end: this._nextTokenIndex
            }]);
        return;
    }
    // with shifting allowed, collect every mismatching run as an erase range,
    // advancing through `tokens` only on matches
    const eraseRanges = [];
    let tokensIndex = 0;
    let differentTokenIndex = undefined;
    for (let i = 0; i < this._contextTokens.length - this._loadedTokenPredictions.length && tokensIndex < tokens.length; i++) {
        if (compareTokens(this._contextTokens[i], tokens[tokensIndex])) {
            if (differentTokenIndex != null) {
                eraseRanges.push({
                    start: differentTokenIndex,
                    end: i
                });
                differentTokenIndex = undefined;
            }
            tokensIndex++;
            continue;
        }
        if (differentTokenIndex == null)
            differentTokenIndex = i;
    }
    // a mismatch run that reaches the end extends to the whole remaining state
    if (differentTokenIndex != null)
        eraseRanges.push({
            start: differentTokenIndex,
            end: this._nextTokenIndex
        });
    if (eraseRanges.length > 0)
        await this._eraseContextTokenRanges(eraseRanges);
}
948
/**
 * Clear the history of the sequence by erasing all of its state tokens.
 * @throws {DisposedError} if the sequence was disposed
 */
async clearHistory() {
    this._ensureNotDisposed();
    await this._eraseContextTokenRanges([{ start: 0, end: this._nextTokenIndex }]);
}
955
/**
 * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
 * The start of each range is inclusive, and the end of each range is exclusive.
 * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
 */
eraseContextTokenRanges(ranges) {
    return this._eraseContextTokenRanges(ranges);
}
963
    /**
     * Remove the given token ranges from the sequence state, normalizing/merging the ranges first,
     * deleting the matching KV-cache cells, and shifting the remaining cells left to close the gaps.
     * If the native deletion fails (or would invalidate the minimum cached KV position),
     * falls back to restoring the last usable checkpoint — or reevaluating the whole sequence from scratch.
     * @internal
     */
    async _eraseContextTokenRanges(ranges, { canResetTokenPredictor = true, canRemovePredictionTokens = true, skipLock = false } = {}) {
        this._ensureNotDisposed();
        let awaitEvaluationPromise;
        await withLock([this._context, "context"], async () => {
            this._ensureNotDisposed();
            if (ranges.length === 0)
                return;
            // if the deletion fails, we'll have to dispose the sequence and fill it up again
            let deletionSuccessful = true;
            // normalize: drop empty/out-of-bounds ranges, swap inverted bounds, clamp to the
            // evaluated region, sort by start, and merge overlapping/adjacent ranges
            const resolvedRanges = ranges
                .map(({ start, end }) => {
                    if (start === end)
                        return null;
                    if (start > end)
                        [start, end] = [end, start];
                    if (end > this._nextTokenIndex)
                        end = this._nextTokenIndex;
                    if (start >= this._nextTokenIndex)
                        return null;
                    return { start, end };
                })
                .filter((range) => range != null)
                .sort((a, b) => a.start - b.start)
                .reduce((ranges, range) => {
                    if (ranges.length === 0)
                        return [range];
                    const lastRange = ranges[ranges.length - 1];
                    if (lastRange.end >= range.start) {
                        lastRange.end = Math.max(lastRange.end, range.end);
                        return ranges;
                    }
                    ranges.push(range);
                    return ranges;
                }, []);
            const minKvCachePosition = (this._contextTokens.length === 0 && this._loadedTokenPredictions.length === 0)
                ? 0
                : Math.max(0, this._context._ctx.getSequenceKvCacheMinPosition(this._sequenceId));
            if (resolvedRanges[0] != null && resolvedRanges[0].start <= minKvCachePosition)
                // we have to drop the cache and reevaluate the sequence due to missing KV cache
                deletionSuccessful = false;
            // when erasing anything, also drop any not-yet-consumed speculative prediction tokens
            const tokenPredictionsToRemove = (resolvedRanges.length > 0 && canRemovePredictionTokens)
                ? this._loadedTokenPredictions.length
                : 0;
            if (tokenPredictionsToRemove > 0) {
                const startDeleteIndex = this._nextTokenIndex - this._loadedTokenPredictions.length;
                const lastDeleteRange = resolvedRanges[resolvedRanges.length - 1];
                // extend the last range to swallow the prediction tail, or append a new range for it
                if (lastDeleteRange.end >= startDeleteIndex)
                    lastDeleteRange.end = this._nextTokenIndex;
                else
                    resolvedRanges.push({ start: startDeleteIndex, end: this._nextTokenIndex });
                if (canResetTokenPredictor)
                    await this._abortTokenPredictor(true);
            }
            let removedTokens = 0;
            let lastDeleteRangeEndPos = null;
            for (const range of resolvedRanges) {
                this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
                if (deletionSuccessful)
                    deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
                // shift the cells between the previous deleted range and this one left
                // so the remaining cells stay contiguous
                if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start) {
                    this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
                    const shiftedTokens = range.start - lastDeleteRangeEndPos;
                    this._tokenMeter.useTokens(shiftedTokens, "input");
                }
                removedTokens += range.end - range.start;
                lastDeleteRangeEndPos = range.end;
            }
            if (tokenPredictionsToRemove > 0)
                this._loadedTokenPredictions.splice(0, tokenPredictionsToRemove);
            // shift the tail after the last deleted range
            if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 &&
                lastDeleteRangeEndPos !== this._nextTokenIndex) {
                this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
                const shiftedTokens = this._nextTokenIndex - lastDeleteRangeEndPos;
                this._tokenMeter.useTokens(shiftedTokens, "input");
            }
            this._nextTokenIndex -= removedTokens;
            if (canResetTokenPredictor && removedTokens > 0)
                await this._abortTokenPredictor(true);
            this._checkpoints.pruneFromEndToIndex(this._contextTokens.length - 1);
            if (deletionSuccessful)
                return;
            // native deletion failed: try to restore the most recent checkpoint that still fits
            let restoreCheckpointIndex = this._contextTokens.length - 1;
            const existingCheckpoint = this._checkpoints.getLastCheckpoint(restoreCheckpointIndex, this.contextSize);
            if (existingCheckpoint != null &&
                restoreCheckpointIndex >= existingCheckpoint.minPos &&
                existingCheckpoint.maxPos <= this.contextSize) {
                restoreCheckpointIndex = Math.min(restoreCheckpointIndex, existingCheckpoint.maxPos);
                const restoredSuccessfully = await this._context._ctx.restoreCheckpoint(existingCheckpoint, restoreCheckpointIndex);
                if (restoredSuccessfully) {
                    const tokensToEvaluate = this._contextTokens.slice(restoreCheckpointIndex + 1);
                    this._contextTokens = this._contextTokens.slice(0, restoreCheckpointIndex + 1);
                    this._nextTokenIndex = restoreCheckpointIndex + 1;
                    // wait for the evaluation outside the "context" lock to avoid deadlocks
                    if (tokensToEvaluate.length > 0)
                        awaitEvaluationPromise = this.evaluateWithoutGeneratingNewTokens(tokensToEvaluate, { _skipLock: skipLock });
                    return;
                }
            }
            // last resort: dispose the native sequence and reevaluate all remaining tokens
            const newSequenceTokens = this._contextTokens.slice();
            this._nextTokenIndex = 0;
            this._context._ctx.disposeSequence(this._sequenceId);
            this._contextTokens = [];
            // wait for the evaluation outside the "context" lock to avoid deadlocks
            if (newSequenceTokens.length > 0)
                awaitEvaluationPromise = this.evaluateWithoutGeneratingNewTokens(newSequenceTokens, { _skipLock: skipLock });
        });
        if (awaitEvaluationPromise != null) {
            await awaitEvaluationPromise;
            if (this.needsCheckpoints && this._checkpoints.lastCheckpointIndex !== this._nextTokenIndex - 1)
                await this.takeCheckpoint();
        }
    }
1076
+ /**
1077
+ * Evaluate the provided tokens into the context sequence, and continue generating new tokens on iterator iterations.
1078
+ *
1079
+ * This method uses the token predictor (when provided) to generate new tokens faster.
1080
+ */
1081
+ async *evaluate(tokens, options = {}) {
1082
+ const iterator = this.evaluateWithMetadata(tokens, {}, options);
1083
+ let iterateInput = undefined;
1084
+ try {
1085
+ while (true) {
1086
+ const { value, done } = await iterator.next(iterateInput);
1087
+ if (done)
1088
+ return;
1089
+ iterateInput = yield value.token;
1090
+ }
1091
+ }
1092
+ finally {
1093
+ await iterator.return();
1094
+ }
1095
+ }
1096
+ /**
1097
+ * Like {@link evaluate `.evaluate(...)`}, but with additional metadata for each generated token.
1098
+ *
1099
+ * Configure the additional metadata options to choose which metadata to include.
1100
+ */
1101
+ evaluateWithMetadata(tokens, metadata, options = {}) {
1102
+ const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, xtc, grammarEvaluationState, repeatPenalty, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
1103
+ if (this._tokenPredictor != null && !_noSampling && tokens.length > 0)
1104
+ return this._speculativeEvaluate(tokens, metadata, {
1105
+ temperature,
1106
+ minP,
1107
+ topK,
1108
+ topP,
1109
+ seed,
1110
+ xtc,
1111
+ grammarEvaluationState,
1112
+ repeatPenalty,
1113
+ dryRepeatPenalty,
1114
+ tokenBias,
1115
+ evaluationPriority,
1116
+ contextShiftOptions: {
1117
+ size: contextShiftSize,
1118
+ strategy: contextShiftStrategy
1119
+ },
1120
+ yieldEogToken,
1121
+ tokenPredictor: this._tokenPredictor
1122
+ });
1123
+ return this._evaluate(tokens, metadata, {
1124
+ temperature,
1125
+ minP,
1126
+ topK,
1127
+ topP,
1128
+ seed,
1129
+ xtc,
1130
+ grammarEvaluationState,
1131
+ repeatPenalty,
1132
+ dryRepeatPenalty,
1133
+ tokenBias,
1134
+ evaluationPriority,
1135
+ contextShiftOptions: {
1136
+ size: contextShiftSize,
1137
+ strategy: contextShiftStrategy
1138
+ },
1139
+ yieldEogToken,
1140
+ _noSampling
1141
+ });
1142
+ }
1143
    /**
     * Evaluate the provided tokens into the context sequence without generating new tokens.
     *
     * Also realigns the token predictor (when one is set) with the new sequence state,
     * in parallel with draining the evaluation iterator.
     */
    async evaluateWithoutGeneratingNewTokens(tokens, options = {}) {
        const { evaluationPriority = defaultEvaluationPriority, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, _skipLock = false } = options;
        // generateNewTokens: false makes the generator evaluate only, never sampling
        const iterator = this._evaluate(tokens, {}, {
            generateNewTokens: false,
            evaluationPriority,
            contextShiftOptions: {
                size: contextShiftSize,
                strategy: contextShiftStrategy
            },
            _skipLock
        });
        // start resetting the predictor to the post-evaluation state before draining the iterator,
        // so both can progress concurrently
        const predictorAlignmentPromise = this.tokenPredictor == null
            ? undefined
            : this._tokenPredictor?.reset({
                stateTokens: [...this._contextTokens, ...tokens],
                evaluateOptions: {
                    evaluationPriority,
                    contextShift: {
                        size: contextShiftSize,
                        strategy: contextShiftStrategy
                    }
                },
                targetSequence: this
            });
        if (predictorAlignmentPromise != null) {
            // a fresh owner object invalidates any in-flight speculative evaluation's claim on the predictor
            this._tokenPredictorOwner = {};
            this._resetTokenPredictor = false;
        }
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        for await (const token of iterator) {
            // Array.from doesn't work with async generators, so we have to iterate over the generator
        }
        await iterator.return();
        if (predictorAlignmentPromise != null)
            await predictorAlignmentPromise;
    }
1182
    /**
     * Evaluate the provided tokens into the context sequence with custom options for each token.
     *
     * This method allows for more precise control of the generation process.
     *
     * A next token will be generated for a given token only if any of the `generateNext` options for it are used.
     *
     * To generate more tokens after this method finishes,
     * use it again with token(s) you selected to add to the context from the previous evaluation.
     *
     * This method doesn't use the token predictor (when provided) since it cannot predict which tokens are actually needed.
     * Use the `evaluate` method when you need to use token prediction.
     * @returns An array where for each token in the input array, there can be an output item at the same index in the output array.
     * For indexes that have no output, there won't be any value at the corresponding index in the output array.
     *
     * It's recommended to iterate from `0` up to the length of the input array to check the results in the output array.
     */
    async controlledEvaluate(input, options) {
        const { evaluationPriority = defaultEvaluationPriority, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {} } = options ?? {};
        const contextShiftOptions = {
            size: contextShiftSize,
            strategy: contextShiftStrategy
        };
        this._ensureNotDisposed();
        if (input.length === 0)
            return [];
        await this._abortTokenPredictor();
        const sampler = new LlamaSampler(this.model);
        const onTokenResult = safeEventCallback(options?.onTokenResult);
        // sparse array: `true` at index i requests logits for input token i
        const logitsArray = [];
        // input items are either a bare token or a [token, options] pair
        const resolvedTokens = input.map((item, index) => {
            if (item instanceof Array) {
                const [token, options] = item;
                const generateNext = options?.generateNext ?? {};
                if (generateNext.probabilities === true || generateNext.confidence === true || generateNext.token === true)
                    logitsArray[index] = true;
                return token;
            }
            return item;
        });
        const evaluatorLock = await acquireLock([this._lock, "evaluate"]);
        try {
            return await this._decodeTokens(resolvedTokens, logitsArray, evaluationPriority, this._tokenMeter, contextShiftOptions, async (batchLogitIndex, tokenIndex) => {
                const inputToken = input[tokenIndex];
                const inputOptions = inputToken instanceof Array
                    ? (inputToken[1] ?? {})
                    : {};
                const generateNext = inputOptions.generateNext;
                // skip sampling entirely when nothing was requested for this token
                if (generateNext == null || ((generateNext.probabilities == null || !generateNext.probabilities) &&
                    (generateNext.token == null || !generateNext.token) &&
                    (generateNext.confidence == null || !generateNext.confidence)))
                    return undefined;
                const sampleOptions = generateNext.options ?? {};
                const samplerConfig = this._resolveSamplerConfig({
                    temperature: sampleOptions.temperature,
                    minP: sampleOptions.minP,
                    topK: sampleOptions.topK,
                    topP: sampleOptions.topP,
                    seed: sampleOptions.seed,
                    xtc: sampleOptions.xtc,
                    repeatPenalty: sampleOptions.repeatPenalty,
                    dryRepeatPenalty: sampleOptions.dryRepeatPenalty,
                    tokenBias: sampleOptions.tokenBias
                });
                // the sampler is shared across all tokens in this call, so serialize access to it
                return await withLock([sampler, "sample"], async () => {
                    if (sampler.disposed)
                        return undefined;
                    sampler.applyConfig(samplerConfig);
                    const [token, probabilities, confidence] = await this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler, !!generateNext.probabilities, !!generateNext.confidence);
                    const output = {
                        next: {}
                    };
                    if (generateNext.token)
                        output.next.token = token === -1
                            ? null
                            : (token ?? null);
                    if (confidence != null)
                        output.next.confidence = confidence;
                    if (probabilities != null)
                        output.next.probabilities = reviveTokenProbabilities(probabilities);
                    onTokenResult?.(tokenIndex, output);
                    return output;
                });
            }, this._takeIntervalCheckpointIfNeededAfterBatch);
        }
        finally {
            evaluatorLock.dispose();
            // dispose the sampler under its lock; fire-and-forget on purpose
            void withLock([sampler, "sample"], sampler.asyncDispose);
        }
    }
1272
+ /* eslint-disable @stylistic/max-len */
1273
+ /**
1274
+ * Save the current context sequence evaluation state to a file.
1275
+ * @see [Saving and restoring a context sequence evaluation state](https://node-llama-cpp.withcat.ai/guide/chat-session#save-and-restore-with-context-sequence-state)
1276
+ */
1277
+ async saveStateToFile(filePath) {
1278
+ /* eslint-enable @stylistic/max-len */
1279
+ this._ensureNotDisposed();
1280
+ const resolvedPath = path.resolve(process.cwd(), filePath);
1281
+ const evaluatorLock = await acquireLock([this._lock, "evaluate"]);
1282
+ const contextLock = await acquireLock([this._context, "context"]);
1283
+ try {
1284
+ this._ensureNotDisposed();
1285
+ // TODO: save checkpoints to disk
1286
+ const fileSize = await this._context._ctx.saveSequenceStateToFile(resolvedPath, this._sequenceId, Uint32Array.from(this.contextTokens));
1287
+ return { fileSize };
1288
+ }
1289
+ finally {
1290
+ contextLock.dispose();
1291
+ evaluatorLock.dispose();
1292
+ }
1293
+ }
1294
    /* eslint-disable @stylistic/max-len */
    /**
     * Load a context sequence evaluation state from a file.
     *
     * Trying to load a state file with a longer context size than the current sequence's context size will fail and throw an error.
     *
     * You must ensure that the file was created from the exact same model, otherwise, using this function may crash the process.
     * @see [Saving and restoring a context sequence evaluation state](https://node-llama-cpp.withcat.ai/guide/chat-session#save-and-restore-with-context-sequence-state)
     */
    async loadStateFromFile(filePath, acceptRisk) {
        /* eslint-enable @stylistic/max-len */
        if (!acceptRisk.acceptRisk)
            throw new Error("The `acceptRisk` option must be set to `true` to use this feature");
        this._ensureNotDisposed();
        const resolvedPath = path.resolve(process.cwd(), filePath);
        // hold both locks so no evaluation can interleave with the state replacement
        const evaluatorLock = await acquireLock([this._lock, "evaluate"]);
        const contextLock = await acquireLock([this._context, "context"]);
        try {
            this._ensureNotDisposed();
            // invalidate any in-flight speculative evaluation's claim on the predictor
            this._tokenPredictorOwner = {};
            await this._abortTokenPredictor(true);
            this._ensureNotDisposed();
            // reset the JS-side state before the native load replaces the sequence
            this._loadedTokenPredictions.length = 0;
            this._nextTokenIndex = 0;
            this._contextTokens = [];
            const tokens = Array.from(await this._context._ctx.loadSequenceStateFromFile(resolvedPath, this._sequenceId, this.contextSize));
            if (tokens.length > this.contextSize) {
                // the native sequence now holds an oversized state - drop it rather than keep it inconsistent
                this._context._ctx.disposeSequence(this._sequenceId);
                throw new Error("The given state file is too large for the current context size");
            }
            this._contextTokens = tokens;
            this._nextTokenIndex = tokens.length;
            this._loadedTokenPredictions.length = 0;
        }
        finally {
            contextLock.dispose();
            evaluatorLock.dispose();
        }
    }
1333
+ /**
1334
+ * When reusing a prefix evaluation state is not possible for the current context sequence
1335
+ * (like in contexts from recurrent and hybrid models,
1336
+ * or with models that use SWA (Sliding Window Attention) when the `swaFullCache` option is not enabled on the context),
1337
+ * you can use this method to checkpoint the current context sequence state.
1338
+ * Those checkpoints will automatically be used when trying to erase parts of the context state that come after a checkpointed state,
1339
+ * and be freed from memory when no longer relevant.
1340
+ *
1341
+ * Those checkpoints are relatively lightweight compared to saving the entire state,
1342
+ * but taking too many checkpoints can increase memory usage.
1343
+ * Checkpoints are stored in the RAM (not VRAM).
1344
+ *
1345
+ * Calling this method on a context sequence from a model that natively supports prefix evaluation state reuse will have no effect.
1346
+ *
1347
+ * > **Note:** to check whether the current context sequence needs taking checkpoints,
1348
+ * > you can use the {@link needsCheckpoints `.needsCheckpoints`} property.
1349
+ */
1350
+ async takeCheckpoint() {
1351
+ if (!this.needsCheckpoints)
1352
+ return;
1353
+ return await withLock([this._context, "context"], () => {
1354
+ return this._takeCheckpoint(undefined, this._checkpointOptions.max);
1355
+ });
1356
+ }
1357
+ /** @internal */
1358
+ async _takeNamedCheckpoint(name, maxNamedCheckpoints) {
1359
+ if (!this.needsCheckpoints)
1360
+ return;
1361
+ return await withLock([this._context, "context"], () => {
1362
+ return this._takeCheckpoint(name, maxNamedCheckpoints);
1363
+ });
1364
+ }
1365
+ /**
1366
+ * Whether the current context sequence needs taking checkpoints of the context state to be able to reuse
1367
+ * it as a prefix evaluation state in the future.
1368
+ *
1369
+ * See {@link takeCheckpoint `.takeCheckpoint()`} for more details.
1370
+ */
1371
+ get needsCheckpoints() {
1372
+ if (this.model.fileInsights.isHybrid || this.model.fileInsights.isRecurrent)
1373
+ return true;
1374
+ else if (this.model.fileInsights.swaSize != null && !this._context._swaFullCache)
1375
+ return true;
1376
+ return false;
1377
+ }
1378
+ /**
1379
+ * The index of the last taken checkpoint that's available for prefix reuse
1380
+ */
1381
+ get lastCheckpointIndex() {
1382
+ return Math.max(0, Math.min(this._checkpoints.lastCheckpointIndex, this.nextTokenIndex - 1));
1383
+ }
1384
+ /**
1385
+ * The total memory usage in bytes of all the checkpoints currently held for this context sequence
1386
+ */
1387
+ get checkpointsMemoryUsage() {
1388
+ return this._checkpoints.memoryUsage;
1389
+ }
1390
    /**
     * Create a native checkpoint of the sequence's current state and store it in the checkpoint registry,
     * respecting the configured memory budget. No-op when checkpoints aren't needed, the sequence is empty,
     * or an equivalent checkpoint already exists.
     * @internal
     */
    async _takeCheckpoint(name, maxNamedCheckpoints) {
        if (!this.needsCheckpoints || this._nextTokenIndex === 0 || this._checkpoints.hasCheckpoint(name, this._nextTokenIndex - 1))
            return;
        // make room before the native checkpoint allocates its memory
        if (this._checkpointOptions.maxMemory != null)
            this._checkpoints.prepareMemoryForIncomingCheckpoint(this._checkpointOptions.maxMemory);
        const checkpoint = new this.model._llama._bindings.AddonContextSequenceCheckpoint();
        await checkpoint.init(this._context._ctx, this._sequenceId);
        // sanity check: the checkpoint should cover exactly up to the last evaluated token
        if (this._nextTokenIndex - 1 !== checkpoint.maxPos)
            this.model._llama._log(LlamaLogLevel.warn, `Checkpoint max position mismatch: expected ${this._nextTokenIndex - 1}, got ${checkpoint.maxPos}`);
        this._checkpoints.storeCheckpoint({
            name,
            maxNamedCheckpoints,
            checkpoint,
            currentMaxPos: checkpoint.maxPos
        });
        // enforce the memory budget again now that the new checkpoint is stored
        if (this._checkpointOptions.maxMemory != null)
            this._checkpoints.pruneToKeepUnderMemoryUsage(this._checkpointOptions.maxMemory);
    }
1409
+ /** @internal */
1410
+ _takeIntervalCheckpointIfNeeded(currentIndex = this._nextTokenIndex - 1) {
1411
+ if (!this.needsCheckpoints)
1412
+ return;
1413
+ const lastCheckpointIndex = this._checkpoints.getLastNamedCheckpointIndex(undefined);
1414
+ if (this._checkpointOptions.interval === false || currentIndex - lastCheckpointIndex < this._checkpointOptions.interval)
1415
+ return;
1416
+ return this._takeCheckpoint(undefined, this._checkpointOptions.max);
1417
+ }
1418
+ /** @internal */
1419
+ _takeIntervalCheckpointIfNeededAfterBatch(sequenceStateLength) {
1420
+ if (sequenceStateLength === 0)
1421
+ return;
1422
+ return this._takeIntervalCheckpointIfNeeded(sequenceStateLength - 1);
1423
+ }
1424
    /**
     * Core evaluation generator: decodes `tokens` into the sequence and (optionally) samples a next token
     * per iteration. Yields `{token, probabilities?, confidence?}` objects; the consumer may pass a
     * replacement token (or array of tokens) back into `next()` to steer the next evaluation step.
     * @internal
     */
    async *_evaluate(tokens, metadata, { temperature, minP, topK, topP, seed, xtc, grammarEvaluationState, repeatPenalty, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false, _skipLock = false }) {
        this._ensureNotDisposed();
        let evalTokens = tokens;
        if (evalTokens.length === 0)
            return;
        // stop the predictor but keep its loaded predictions (first arg false), skipping the lock
        await this._abortTokenPredictor(false, true);
        const sampleProbabilities = metadata.probabilities === true;
        const sampleConfidence = metadata.confidence === true;
        const sampler = new LlamaSampler(this.model);
        try {
            while (true) {
                this._ensureNotDisposed();
                const evaluatorLock = _skipLock
                    ? undefined
                    : await acquireLock([this._lock, "evaluate"]);
                let nextToken;
                const yieldRes = {};
                try {
                    // request logits only for the last input token (where the next token is sampled)
                    const logitsArray = [];
                    if (generateNewTokens)
                        logitsArray[evalTokens.length - 1] = true;
                    // Evaluate to get the next token.
                    const decodeResult = await this._decodeTokens(evalTokens, logitsArray, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
                        if (_noSampling)
                            return null;
                        const samplerConfig = this._resolveSamplerConfig({
                            temperature,
                            minP,
                            topK,
                            topP,
                            seed,
                            xtc,
                            grammarEvaluationState,
                            repeatPenalty,
                            dryRepeatPenalty,
                            tokenBias
                        });
                        return withLock([sampler, "sample"], async () => {
                            if (sampler.disposed)
                                return null;
                            sampler.applyConfig(samplerConfig);
                            if (sampleProbabilities || sampleConfidence)
                                return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler, sampleProbabilities, sampleConfidence);
                            else
                                return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
                        });
                    }, this._takeIntervalCheckpointIfNeededAfterBatch);
                    // the result is either a bare token or a [token, probabilities, confidence] tuple
                    const lastDecodeResult = decodeResult[evalTokens.length - 1];
                    if (lastDecodeResult instanceof Array) {
                        const [token, probabilities, confidence] = lastDecodeResult;
                        nextToken = token;
                        if (probabilities != null)
                            yieldRes.probabilities = reviveTokenProbabilities(probabilities);
                        if (confidence != null)
                            yieldRes.confidence = confidence;
                    }
                    else
                        nextToken = lastDecodeResult;
                    if (nextToken === -1)
                        throw new Error("Failed to sample next token");
                    if (nextToken == null)
                        return;
                    // the model finished generating text
                    if (!yieldEogToken && this._context.model.isEogToken(nextToken))
                        break;
                }
                finally {
                    evaluatorLock?.dispose();
                }
                yieldRes.token = nextToken;
                const replacementToken = yield yieldRes;
                // set the tokens for the next evaluation
                if (replacementToken instanceof Array)
                    evalTokens = replacementToken.slice();
                else if (replacementToken != null)
                    evalTokens = [replacementToken];
                else
                    evalTokens = [nextToken];
            }
        }
        finally {
            // dispose the sampler under its lock; fire-and-forget on purpose
            void withLock([sampler, "sample"], sampler.asyncDispose);
        }
    }
1509
+ /** @internal */
1510
+ async *_speculativeEvaluate(tokens, metadata, { temperature, minP, topK, topP, seed, xtc, grammarEvaluationState, repeatPenalty, dryRepeatPenalty, tokenBias, evaluationPriority = defaultEvaluationPriority, contextShiftOptions, yieldEogToken = false, tokenPredictor }) {
1511
+ this._ensureNotDisposed();
1512
+ let evalTokens = tokens.slice();
1513
+ if (evalTokens.length === 0)
1514
+ return;
1515
+ const tokenPredictorOwner = {};
1516
+ this._tokenPredictorOwner = tokenPredictorOwner;
1517
+ await this._abortTokenPredictor();
1518
+ const sampleProbabilities = metadata.probabilities === true;
1519
+ const sampleConfidence = metadata.confidence === true;
1520
+ let logitsArray = [];
1521
+ let logitsStartIndex = evalTokens.length - 1;
1522
+ const validatedTokens = [];
1523
+ logitsArray[logitsStartIndex] = true;
1524
+ const sampler = new LlamaSampler(this.model);
1525
+ try {
1526
+ while (true) {
1527
+ this._ensureNotDisposed();
1528
+ const evaluatorLock = await acquireLock([this._lock, "evaluate"]);
1529
+ let nextToken;
1530
+ const yieldRes = {};
1531
+ try {
1532
+ if (this._tokenPredictorOwner === tokenPredictorOwner &&
1533
+ this._loadedTokenPredictions.length > 0 &&
1534
+ evalTokens.length === 1 &&
1535
+ evalTokens[0] === this._loadedTokenPredictions[0]?.[0]) {
1536
+ const [token, probabilities, confidence] = this._loadedTokenPredictions.shift()[1];
1537
+ nextToken = token;
1538
+ yieldRes.token = nextToken;
1539
+ if (probabilities != null)
1540
+ yieldRes.probabilities = reviveTokenProbabilities(probabilities);
1541
+ if (confidence != null)
1542
+ yieldRes.confidence = confidence;
1543
+ const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
1544
+ ? grammarEvaluationState()
1545
+ : grammarEvaluationState;
1546
+ if (resolvedGrammarEvaluationState != null)
1547
+ LlamaSampler._acceptTokenOnGrammarEvaluationState(this._context._llama, resolvedGrammarEvaluationState, nextToken);
1548
+ this._unusedTokenPredictions--;
1549
+ this._usedTokenPredictions++;
1550
+ }
1551
+ else if (this._tokenPredictorOwner === tokenPredictorOwner && this._loadedTokenPredictions.length > 0) {
1552
+ const deleteStartIndex = Math.max(0, this._nextTokenIndex - this._loadedTokenPredictions.length);
1553
+ await this._eraseContextTokenRanges([{ start: deleteStartIndex, end: this._nextTokenIndex }], { canResetTokenPredictor: true, canRemovePredictionTokens: true, skipLock: true });
1554
+ this._loadedTokenPredictions.length = 0;
1555
+ if (this.needsCheckpoints) {
1556
+ await this._takeCheckpoint(internalCheckpoints.speculative.name, internalCheckpoints.speculative.maxCheckpoints);
1557
+ await this._takeIntervalCheckpointIfNeeded();
1558
+ }
1559
+ }
1560
+ else if (this._tokenPredictorOwner === tokenPredictorOwner && this.needsCheckpoints) {
1561
+ await this._takeCheckpoint(internalCheckpoints.speculative.name, internalCheckpoints.speculative.maxCheckpoints);
1562
+ await this._takeIntervalCheckpointIfNeeded();
1563
+ }
1564
+ if (this._resetTokenPredictor) {
1565
+ await tokenPredictor.reset({
1566
+ stateTokens: [...this._contextTokens, ...evalTokens],
1567
+ evaluateOptions: {
1568
+ temperature,
1569
+ minP,
1570
+ topK,
1571
+ topP,
1572
+ seed,
1573
+ xtc,
1574
+ grammarEvaluationState: grammarEvaluationState instanceof Function
1575
+ ? grammarEvaluationState()?.clone()
1576
+ : grammarEvaluationState?.clone(),
1577
+ repeatPenalty,
1578
+ dryRepeatPenalty,
1579
+ tokenBias,
1580
+ evaluationPriority,
1581
+ contextShift: contextShiftOptions,
1582
+ yieldEogToken: true
1583
+ },
1584
+ targetSequence: this
1585
+ });
1586
+ this._resetTokenPredictor = false;
1587
+ this._tokenPredictorOwner = tokenPredictorOwner;
1588
+ }
1589
+ if (nextToken == null) {
1590
+ if (this._tokenPredictorOwner === tokenPredictorOwner &&
1591
+ // prevent incurring context shifts due to token prediction validations
1592
+ this._nextTokenIndex + evalTokens.length < this._context.contextSize) {
1593
+ const testGrammarClone = grammarEvaluationState instanceof Function
1594
+ ? grammarEvaluationState()?.clone()
1595
+ : grammarEvaluationState?.clone();
1596
+ for (const token of await tokenPredictor.predictTokens()) {
1597
+ if (testGrammarClone != null) {
1598
+ const canAddToken = LlamaSampler._canBeNextTokenForGrammarEvaluationState(this.model._llama, testGrammarClone, token);
1599
+ if (!canAddToken)
1600
+ break;
1601
+ }
1602
+ evalTokens.push(token);
1603
+ logitsArray[evalTokens.length - 1] = true;
1604
+ // prevent incurring context shifts due to token prediction validations
1605
+ if (this._nextTokenIndex + evalTokens.length >= this._context.contextSize)
1606
+ break;
1607
+ }
1608
+ }
1609
+ let resolvedGrammarEvaluationState = undefined;
1610
+ // Evaluate to get the next token.
1611
+ const decodeResult = await this._decodeTokens(evalTokens, logitsArray, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex, tokenIndex) => {
1612
+ if (tokenIndex === logitsStartIndex)
1613
+ resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
1614
+ ? grammarEvaluationState()
1615
+ : grammarEvaluationState;
1616
+ else if (tokenIndex === logitsStartIndex + 1)
1617
+ resolvedGrammarEvaluationState = resolvedGrammarEvaluationState?.clone();
1618
+ const samplerConfig = this._resolveSamplerConfig({
1619
+ temperature,
1620
+ minP,
1621
+ topK,
1622
+ topP,
1623
+ seed,
1624
+ xtc,
1625
+ grammarEvaluationState: resolvedGrammarEvaluationState,
1626
+ repeatPenalty,
1627
+ dryRepeatPenalty,
1628
+ tokenBias
1629
+ });
1630
+ return withLock([sampler, "sample"], async () => {
1631
+ if (sampler.disposed)
1632
+ return null;
1633
+ sampler.applyConfig(samplerConfig);
1634
+ if (sampleProbabilities || sampleConfidence)
1635
+ return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler, sampleProbabilities, sampleConfidence);
1636
+ else
1637
+ return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
1638
+ });
1639
+ });
1640
+ for (let i = logitsStartIndex; i < evalTokens.length; i++) {
1641
+ const item = decodeResult[i];
1642
+ const [resultToken, probabilities, confidence] = item instanceof Array
1643
+ ? item
1644
+ : [item];
1645
+ if (i === logitsStartIndex) {
1646
+ if (resultToken === -1)
1647
+ throw new Error("Failed to sample next token");
1648
+ if (resultToken == null)
1649
+ return;
1650
+ nextToken = resultToken;
1651
+ yieldRes.token = nextToken;
1652
+ if (probabilities != null)
1653
+ yieldRes.probabilities = reviveTokenProbabilities(probabilities);
1654
+ if (confidence != null)
1655
+ yieldRes.confidence = confidence;
1656
+ }
1657
+ else {
1658
+ if (resultToken === -1 || resultToken == null)
1659
+ break;
1660
+ const lastValidatedTokenOutput = i === logitsStartIndex + 1
1661
+ ? nextToken
1662
+ : validatedTokens.at(-1)?.[1];
1663
+ if (lastValidatedTokenOutput != null && lastValidatedTokenOutput === evalTokens[i]) {
1664
+ this._loadedTokenPredictions.push([evalTokens[i], [resultToken, probabilities, confidence]]);
1665
+ this._validatedTokenPredictions++;
1666
+ this._unusedTokenPredictions++;
1667
+ }
1668
+ else {
1669
+ const deleteSize = Math.min(evalTokens.length - i, this.context.contextSize);
1670
+ this._refutedTokenPredictions += deleteSize;
1671
+ const deleteStartIndex = this._nextTokenIndex - deleteSize;
1672
+ tokenPredictor.stop(true);
1673
+ await this._eraseContextTokenRanges([{
1674
+ start: deleteStartIndex,
1675
+ end: this._nextTokenIndex
1676
+ }], { canResetTokenPredictor: false, canRemovePredictionTokens: false, skipLock: true });
1677
+ break; // the assumption that this token will be generated was wrong
1678
+ }
1679
+ }
1680
+ }
1681
+ }
1682
+ if (nextToken == null)
1683
+ throw new Error("Failed to generated next token");
1684
+ // the model finished generating text
1685
+ if (!yieldEogToken && this._context.model.isEogToken(nextToken))
1686
+ break;
1687
+ }
1688
+ finally {
1689
+ evaluatorLock.dispose();
1690
+ }
1691
+ const replacementToken = yield yieldRes;
1692
+ // set the tokens for the next evaluation
1693
+ if (replacementToken instanceof Array)
1694
+ evalTokens = replacementToken.slice();
1695
+ else if (replacementToken != null)
1696
+ evalTokens = [replacementToken];
1697
+ else
1698
+ evalTokens = [nextToken];
1699
+ if (this._tokenPredictorOwner === tokenPredictorOwner)
1700
+ tokenPredictor.pushTokens(evalTokens);
1701
+ logitsArray = [];
1702
+ logitsStartIndex = evalTokens.length - 1;
1703
+ logitsArray[logitsStartIndex] = true;
1704
+ }
1705
+ }
1706
+ finally {
1707
+ void withLock([sampler, "sample"], sampler.asyncDispose);
1708
+ if (this._tokenPredictorOwner === tokenPredictorOwner)
1709
+ tokenPredictor.stop();
1710
+ }
1711
+ }
1712
+ /** @internal */
1713
+ async _abortTokenPredictor(skipClearingPredictionsFromState = false, skipLock = false) {
1714
+ this._tokenPredictor?.stop();
1715
+ this._resetTokenPredictor = true;
1716
+ if (skipClearingPredictionsFromState)
1717
+ return;
1718
+ if (this._loadedTokenPredictions.length > 0)
1719
+ await this._eraseContextTokenRanges([{
1720
+ start: this._nextTokenIndex - this._loadedTokenPredictions.length,
1721
+ end: this._nextTokenIndex
1722
+ }], { canResetTokenPredictor: true, canRemovePredictionTokens: true, skipLock });
1723
+ }
1724
+ /** @internal */
1725
+ _resolveSamplerConfig({ temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, xtc, grammarEvaluationState, repeatPenalty, dryRepeatPenalty, tokenBias }) {
1726
+ const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
1727
+ ? repeatPenalty.punishTokens()
1728
+ : repeatPenalty?.punishTokens;
1729
+ const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
1730
+ const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
1731
+ ? grammarEvaluationState()
1732
+ : grammarEvaluationState;
1733
+ if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
1734
+ throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
1735
+ const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
1736
+ return removeNullFields({
1737
+ temperature,
1738
+ minP,
1739
+ topK,
1740
+ topP,
1741
+ seed: Math.max(0, Number.isFinite(seed)
1742
+ ? Math.floor(seed ?? (Date.now() / 1000))
1743
+ : Math.floor(Date.now() / 1000)),
1744
+ xtcProbability: xtc == null
1745
+ ? undefined
1746
+ : Math.min(1, Math.max(0, xtc.probability)),
1747
+ xtcThreshold: xtc == null
1748
+ ? undefined
1749
+ : Math.min(1, Math.max(0, xtc.threshold)),
1750
+ repeatPenalty: repeatPenalty?.penalty,
1751
+ repeatPenaltyMaxTokens: maxPunishTokens,
1752
+ repeatPenaltyTokens: repeatPenaltyTokens != null
1753
+ ? Uint32Array.from(repeatPenaltyTokens)
1754
+ : undefined,
1755
+ repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
1756
+ repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
1757
+ dryRepeatPenaltyStrength: (dryRepeatPenalty?.strength == null || dryRepeatPenalty?.strength === 0)
1758
+ ? undefined
1759
+ : Math.max(0, dryRepeatPenalty?.strength),
1760
+ dryRepeatPenaltyBase: dryRepeatPenalty?.base,
1761
+ dryRepeatPenaltyAllowedLength: Math.max(1, dryRepeatPenalty?.allowedLength ?? 2),
1762
+ dryRepeatPenaltyLastTokens: dryRepeatPenalty?.lastTokens == null
1763
+ ? -1
1764
+ : Math.max(0, dryRepeatPenalty?.lastTokens),
1765
+ dryRepeatPenaltySequenceBreakers: dryRepeatPenalty?.sequenceBreakers ?? defaultDryRepeatPenalitySequenceBreakers,
1766
+ tokenBiasKeys,
1767
+ tokenBiasValues,
1768
+ grammarEvaluationState: resolvedGrammarEvaluationState?._state
1769
+ });
1770
+ }
1771
+ /**
1772
+ * The caller of this function has to wrap it with a lock to ensure this function doesn't run concurrently.
1773
+ * @internal
1774
+ */
1775
+ async _decodeTokens(tokens, logits, evaluationPriority, tokenMeter, contextShiftOptions, logitDataMapper, afterBatchAction) {
1776
+ this._ensureNotDisposed();
1777
+ const tokensLeftToDecode = tokens.slice();
1778
+ const tokenLogitsLeftToDecode = logits.slice();
1779
+ let currentTokenIndex = 0;
1780
+ const res = [];
1781
+ const normalizedLogitDataMapper = (batchLogitIndex, contextStateTokenIndex) => {
1782
+ return logitDataMapper(batchLogitIndex, currentTokenIndex + (contextStateTokenIndex - this._nextTokenIndex));
1783
+ };
1784
+ while (tokensLeftToDecode.length > 0) {
1785
+ this._ensureNotDisposed();
1786
+ let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
1787
+ if (freeSpace <= 0) {
1788
+ await this._freeUpSpaceForTokens(contextShiftOptions);
1789
+ freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
1790
+ if (freeSpace <= 0)
1791
+ throw new Error("Failed to free up space for new tokens");
1792
+ }
1793
+ const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
1794
+ const tokensLogits = tokenLogitsLeftToDecode.slice(0, tokensToDecode.length);
1795
+ const generatedLogits = await this._context._decodeTokens({
1796
+ sequenceId: this._sequenceId,
1797
+ tokens: tokensToDecode,
1798
+ firstTokenSequenceIndex: this._nextTokenIndex,
1799
+ logits: tokensLogits,
1800
+ evaluationPriority,
1801
+ tokenMeter,
1802
+ afterBatchAction
1803
+ }, normalizedLogitDataMapper);
1804
+ for (const [index, value] of generatedLogits)
1805
+ res[currentTokenIndex + (index - this._nextTokenIndex)] = value;
1806
+ this._nextTokenIndex += tokensToDecode.length;
1807
+ currentTokenIndex += tokensToDecode.length;
1808
+ this._contextTokens = this._contextTokens.concat(tokensToDecode);
1809
+ }
1810
+ return res;
1811
+ }
1812
+ /** @internal */
1813
+ async _freeUpSpaceForTokens(contextShiftOptions) {
1814
+ this._ensureNotDisposed();
1815
+ const size = Math.min(this._nextTokenIndex, Math.max(1, contextShiftOptions.size instanceof Function
1816
+ ? await contextShiftOptions.size(this)
1817
+ : contextShiftOptions.size));
1818
+ this._ensureNotDisposed();
1819
+ if (contextShiftOptions.strategy === "eraseBeginning") {
1820
+ let eraseStartIndex = 0;
1821
+ if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
1822
+ eraseStartIndex = 1;
1823
+ await this._eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }], { skipLock: true });
1824
+ }
1825
+ else {
1826
+ const ranges = await contextShiftOptions.strategy({
1827
+ sequence: this,
1828
+ size
1829
+ });
1830
+ if (ranges == null)
1831
+ throw new Error("Invalid delete ranges");
1832
+ await this._eraseContextTokenRanges(ranges, { skipLock: true });
1833
+ if (this._nextTokenIndex >= this._context.contextSize - 1)
1834
+ await this._eraseContextTokenRanges([{ start: 0, end: size }], { skipLock: true });
1835
+ }
1836
+ }
1837
+ /** @internal */
1838
+ _ensureNotDisposed() {
1839
+ if (this._disposed)
1840
+ throw new DisposedError();
1841
+ }
1842
+ /**
1843
+ * We need this to make it impossible to manually create instances of this class outside the code of this library
1844
+ * @internal
1845
+ */
1846
+ static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, tokenPredictor, checkpoints }) {
1847
+ return new LlamaContextSequence({
1848
+ sequenceId,
1849
+ context,
1850
+ tokenMeter,
1851
+ contextShift: {
1852
+ size: contextShiftSize,
1853
+ strategy: contextShiftStrategy
1854
+ },
1855
+ tokenPredictor,
1856
+ checkpoints
1857
+ });
1858
+ }
1859
+ }
1860
/**
 * Converts a TokenBias instance (or a lazy getter for one) into the parallel
 * typed arrays the native addon expects.
 * Returns `undefined` for both arrays when there are no biases to apply.
 */
function getTokenBiasesForAddon(tokenBias, currentModel) {
    if (tokenBias == null)
        return {
            tokenBiasKeys: undefined,
            tokenBiasValues: undefined
        };
    const resolvedTokenBias = tokenBias instanceof Function
        ? tokenBias()
        : tokenBias;
    // a TokenBias holds token IDs of a specific tokenizer; using it with another model is invalid
    if (resolvedTokenBias._tokenizer !== currentModel.tokenizer)
        throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
            "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
    const keys = [];
    const values = [];
    for (const [token, bias] of resolvedTokenBias._biases) {
        keys.push(token);
        values.push(bias);
    }
    // both arrays grow in lockstep, so checking one length suffices
    if (keys.length === 0) {
        return {
            tokenBiasKeys: undefined,
            tokenBiasValues: undefined
        };
    }
    return {
        tokenBiasKeys: Uint32Array.from(keys),
        tokenBiasValues: Float32Array.from(values)
    };
}
1888
/**
 * Rebuilds a token->probability Map from the flat `[token, probability, ...]`
 * pair array produced by the native addon.
 * Returns `undefined` when no probabilities were provided.
 */
function reviveTokenProbabilities(probabilities) {
    if (probabilities == null)
        return undefined;
    const tokenToProbability = new Map();
    // entries come in [token, probability] pairs; a trailing unpaired value is ignored
    for (let pairStart = 0; pairStart + 1 < probabilities.length; pairStart += 2)
        tokenToProbability.set(probabilities[pairStart], probabilities[pairStart + 1]);
    return tokenToProbability;
}
1899
// Disposes the context behind the given weak reference, if it is still alive.
// The result of `dispose()` is intentionally discarded (fire-and-forget).
function disposeContextIfReferenced(contextRef) {
    const referencedContext = contextRef.deref();
    if (referencedContext == null)
        return;
    void referencedContext.dispose();
}
1904
// Disposes the context sequence behind the given weak reference, if it is still alive.
function disposeContextSequenceIfReferenced(contextRef) {
    const referencedSequence = contextRef.deref();
    if (referencedSequence == null)
        return;
    referencedSequence.dispose();
}
1909
/**
 * Default batch size for a context: the combined size of all of its
 * sequences, capped at 512 tokens.
 */
export function getDefaultContextBatchSize({ contextSize, sequences }) {
    const combinedContextSize = contextSize * sequences;
    return Math.min(combinedContextSize, 512);
}
1912
/**
 * The default number of sequences a context is created with.
 */
export function getDefaultContextSequences() {
    return 1;
}
1915
// Fallback context size used when a model does not report its training context size.
const defaultFallbackContextSize = 4096;
/**
 * Resolves the default context size for a model: its training context size
 * when known, otherwise a fixed fallback of 4096 tokens.
 */
export function getDefaultModelContextSize({ trainContextSize }) {
    if (trainContextSize != null)
        return trainContextSize;
    return defaultFallbackContextSize;
}
1919
+ //# sourceMappingURL=LlamaContext.js.map