node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (351) hide show
  1. package/README.md +1 -1
  2. package/dist/ChatWrapper.js +4 -0
  3. package/dist/ChatWrapper.js.map +1 -1
  4. package/dist/bindings/AddonTypes.d.ts +35 -6
  5. package/dist/bindings/Llama.d.ts +12 -0
  6. package/dist/bindings/Llama.js +100 -7
  7. package/dist/bindings/Llama.js.map +1 -1
  8. package/dist/bindings/getLlama.d.ts +19 -1
  9. package/dist/bindings/getLlama.js +16 -6
  10. package/dist/bindings/getLlama.js.map +1 -1
  11. package/dist/bindings/types.d.ts +18 -0
  12. package/dist/bindings/types.js +31 -2
  13. package/dist/bindings/types.js.map +1 -1
  14. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  15. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  16. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  17. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
  18. package/dist/bindings/utils/cloneLlamaCppRepo.js +4 -3
  19. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  20. package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
  21. package/dist/bindings/utils/compileLLamaCpp.js +133 -97
  22. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  23. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
  24. package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
  25. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  26. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  27. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  28. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  29. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
  30. package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
  31. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
  32. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
  33. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  34. package/dist/bindings/utils/testBindingBinary.js +2 -2
  35. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  36. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  37. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  38. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  39. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  40. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  41. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  42. package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
  43. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  44. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  45. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  46. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  47. package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
  48. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  49. package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
  50. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  51. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  52. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  53. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  54. package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
  55. package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
  56. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
  57. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  58. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
  59. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  60. package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +16 -18
  61. package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
  62. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  63. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  64. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  65. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  66. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  67. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  68. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  69. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
  70. package/dist/chatWrappers/utils/resolveChatWrapper.js +206 -0
  71. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  72. package/dist/cli/cli.js +1 -1
  73. package/dist/cli/cli.js.map +1 -1
  74. package/dist/cli/commands/ChatCommand.d.ts +7 -4
  75. package/dist/cli/commands/ChatCommand.js +177 -70
  76. package/dist/cli/commands/ChatCommand.js.map +1 -1
  77. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  78. package/dist/cli/commands/ClearCommand.js +5 -5
  79. package/dist/cli/commands/ClearCommand.js.map +1 -1
  80. package/dist/cli/commands/CompleteCommand.d.ts +3 -2
  81. package/dist/cli/commands/CompleteCommand.js +115 -51
  82. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  83. package/dist/cli/commands/InfillCommand.d.ts +3 -2
  84. package/dist/cli/commands/InfillCommand.js +115 -51
  85. package/dist/cli/commands/InfillCommand.js.map +1 -1
  86. package/dist/cli/commands/OnPostInstallCommand.js +2 -0
  87. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  88. package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
  89. package/dist/cli/commands/inspect/InspectCommand.js +17 -0
  90. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  91. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +10 -0
  92. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +108 -0
  93. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  94. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  95. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +98 -0
  96. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  97. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +14 -0
  98. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +577 -0
  99. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  100. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  101. package/dist/cli/utils/ConsoleTable.js +86 -0
  102. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  103. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  104. package/dist/cli/utils/printCommonInfoLines.js +70 -0
  105. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  106. package/dist/cli/utils/printInfoLine.d.ts +10 -0
  107. package/dist/cli/utils/printInfoLine.js +45 -0
  108. package/dist/cli/utils/printInfoLine.js.map +1 -0
  109. package/dist/cli/utils/resolveCommandGgufPath.d.ts +1 -0
  110. package/dist/cli/utils/resolveCommandGgufPath.js +6 -0
  111. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  112. package/dist/config.d.ts +3 -1
  113. package/dist/config.js +7 -1
  114. package/dist/config.js.map +1 -1
  115. package/dist/evaluator/LlamaChat/LlamaChat.js +13 -5
  116. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  117. package/dist/evaluator/LlamaCompletion.js +5 -3
  118. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  119. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +43 -9
  120. package/dist/evaluator/LlamaContext/LlamaContext.js +251 -60
  121. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  122. package/dist/evaluator/LlamaContext/types.d.ts +68 -10
  123. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  124. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  125. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  126. package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
  127. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  128. package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -7
  129. package/dist/evaluator/LlamaEmbeddingContext.js +31 -22
  130. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  131. package/dist/evaluator/LlamaGrammar.js +1 -0
  132. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  133. package/dist/evaluator/LlamaModel.d.ts +78 -20
  134. package/dist/evaluator/LlamaModel.js +385 -21
  135. package/dist/evaluator/LlamaModel.js.map +1 -1
  136. package/dist/evaluator/TokenMeter.d.ts +54 -0
  137. package/dist/evaluator/TokenMeter.js +86 -0
  138. package/dist/evaluator/TokenMeter.js.map +1 -0
  139. package/dist/gguf/GgufInsights.d.ts +40 -0
  140. package/dist/gguf/GgufInsights.js +350 -0
  141. package/dist/gguf/GgufInsights.js.map +1 -0
  142. package/dist/gguf/consts.d.ts +3 -0
  143. package/dist/gguf/consts.js +8 -0
  144. package/dist/gguf/consts.js.map +1 -0
  145. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  146. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  147. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  148. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  149. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  150. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  151. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  152. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  153. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  154. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  155. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  156. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  157. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  158. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  159. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  160. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  161. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  162. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  163. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  164. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  165. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  166. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  167. package/dist/gguf/parser/parseGguf.js +58 -0
  168. package/dist/gguf/parser/parseGguf.js.map +1 -0
  169. package/dist/gguf/readGgufFileInfo.d.ts +30 -0
  170. package/dist/gguf/readGgufFileInfo.js +37 -0
  171. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  172. package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
  173. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  174. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  175. package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
  176. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  177. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  178. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  179. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  180. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  181. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  182. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  183. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  184. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  185. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  186. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  187. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  188. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  189. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  190. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  191. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  192. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  193. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  194. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  195. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  196. package/dist/index.d.ts +13 -7
  197. package/dist/index.js +11 -6
  198. package/dist/index.js.map +1 -1
  199. package/dist/state.d.ts +2 -0
  200. package/dist/state.js +7 -0
  201. package/dist/state.js.map +1 -1
  202. package/dist/types.d.ts +1 -1
  203. package/dist/utils/DisposeGuard.d.ts +13 -0
  204. package/dist/utils/DisposeGuard.js +120 -0
  205. package/dist/utils/DisposeGuard.js.map +1 -0
  206. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  207. package/dist/utils/InsufficientMemoryError.js +6 -0
  208. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  209. package/dist/utils/LlamaText.d.ts +25 -10
  210. package/dist/utils/LlamaText.js +205 -23
  211. package/dist/utils/LlamaText.js.map +1 -1
  212. package/dist/utils/StopGenerationDetector.js +3 -1
  213. package/dist/utils/StopGenerationDetector.js.map +1 -1
  214. package/dist/utils/cmake.js +1 -1
  215. package/dist/utils/cmake.js.map +1 -1
  216. package/dist/utils/findBestOption.d.ts +4 -0
  217. package/dist/utils/findBestOption.js +15 -0
  218. package/dist/utils/findBestOption.js.map +1 -0
  219. package/dist/utils/getConsoleLogPrefix.js +3 -2
  220. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  221. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
  222. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
  223. package/dist/utils/gitReleaseBundles.js +68 -1
  224. package/dist/utils/gitReleaseBundles.js.map +1 -1
  225. package/dist/utils/mergeUnionTypes.d.ts +4 -0
  226. package/dist/utils/parseModelFileName.d.ts +1 -0
  227. package/dist/utils/parseModelFileName.js +6 -1
  228. package/dist/utils/parseModelFileName.js.map +1 -1
  229. package/dist/utils/prettyPrintObject.d.ts +10 -1
  230. package/dist/utils/prettyPrintObject.js +57 -13
  231. package/dist/utils/prettyPrintObject.js.map +1 -1
  232. package/dist/utils/removeNullFields.d.ts +2 -2
  233. package/dist/utils/removeNullFields.js.map +1 -1
  234. package/dist/utils/spawnCommand.d.ts +11 -1
  235. package/dist/utils/spawnCommand.js +55 -7
  236. package/dist/utils/spawnCommand.js.map +1 -1
  237. package/dist/utils/tokenizeInput.d.ts +1 -1
  238. package/dist/utils/tokenizeInput.js +3 -3
  239. package/dist/utils/tokenizeInput.js.map +1 -1
  240. package/dist/utils/withOra.d.ts +1 -0
  241. package/dist/utils/withOra.js +2 -2
  242. package/dist/utils/withOra.js.map +1 -1
  243. package/llama/CMakeLists.txt +5 -5
  244. package/llama/addon.cpp +793 -88
  245. package/llama/binariesGithubRelease.json +1 -1
  246. package/llama/gitRelease.bundle +0 -0
  247. package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
  248. package/llama/gpuInfo/cuda-gpu-info.h +3 -0
  249. package/llama/gpuInfo/metal-gpu-info.h +4 -1
  250. package/llama/gpuInfo/metal-gpu-info.mm +14 -1
  251. package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
  252. package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
  253. package/llama/grammars/json.gbnf +1 -1
  254. package/llama/grammars/json_arr.gbnf +1 -1
  255. package/llama/llama.cpp.info.json +1 -1
  256. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  257. package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  258. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  259. package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  260. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  261. package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  262. package/llamaBins/linux-x64/llama-addon.node +0 -0
  263. package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  264. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  265. package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  266. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  267. package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  268. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  269. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  270. package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  271. package/llamaBins/mac-x64/llama-addon.node +0 -0
  272. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  273. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  274. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  275. package/llamaBins/win-arm64/llama-addon.node +0 -0
  276. package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  277. package/llamaBins/win-x64/llama-addon.exp +0 -0
  278. package/llamaBins/win-x64/llama-addon.lib +0 -0
  279. package/llamaBins/win-x64/llama-addon.node +0 -0
  280. package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  281. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  282. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  283. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  284. package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
  285. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  286. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  287. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  288. package/package.json +16 -11
  289. package/dist/TemplateChatWrapper.js.map +0 -1
  290. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
  291. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
  292. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
  293. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  294. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
  295. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  296. package/dist/cli/commands/InspectCommand.js +0 -113
  297. package/dist/cli/commands/InspectCommand.js.map +0 -1
  298. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  299. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  300. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  301. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  302. package/dist/gguf/GGUFInsights.d.ts +0 -28
  303. package/dist/gguf/GGUFInsights.js +0 -58
  304. package/dist/gguf/GGUFInsights.js.map +0 -1
  305. package/dist/gguf/GGUFMetadata.d.ts +0 -19
  306. package/dist/gguf/GGUFMetadata.js +0 -38
  307. package/dist/gguf/GGUFMetadata.js.map +0 -1
  308. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
  309. package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
  310. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
  311. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
  312. package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
  313. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
  314. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
  315. package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
  316. package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
  317. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
  318. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -12
  319. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
  320. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
  321. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
  322. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
  323. package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
  324. package/dist/gguf/ggufParser/GGUFParser.js +0 -123
  325. package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
  326. package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
  327. package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
  328. package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
  329. package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
  330. package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
  331. package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
  332. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
  333. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
  334. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
  335. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
  336. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
  337. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
  338. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
  339. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
  340. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
  341. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  342. package/dist/utils/parseModelTypeDescription.js +0 -9
  343. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  344. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  345. package/dist/utils/resolveChatWrapper.js +0 -16
  346. package/dist/utils/resolveChatWrapper.js.map +0 -1
  347. package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
  348. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  349. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  350. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  351. /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
@@ -0,0 +1,86 @@
1
/**
 * Tracks the evaluation usage of tokens.
 *
 * Three independent running totals are kept (input, output and restore-state tokens).
 * They start at zero and are only ever increased through `useTokens`.
 */
export class TokenMeter {
    // Running totals; exposed read-only through the `used*Tokens` getters below
    _inputTokens = 0;
    _outputTokens = 0;
    _restoreStateTokens = 0;

    /**
     * The number of input tokens used
     */
    get usedInputTokens() {
        return this._inputTokens;
    }

    /**
     * The number of tokens generated by a model
     */
    get usedOutputTokens() {
        return this._outputTokens;
    }

    /**
     * The number of tokens used as input to restore a context sequence state to continue previous evaluation.
     * This may be consumed by virtual context sequences.
     */
    get usedRestoreStateTokens() {
        return this._restoreStateTokens;
    }

    /**
     * Get the current state of the token meter
     * @returns {{usedInputTokens: number, usedOutputTokens: number, usedRestoreStateTokens: number}}
     * a plain snapshot object; it is not updated by later `useTokens` calls
     */
    getState() {
        return {
            usedInputTokens: this.usedInputTokens,
            usedOutputTokens: this.usedOutputTokens,
            usedRestoreStateTokens: this.usedRestoreStateTokens
        };
    }

    /**
     * Log the usage of tokens
     * @param {number} tokens - amount to add to the counter; `0` is a no-op
     * @param {"input" | "output" | "restoreState"} type - which counter to increase
     * @throws {TypeError} when `tokens` is not a number (or is `NaN`), or when `type` is not a known token type
     * @throws {RangeError} when `tokens` is negative
     */
    useTokens(tokens, type) {
        // Guard before touching any counter: `NaN` slips past the `< 0` check below and
        // a string would be concatenated by `+=`, either of which corrupts the total for good
        if (typeof tokens !== "number" || Number.isNaN(tokens))
            throw new TypeError("Tokens must be a number");

        if (tokens < 0)
            throw new RangeError("Tokens cannot be negative");
        else if (tokens === 0)
            return;

        if (type === "input")
            this._inputTokens += tokens;
        else if (type === "output")
            this._outputTokens += tokens;
        else if (type === "restoreState")
            this._restoreStateTokens += tokens;
        else {
            // Leftover of the compile-time exhaustiveness check; has no runtime effect
            void (type);
            throw new TypeError(`Unknown token type: ${type}`);
        }
    }

    /**
     * Get the difference between the current meter and another meter
     * @param meter - the meter (or meter state) to subtract from this meter
     * @returns the per-counter result of `this - meter`
     */
    diff(meter) {
        return TokenMeter.diff(this, meter);
    }

    /**
     * Log the usage of tokens on multiple meters
     * @param meters - a single `TokenMeter`, an iterable of meters, or `null`/`undefined` (in which case nothing happens)
     * @param {number} tokens - amount to add on every given meter
     * @param {"input" | "output" | "restoreState"} type - which counter to increase
     */
    static useTokens(meters, tokens, type) {
        if (meters == null)
            return;

        if (meters instanceof TokenMeter)
            meters.useTokens(tokens, type);
        else {
            for (const meter of meters)
                meter.useTokens(tokens, type);
        }
    }

    /**
     * Get the difference between two meters
     * @param meter1 - the meter (or meter state) to subtract from
     * @param meter2 - the meter (or meter state) to subtract
     * @returns the per-counter result of `meter1 - meter2`; values are negative where `meter2` is ahead
     */
    static diff(meter1, meter2) {
        return {
            usedInputTokens: meter1.usedInputTokens - meter2.usedInputTokens,
            usedOutputTokens: meter1.usedOutputTokens - meter2.usedOutputTokens,
            usedRestoreStateTokens: meter1.usedRestoreStateTokens - meter2.usedRestoreStateTokens
        };
    }
}
86
+ //# sourceMappingURL=TokenMeter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TokenMeter.js","sourceRoot":"","sources":["../../src/evaluator/TokenMeter.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,OAAO,UAAU;IACX,YAAY,GAAW,CAAC,CAAC;IACzB,aAAa,GAAW,CAAC,CAAC;IAC1B,mBAAmB,GAAW,CAAC,CAAC;IAExC;;OAEG;IACH,IAAW,eAAe;QACtB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,IAAW,gBAAgB;QACvB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;OAGG;IACH,IAAW,sBAAsB;QAC7B,OAAO,IAAI,CAAC,mBAAmB,CAAC;IACpC,CAAC;IAED;;OAEG;IACI,QAAQ;QACX,OAAO;YACH,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;YACvC,sBAAsB,EAAE,IAAI,CAAC,sBAAsB;SACtD,CAAC;IACN,CAAC;IAED;;OAEG;IACI,SAAS,CAAC,MAAc,EAAE,IAAyC;QACtE,IAAI,MAAM,GAAG,CAAC;YACV,MAAM,IAAI,UAAU,CAAC,2BAA2B,CAAC,CAAC;aACjD,IAAI,MAAM,KAAK,CAAC;YACjB,OAAO;QAEX,IAAI,IAAI,KAAK,OAAO;YAChB,IAAI,CAAC,YAAY,IAAI,MAAM,CAAC;aAC3B,IAAI,IAAI,KAAK,QAAQ;YACtB,IAAI,CAAC,aAAa,IAAI,MAAM,CAAC;aAC5B,IAAI,IAAI,KAAK,cAAc;YAC5B,IAAI,CAAC,mBAAmB,IAAI,MAAM,CAAC;aAClC;YACD,KAAK,CAAC,IAAoB,CAAC,CAAC;YAC5B,MAAM,IAAI,SAAS,CAAC,uBAAuB,IAAI,EAAE,CAAC,CAAC;SACtD;IACL,CAAC;IAED;;OAEG;IACI,IAAI,CAAC,KAAmC;QAC3C,OAAO,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACI,MAAM,CAAC,SAAS,CACnB,MAAuF,EACvF,MAAc,EACd,IAAyC;QAEzC,IAAI,MAAM,IAAI,IAAI;YACd,OAAO;QAEX,IAAI,MAAM,YAAY,UAAU;YAC5B,MAAM,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;aAC9B;YACD,KAAK,MAAM,KAAK,IAAI,MAAM;gBACtB,KAAK,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;SACrC;IACL,CAAC;IAED;;OAEG;IACI,MAAM,CAAC,IAAI,CACd,MAAoC,EACpC,MAAoC;QAEpC,OAAO;YACH,eAAe,EAAE,MAAM,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe;YAChE,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB;YACnE,sBAAsB,EAAE,MAAM,CAAC,sBAAsB,GAAG,MAAM,CAAC,sBAAsB;SACxF,CAAC;IACN,CAAC;CACJ"}
@@ -0,0 +1,40 @@
1
+ import { Llama } from "../bindings/Llama.js";
2
+ import { GgufFileInfo } from "./types/GgufFileInfoTypes.js";
3
+ /** A memory estimate split into a CPU part and a GPU part (units are presumably bytes — TODO confirm) */ export type GgufInsightsResourceRequirements = {
4
+ /** Estimated memory attributed to the CPU side */ cpuRam: number;
5
+ /** Estimated memory attributed to the GPU side */ gpuVram: number;
6
+ };
7
+ /** Size and memory-requirement estimates derived from a GGUF file's info. Create instances with `GgufInsights.from()`. */ export declare class GgufInsights {
8
+ /** The GGUF file info this instance was created from */ readonly ggufFileInfo: GgufFileInfo;
9
+ /** Not publicly constructible - use `GgufInsights.from()` instead */ private constructor();
10
+ /** The context size the model was trained on */
11
+ get trainContextSize(): number | undefined;
12
+ /** The size of an embedding vector the model can produce */
13
+ get embeddingVectorSize(): number | undefined;
14
+ /** The layer count read from the file plus one output layer */ get totalLayers(): number;
15
+ /** The total size of the file's tensors, calculated from its tensor info (presumably in bytes — TODO confirm) */ get modelSize(): number;
16
+ /** Estimates the memory taken by the model's tensors, split between CPU and GPU according to `gpuLayers` */ estimateModelResourceRequirements({ gpuLayers }: {
17
+ gpuLayers: number;
18
+ }): GgufInsightsResourceRequirements;
19
+ /**
20
+ * Estimates the memory required to create a context of the given parameters based on the implementation details of `llama.cpp`.
21
+ * The calculation doesn't include a precise estimation of the graph overhead memory, so it uses a rough estimate for that.
22
+ * The estimation for the graph overhead memory will be improved in the future to be more precise, but it's good enough for now.
23
+ */
24
+ estimateContextResourceRequirements({ contextSize, modelGpuLayers, batchSize, sequences, isEmbeddingContext, includeGraphOverhead }: {
25
+ contextSize: number;
26
+ modelGpuLayers: number;
27
+ batchSize?: number;
28
+ sequences?: number;
29
+ isEmbeddingContext?: boolean;
30
+ includeGraphOverhead?: boolean;
31
+ }): GgufInsightsResourceRequirements;
32
+ /**
33
+ * @param ggufFileInfo - The GGUF file info to derive the insights from.
34
+ * @param llama - If you already have a `Llama` instance, pass it to reuse it for the `GgufInsights` instance.
35
+ * If you don't pass a `Llama` instance, a basic `Llama` instance is created as a fallback - it's a slim instance that
36
+ * doesn't instantiate a `llama.cpp` backend, so it won't utilize the GPU at all, and will be shared with other `GgufInsights` instances
37
+ * that need a fallback `Llama` instance.
38
+ */
39
+ static from(ggufFileInfo: GgufFileInfo, llama?: Llama): Promise<GgufInsights>;
40
+ }
@@ -0,0 +1,350 @@
1
+ import { getLlamaWithoutBackend } from "../bindings/utils/getLlamaWithoutBackend.js";
2
+ import { getDefaultContextBatchSize, getDefaultContextSequences } from "../evaluator/LlamaContext/LlamaContext.js";
3
+ import { GgufArchitectureType } from "./types/GgufMetadataTypes.js";
4
/**
 * Size and memory estimations for a model, derived from the metadata and tensor info of its GGUF file.
 * Create instances using `GgufInsights.from()`.
 */
export class GgufInsights {
    /** @internal */ _llama;
    /** @internal */ _modelSize;
    /** @internal */ _totalLayers = null;
    ggufFileInfo;
    constructor(ggufFileInfo, llama) {
        this._llama = llama;
        this.ggufFileInfo = ggufFileInfo;
        // the tensor sizes depend only on the file, so the total is computed once
        this._modelSize = calculateTensorsSize(ggufFileInfo.tensorInfo ?? [], llama);
    }
    /** The context size the model was trained on */
    get trainContextSize() {
        return this.ggufFileInfo.architectureMetadata.context_length;
    }
    /** The size of an embedding vector the model can produce */
    get embeddingVectorSize() {
        return this.ggufFileInfo.architectureMetadata.embedding_length;
    }
    /** The number of layers found in the file plus one output layer (computed lazily and cached) */
    get totalLayers() {
        if (this._totalLayers != null)
            return this._totalLayers;
        const outputLayers = 1;
        this._totalLayers = this._getFileLayers() + outputLayers;
        return this._totalLayers;
    }
    /** The combined size of all the tensors listed in the file, in bytes */
    get modelSize() {
        return this._modelSize;
    }
    /**
     * Estimates the memory taken by the model tensors when `gpuLayers` layers are placed on the GPU.
     * @returns the combined size in bytes of the tensors assigned to the CPU (`cpuRam`) and to the GPU (`gpuVram`)
     */
    estimateModelResourceRequirements({ gpuLayers }) {
        const { cpu, gpu } = this._getTensorResourceSplit(gpuLayers);
        return {
            cpuRam: calculateTensorsSize(cpu, this._llama),
            gpuVram: calculateTensorsSize(gpu, this._llama)
        };
    }
    /**
     * Estimates the memory required to create a context of the given parameters based on the implementation details of `llama.cpp`.
     * The calculation doesn't include a precise estimation of the graph overhead memory, so it uses a rough estimate for that.
     * The estimation for the graph overhead memory will be improved in the future to be more precise, but it's good enough for now.
     *
     * `modelGpuLayers` is clamped to the range `0`..`totalLayers` (and defaults to `totalLayers` when omitted);
     * every part of the estimation uses the clamped value.
     * `sequences` and `batchSize` fall back to the defaults of `LlamaContext` when omitted.
     */
    estimateContextResourceRequirements({ contextSize, modelGpuLayers, batchSize, sequences, isEmbeddingContext = false, includeGraphOverhead = true }) {
        if (sequences == null)
            sequences = getDefaultContextSequences();
        if (batchSize == null)
            batchSize = getDefaultContextBatchSize({ contextSize, sequences });
        const actualContextSize = contextSize * sequences;
        const totalLayers = this.totalLayers;
        const finalGpuLayers = Math.max(0, Math.min(modelGpuLayers ?? totalLayers, totalLayers));
        const finalCpuLayers = totalLayers - finalGpuLayers;
        const allLayersOnGpu = finalGpuLayers === totalLayers;
        const llmData = this.ggufFileInfo.architectureMetadata;
        const architecture = this.ggufFileInfo.metadata.general.architecture;
        const vocabularySize = llmData.vocab_size ?? this.ggufFileInfo.metadata.tokenizer.ggml.tokens.length;
        const logitsSize = vocabularySize * batchSize;
        const embedSize = isEmbeddingContext
            ? (llmData.embedding_length ?? 0) * batchSize
            : 0;
        const sizeTBytes = 8; // sizeof(size_t)
        const floatBytes = 4; // sizeof(float)
        const uint32TBytes = 4; // sizeof(uint32_t)
        const int32TBytes = 4; // sizeof(int32_t)
        // source: `llama_get_state_size` in `llama.cpp`
        const sRngSize = sizeTBytes;
        const sRng = this._llama._consts.llamaMaxRngState;
        const sNOutputs = sizeTBytes;
        const sNOutputPos = batchSize * int32TBytes;
        const sLogitsSize = sizeTBytes;
        const sLogits = logitsSize * floatBytes;
        const sEmbeddingSize = sizeTBytes;
        const sEmbedding = embedSize * floatBytes;
        const sKvBufSize = sizeTBytes;
        const sKvHead = uint32TBytes;
        const sKvSize = uint32TBytes;
        const sKvUsed = uint32TBytes;
        // uses the clamped layer count, so an omitted or out-of-range `modelGpuLayers` cannot produce `NaN` or skew the result
        const sKv = 2 * int32TBytes * finalGpuLayers * this._llama._consts.ggmlTensorOverhead;
        const sKvCell = this._llama._consts.llamaPosSize + sizeTBytes + this._llama._consts.llamaSeqIdSize;
        const kvSelfLength = architecture === GgufArchitectureType.mamba
            ? Math.max(1, sequences)
            : actualContextSize;
        const sKvCells = kvSelfLength * sKvCell;
        const overheadMemory = (sRngSize +
            sRng +
            sNOutputs +
            sNOutputPos +
            sLogitsSize +
            sLogits +
            sEmbeddingSize +
            sEmbedding +
            sKvBufSize +
            sKvHead +
            sKvSize +
            sKvUsed +
            sKv +
            sKvCells);
        // Estimates the memory allocated by `ggml_backend_sched_reserve` in `llama_new_context_with_model` in `llama.cpp`.
        // If you read this line and have better insights on how to estimate this memory, please open a PR to improve it :)
        const estimateGraphOverheadMemory = () => {
            const s1MB = Math.pow(1024, 2);
            const tensorInfo = this.ggufFileInfo.tensorInfo ?? [];
            let defaultCalculationAdjustment = 0;
            if (batchSize == null)
                return 0;
            // the used context size relative to the trained context size (1 when the trained size is unknown)
            const getContextSizeRatio = () => (this.trainContextSize == null
                ? 1
                : actualContextSize / this.trainContextSize);
            if (architecture === GgufArchitectureType.llama) {
                const expertCount = llmData.expert_count ?? 0;
                const headCount = llmData.attention?.head_count ?? 0;
                const embeddingLength = llmData.embedding_length ?? 0;
                if (expertCount > 0) {
                    const expertsUsedCount = llmData.expert_used_count ?? 2;
                    return int32TBytes * batchSize * (((expertsUsedCount + 1) * embeddingLength) + (actualContextSize * headCount));
                }
                return int32TBytes * batchSize * (embeddingLength + (actualContextSize * headCount));
            }
            else if (architecture === GgufArchitectureType.qwen2) {
                if (allLayersOnGpu) {
                    defaultCalculationAdjustment -= (s1MB * 340) * getContextSizeRatio();
                }
                else {
                    defaultCalculationAdjustment -= (s1MB * 250) + ((s1MB * 50) * getContextSizeRatio());
                }
            }
            else if (architecture === GgufArchitectureType.gemma) {
                // only works properly when all layers are on the GPU, which is why it's commented out:
                // return int32TBytes * batchSize * ((llmData.embedding_length ?? 0));
                if (allLayersOnGpu) {
                    defaultCalculationAdjustment += (s1MB * 40) - ((s1MB * 270) * getContextSizeRatio());
                }
                else {
                    defaultCalculationAdjustment += -(s1MB * 550) + ((s1MB * 150) * (this.trainContextSize == null
                        ? 1
                        : Math.max(0, (1 - (actualContextSize / this.trainContextSize)))));
                }
            }
            else if (architecture === GgufArchitectureType.stablelm) {
                const headCount = llmData.attention?.head_count ?? 0;
                return (int32TBytes * batchSize * actualContextSize * headCount) - (50 * s1MB);
                // if (modelGpuLayers === this.totalLayers) {
                //     defaultCalculationAdjustment += -(s1MB * 20) + (
                //         (s1MB * 250) * (
                //             this.trainContextSize == null
                //                 ? 1
                //                 : actualContextSize / this.trainContextSize
                //         )
                //     );
                // } else {
                //     defaultCalculationAdjustment += -(s1MB * 40) + (
                //         (s1MB * 300) * (
                //             this.trainContextSize == null
                //                 ? 1
                //                 : actualContextSize / this.trainContextSize
                //         )
                //     );
                // }
            }
            // note: this sums the dimension lengths of every tensor (it does not multiply them);
            // the magic number below is applied to that sum
            const totalElements = tensorInfo.length === 0
                ? totalLayers * (((llmData.embedding_length ?? 0) +
                    (llmData.feed_forward_length ?? 0)) / 2)
                : tensorInfo.reduce((res, tensor) => {
                    return res + tensor.dimensions.reduce((res, dim) => res + Number(dim), 0);
                }, 0);
            // magic numbers for estimation. will be improved in the future
            return (totalElements * 77.655 * (actualContextSize / 4096)) + defaultCalculationAdjustment;
        };
        const graphOverheadMemory = !includeGraphOverhead
            ? 0
            : estimateGraphOverheadMemory();
        const usingGpu = finalGpuLayers !== 0;
        // the state and graph overhead is attributed to the GPU whenever any layer is offloaded to it
        const cpuRam = (!usingGpu
            ? (overheadMemory + graphOverheadMemory)
            : 0) +
            this._estimateKvMemorySizeInBytes(actualContextSize, finalCpuLayers);
        const gpuVram = usingGpu
            ? (overheadMemory +
                graphOverheadMemory +
                this._estimateKvMemorySizeInBytes(actualContextSize, finalGpuLayers < totalLayers
                    ? (finalGpuLayers + 1)
                    : finalGpuLayers))
            : 0;
        return {
            cpuRam,
            gpuVram
        };
    }
    /**
     * Get the split tensor resources for CPU and GPU based on the number of GPU layers
     * @internal
     */
    _getTensorResourceSplit(gpuLayers) {
        const tensorInfo = this.ggufFileInfo.tensorInfo ?? [];
        if (gpuLayers === 0) {
            return {
                cpu: tensorInfo,
                gpu: []
            };
        }
        // Tensors that don't belong to a numbered layer go to the GPU,
        // except for partially offloaded qwen2 and gemma models, where they stay on the CPU
        const architecture = this.ggufFileInfo.metadata?.general?.architecture;
        const keepNonLayerTensorsOnCpu = gpuLayers !== this.totalLayers && (architecture === GgufArchitectureType.qwen2 ||
            architecture === GgufArchitectureType.gemma);
        const gpuTensors = [];
        const cpuTensors = [];
        for (const singleTensorInfo of tensorInfo) {
            const { layerNumber } = parseTensorName(singleTensorInfo.name);
            const placeOnGpu = layerNumber == null
                ? !keepNonLayerTensorsOnCpu
                : layerNumber < gpuLayers;
            if (placeOnGpu)
                gpuTensors.push(singleTensorInfo);
            else
                cpuTensors.push(singleTensorInfo);
        }
        return {
            cpu: cpuTensors,
            gpu: gpuTensors
        };
    }
    /**
     * Counts the distinct layer numbers that appear in the tensor names of the file
     * @internal
     */
    _determineNumberOfLayersFromTensorInfo() {
        const layerNumbers = new Set();
        for (const singleTensorInfo of (this.ggufFileInfo.tensorInfo ?? [])) {
            const { layerNumber } = parseTensorName(singleTensorInfo.name);
            if (layerNumber != null)
                layerNumbers.add(layerNumber);
        }
        return layerNumbers.size;
    }
    /**
     * The `block_count` from the architecture metadata, or the number of layers found in the tensor names when it's missing
     * @internal
     */
    _getFileLayers() {
        return this.ggufFileInfo.architectureMetadata.block_count ?? this._determineNumberOfLayersFromTensorInfo();
    }
    /**
     * Estimates the size in bytes of the key and value tensors for the given context size and number of layers
     * @internal
     */
    _estimateKvMemorySizeInBytes(contextSize, layers) {
        // source: `llama_kv_cache_init` in `llama.cpp`
        const architectureMetadata = this.ggufFileInfo.architectureMetadata;
        const attention = architectureMetadata.attention;
        const ssm = architectureMetadata.ssm;
        const isMamba = this.ggufFileInfo.metadata.general.architecture === GgufArchitectureType.mamba;
        const nHead = attention?.head_count ?? 0;
        const nEmbd = architectureMetadata.embedding_length ?? 0;
        const nEmbdHeadK = attention?.key_length ?? ((nHead == 0) ? 0 : (nEmbd / nHead));
        const nHeadKv = attention?.head_count_kv ?? nHead;
        const modelNEmbdKGqa = nEmbdHeadK * nHeadKv;
        const ssmDConv = ssm?.conv_kernel ?? 0;
        const ssmDInner = ssm?.inner_size ?? 0;
        const modelNEmbdKS = (ssmDConv > 0 ? (ssmDConv - 1) : 0) * ssmDInner;
        const nEmbdHeadV = attention?.value_length ?? ((nHead == 0) ? 0 : nEmbd / nHead);
        const modelNEmbdVGqa = nEmbdHeadV * nHeadKv;
        const ssmDState = ssm?.state_size ?? 0;
        const modelNEmbdVS = ssmDState * ssmDInner;
        const totalNEmbdKGqa = modelNEmbdKGqa + modelNEmbdKS;
        const totalNEmbdVGqa = modelNEmbdVGqa + modelNEmbdVS;
        // if `type_k` of `llama_context_params` changes to be configurable in `LlamaContext`,
        // this would have to depend on that value
        const keyTypeSize = isMamba
            ? this._llama._consts.ggmlTypeF32Size
            : this._llama._consts.ggmlTypeF16Size;
        // if `type_v` of `llama_context_params` changes to be configurable in `LlamaContext`,
        // this would have to depend on that value
        const valueTypeSize = isMamba
            ? this._llama._consts.ggmlTypeF32Size
            : this._llama._consts.ggmlTypeF16Size;
        const keyTensorsSize = layers * totalNEmbdKGqa * contextSize * keyTypeSize;
        const valueTensorsSize = layers * totalNEmbdVGqa * contextSize * valueTypeSize;
        return keyTensorsSize + valueTensorsSize;
    }
    /**
     * @param ggufFileInfo
     * @param llama - If you already have a `Llama` instance, pass it to reuse it for the `GgufInsights` instance.
     * If you don't pass a `Llama` instance, a basic `Llama` instance is created as a fallback - it's a slim instance that
     * doesn't instantiate a `llama.cpp` backend, so it won't utilize the GPU at all, and it will be shared with other
     * `GgufInsights` instances that need a fallback `Llama` instance.
     */
    static async from(ggufFileInfo, llama) {
        let resolvedLlama = llama;
        if (resolvedLlama == null)
            resolvedLlama = await getLlamaWithoutBackend();
        return new GgufInsights(ggufFileInfo, resolvedLlama);
    }
}
285
/**
 * Extracts the layer number from a tensor name of the form `blk.<number>[.<rest>]`.
 * @param tensorName - the name of a tensor; may be `null` or `undefined`
 * @returns `{layerNumber}` holding the layer index, or `undefined` when the name doesn't start with `blk.`
 * or the segment following the prefix is not a plain decimal integer
 */
function parseTensorName(tensorName) {
    const layerTensorPrefix = "blk.";
    if (tensorName == null || !tensorName.startsWith(layerTensorPrefix))
        return { layerNumber: undefined };
    const dotIndex = tensorName.indexOf(".", layerTensorPrefix.length);
    const layerNumberString = tensorName.slice(layerTensorPrefix.length, dotIndex < 0
        ? tensorName.length
        : dotIndex);
    // A bare `parseInt` would accept prefixes and other notations ("3abc" -> 3, "0x1f" -> 31, "-1" -> -1),
    // none of which is a valid layer index, so only digit-only segments are accepted
    if (!/^\d+$/.test(layerNumberString))
        return { layerNumber: undefined };
    const layerNumber = parseInt(layerNumberString, 10);
    if (Number.isSafeInteger(layerNumber))
        return { layerNumber };
    return { layerNumber: undefined };
}
300
/**
 * Sums the sizes of the given tensors.
 * @param tensorsInfo - the tensors to measure
 * @param llama - forwarded to `calculateTensorSize` for every tensor
 * @returns the combined size of all the tensors (`0` for an empty list)
 */
function calculateTensorsSize(tensorsInfo, llama) {
    return Array.from(tensorsInfo).reduce((totalSize, singleTensorInfo) => totalSize + calculateTensorSize(singleTensorInfo, llama), 0);
}
306
/**
 * Calculates the number of bytes a single tensor occupies, using the type size and block size
 * that the bindings report for the tensor's ggml type.
 * @param tensor - tensor info; its `ggmlType` and `dimensions` are used
 * @param llama - provides the type/block size lookups and the `ggmlMaxDims` constant
 * @returns the size of the tensor in bytes
 * @throws Error when the bindings have no type size or block size for the tensor's type
 */
function calculateTensorSize(tensor, llama) {
    const bindings = llama._bindings;
    const typeSize = bindings.getTypeSizeForGgmlType(tensor.ggmlType);
    const blockSize = bindings.getBlockSizeForGgmlType(tensor.ggmlType);
    const ggmlMaxDims = llama._consts.ggmlMaxDims;
    if (typeSize == null || blockSize == null)
        throw new Error("Invalid type or block size");
    const { ne, nb } = getTensorNeAndNb(tensor, { typeSize, blockSize, ggmlMaxDims });
    // Types with a block size of 1 start from one element and add the extent of every dimension;
    // block-based types start from the bytes of the first dimension and add the remaining dimensions
    const hasSingleElementBlocks = blockSize === 1;
    const firstAccumulatedDim = hasSingleElementBlocks ? 0 : 1;
    let totalBytes = hasSingleElementBlocks
        ? typeSize
        : Math.floor((ne[0] * nb[0]) / blockSize);
    for (let dim = firstAccumulatedDim; dim < ggmlMaxDims; dim++)
        totalBytes += (ne[dim] - 1) * nb[dim];
    return totalBytes;
}
328
/**
 * Builds the `ne` (number of elements per dimension) and `nb` (stride in bytes per dimension) arrays of a tensor.
 * @param tensor - tensor info; only its `dimensions` are used. Dimensions given as `bigint` are converted to numbers
 * @param options - `typeSize` and `blockSize` of the tensor's type, and `ggmlMaxDims`, the length of the produced arrays
 * @returns `ne`, padded with `1` (or truncated) to `ggmlMaxDims` entries, and the matching `nb`
 */
function getTensorNeAndNb(tensor, { typeSize, blockSize, ggmlMaxDims }) {
    // number of elements
    // source: `ggml_new_tensor_impl` in `ggml.c`
    // dimensions may be bigints, which cannot be mixed with numbers in the arithmetic below
    const ne = Array.from({ length: Math.max(0, ggmlMaxDims) }, (_, index) => (index < tensor.dimensions.length
        ? Number(tensor.dimensions[index])
        : 1));
    // number of bytes
    // source: `ggml_new_tensor_impl` in `ggml.c`
    const nb = [
        typeSize,
        Math.floor(typeSize * (ne[0] / blockSize)),
        ...Array(ggmlMaxDims - 2).fill(0)
    ];
    for (let i = 2; i < ggmlMaxDims; i++) {
        nb[i] = nb[i - 1] * ne[i - 1];
    }
    return {
        ne,
        nb
    };
}
350
+ //# sourceMappingURL=GgufInsights.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GgufInsights.js","sourceRoot":"","sources":["../../src/gguf/GgufInsights.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,sBAAsB,EAAC,MAAM,6CAA6C,CAAC;AACnF,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,2CAA2C,CAAC;AAGjH,OAAO,EAAC,oBAAoB,EAAC,MAAM,8BAA8B,CAAC;AAOlE,MAAM,OAAO,YAAY;IACrB,gBAAgB,CAAkB,MAAM,CAAQ;IAChD,gBAAgB,CAAkB,UAAU,CAAS;IACrD,gBAAgB,CAAS,YAAY,GAAkB,IAAI,CAAC;IAC5C,YAAY,CAAe;IAE3C,YAAoB,YAA0B,EAAE,KAAY;QACxD,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QAEjC,IAAI,CAAC,UAAU,GAAG,oBAAoB,CAAC,YAAY,CAAC,UAAU,IAAI,EAAE,EAAE,KAAK,CAAC,CAAC;IACjF,CAAC;IAED,gDAAgD;IAChD,IAAW,gBAAgB;QACvB,OAAO,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,cAAc,CAAC;IACjE,CAAC;IAED,4DAA4D;IAC5D,IAAW,mBAAmB;QAC1B,OAAO,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,gBAAgB,CAAC;IACnE,CAAC;IAED,IAAW,WAAW;QAClB,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI;YACzB,OAAO,IAAI,CAAC,YAAY,CAAC;QAE7B,MAAM,YAAY,GAAG,CAAC,CAAC;QACvB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,cAAc,EAAE,GAAG,YAAY,CAAC;QAEzD,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,SAAS;QAChB,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;IAEM,iCAAiC,CAAC,EAAC,SAAS,EAAsB;QACrE,MAAM,EAAC,GAAG,EAAE,GAAG,EAAC,GAAG,IAAI,CAAC,uBAAuB,CAAC,SAAS,CAAC,CAAC;QAE3D,OAAO;YACH,MAAM,EAAE,oBAAoB,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC;YAC9C,OAAO,EAAE,oBAAoB,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC;SAClD,CAAC;IACN,CAAC;IAED;;;;OAIG;IACI,mCAAmC,CAAC,EACvC,WAAW,EAAE,cAAc,EAAE,SAAS,EAAE,SAAS,EAAE,kBAAkB,GAAG,KAAK,EAAE,oBAAoB,GAAG,IAAI,EAI7G;QACG,IAAI,SAAS,IAAI,IAAI;YAAE,SAAS,GAAG,0BAA0B,EAAE,CAAC;QAChE,IAAI,SAAS,IAAI,IAAI;YAAE,SAAS,GAAG,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAC,CAAC,CAAC;QAExF,MAAM,iBAAiB,GAAG,WAAW,GAAG,SAAS,CAAC;QAElD,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;QACrC,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,cAAc,IAAI,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;QACzF,MAAM,cAAc,GAAG,WAAW,GAAG,cAAc,CAAC;QACpD,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC;QAEvD,MAAM,cAAc,GAAG,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;QACrG,MAAM,UAAU
,GAAG,cAAc,GAAG,SAAS,CAAC;QAC9C,MAAM,SAAS,GAAG,kBAAkB;YAChC,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,IAAI,CAAC,CAAC,GAAG,SAAS;YAC7C,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,iBAAiB;QACvC,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,gBAAgB;QACtC,MAAM,YAAY,GAAG,CAAC,CAAC,CAAC,mBAAmB;QAC3C,MAAM,WAAW,GAAG,CAAC,CAAC,CAAC,kBAAkB;QAEzC,gDAAgD;QAChD,MAAM,QAAQ,GAAG,UAAU,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,gBAAgB,CAAC;QAClD,MAAM,SAAS,GAAG,UAAU,CAAC;QAC7B,MAAM,WAAW,GAAG,SAAS,GAAG,WAAW,CAAC;QAC5C,MAAM,WAAW,GAAG,UAAU,CAAC;QAC/B,MAAM,OAAO,GAAG,UAAU,GAAG,UAAU,CAAC;QACxC,MAAM,cAAc,GAAG,UAAU,CAAC;QAClC,MAAM,UAAU,GAAG,SAAS,GAAG,UAAU,CAAC;QAC1C,MAAM,UAAU,GAAG,UAAU,CAAC;QAC9B,MAAM,OAAO,GAAG,YAAY,CAAC;QAC7B,MAAM,OAAO,GAAG,YAAY,CAAC;QAC7B,MAAM,OAAO,GAAG,YAAY,CAAC;QAC7B,MAAM,GAAG,GAAG,CAAC,GAAG,WAAW,GAAG,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC;QACtF,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,YAAY,GAAG,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC;QACnG,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,KAAK;YAC/F,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC;YACxB,CAAC,CAAC,iBAAiB,CAAC;QACxB,MAAM,QAAQ,GAAG,YAAY,GAAG,OAAO,CAAC;QAExC,MAAM,cAAc,GAAG,CACnB,QAAQ;YACR,IAAI;YACJ,SAAS;YACT,WAAW;YACX,WAAW;YACX,OAAO;YACP,cAAc;YACd,UAAU;YACV,UAAU;YACV,OAAO;YACP,OAAO;YACP,OAAO;YACP,GAAG;YACH,QAAQ,CACX,CAAC;QAEF,mHAAmH;QACnH,mHAAmH;QACnH,MAAM,2BAA2B,GAAG,GAAG,EAAE;YACrC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAC/B,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,EAAE,CAAC;YAEtD,IAAI,4BAA4B,GAAG,CAAC,CAAC;YAErC,IAAI,SAAS,IAAI,IAAI;gBACjB,OAAO,CAAC,CAAC;YAEb,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,KAAK,EAAE;gBAChF,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,YAAY,IAAI,CAAC,CAAC;gBAC7E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,SAAS,EAAE,UAAU,IAAI,CAAC,CAAC;gBACpF,MAAM,eAAe,GAAG,OAAO,CAAC,gBAAgB,IAAI,CAAC,CAAC;gBAEtD,IAAI,WAAW,GAAG,CAAC,EAAE;oBACjB,MAAM,gBAAgB,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,iB
AAiB,IAAI,CAAC,CAAC;oBAEvF,OAAO,WAAW,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,gBAAgB,GAAG,CAAC,CAAC,GAAG,eAAe,CAAC,GAAG,CAAC,iBAAiB,GAAG,SAAS,CAAC,CAAC,CAAC;iBACnH;gBAED,OAAO,WAAW,GAAG,SAAS,GAAG,CAAC,eAAe,GAAG,CAAC,iBAAiB,GAAG,SAAS,CAAC,CAAC,CAAC;aACxF;iBAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,KAAK,EAAE;gBACvF,IAAI,cAAc,KAAK,IAAI,CAAC,WAAW,EAAE;oBACrC,4BAA4B,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,CAC3C,IAAI,CAAC,gBAAgB,IAAI,IAAI;wBACzB,CAAC,CAAC,CAAC;wBACH,CAAC,CAAC,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAClD,CAAC;iBACL;qBAAM;oBACH,4BAA4B,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,CAC3C,CAAC,IAAI,GAAG,EAAE,CAAC,GAAG,CACV,IAAI,CAAC,gBAAgB,IAAI,IAAI;wBACzB,CAAC,CAAC,CAAC;wBACH,CAAC,CAAC,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAClD,CACJ,CAAC;iBACL;aACJ;iBAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,KAAK,EAAE;gBACvF,uFAAuF;gBACvF,sEAAsE;gBAEtE,IAAI,cAAc,KAAK,IAAI,CAAC,WAAW,EAAE;oBACrC,4BAA4B,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC,GAAG,CAC1C,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,CACX,IAAI,CAAC,gBAAgB,IAAI,IAAI;wBACzB,CAAC,CAAC,CAAC;wBACH,CAAC,CAAC,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAClD,CACJ,CAAC;iBACL;qBAAM;oBACH,4BAA4B,IAAI,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,CAC5C,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,CACX,IAAI,CAAC,gBAAgB,IAAI,IAAI;wBACzB,CAAC,CAAC,CAAC;wBACH,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,CACvE,CACJ,CAAC;iBACL;aACJ;iBAAM,IAAI,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,QAAQ,EAAE;gBAC1F,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,SAAS,EAAE,UAAU,IAAI,CAAC,CAAC;gBAEpF,OAAO,CAAC,WAAW,GAAG,SAAS,GAAG,iBAAiB,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;gBAE/E,6CAA6C;gBAC7C,uDAAuD;gBACvD,2BAA2B;gBAC3B,4CAA4C;gBAC5C,sBAAsB;gBACtB,8DAA8D;gBAC9D,YAAY;gBACZ,SAAS;gBACT,WAAW;gBACX,uDAAuD;gBACvD,2BAA2B;gBAC3B,4CAA4C;gBAC5C,sBAAsB;gBACtB,8DAA8D;gBAC9D,YAAY;gBACZ,SAAS;gBACT,IAAI;aACP;YAED,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC;gBACzC,CAAC,CAAC,IAAI,CAAC,WAAW,GAAG,CACjB,CACI,CAAC,OAAO,CAAC,gBAAgB,IAAI
,CAAC,CAAC;oBAC/B,CAAC,OAAO,CAAC,mBAAmB,IAAI,CAAC,CAAC,CACrC,GAAG,CAAC,CACR;gBACD,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE;oBAChC,OAAO,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,GAAW,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBACtF,CAAC,EAAE,CAAC,CAAC,CAAC;YAEV,+DAA+D;YAC/D,OAAO,CAAC,aAAa,GAAG,MAAM,GAAG,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAC,GAAG,4BAA4B,CAAC;QAChG,CAAC,CAAC;QAEF,MAAM,mBAAmB,GAAG,CAAC,oBAAoB;YAC7C,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,2BAA2B,EAAE,CAAC;QAEpC,MAAM,QAAQ,GAAG,cAAc,KAAK,CAAC,CAAC;QAEtC,MAAM,MAAM,GAAG,CACX,CAAC,QAAQ;YACL,CAAC,CAAC,CAAC,cAAc,GAAG,mBAAmB,CAAC;YACxC,CAAC,CAAC,CAAC,CACV;YACG,IAAI,CAAC,4BAA4B,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QACzE,MAAM,OAAO,GAAG,QAAQ;YACpB,CAAC,CAAC,CACE,cAAc;gBACd,mBAAmB;gBACnB,IAAI,CAAC,4BAA4B,CAC7B,iBAAiB,EACjB,cAAc,GAAG,WAAW;oBACxB,CAAC,CAAC,CAAC,cAAc,GAAG,CAAC,CAAC;oBACtB,CAAC,CAAC,cAAc,CACvB,CACJ;YACD,CAAC,CAAC,CAAC,CAAC;QAER,OAAO;YACH,MAAM;YACN,OAAO;SACV,CAAC;IACN,CAAC;IAED;;;OAGG;IACI,uBAAuB,CAAC,SAAiB;QAI5C,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,EAAE,CAAC;QAEtD,IAAI,SAAS,KAAK,CAAC,EAAE;YACjB,OAAO;gBACH,GAAG,EAAE,UAAU;gBACf,GAAG,EAAE,EAAE;aACV,CAAC;SACL;QAED,MAAM,UAAU,GAAqB,EAAE,CAAC;QACxC,MAAM,UAAU,GAAqB,EAAE,CAAC;QAExC,KAAK,MAAM,gBAAgB,IAAI,UAAU,EAAE;YACvC,MAAM,EAAC,WAAW,EAAC,GAAG,eAAe,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;YAE7D,IAAI,SAAS,KAAK,IAAI,CAAC,WAAW,EAAE;gBAChC,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,EAAE,YAAY,CAAC;gBAEvE,IAAI,YAAY,KAAK,oBAAoB,CAAC,KAAK,IAAI,YAAY,KAAK,oBAAoB,CAAC,KAAK,EAAE;oBAC5F,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,GAAG,SAAS;wBAC9C,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;;wBAElC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;oBAEtC,SAAS;iBACZ;aACJ;YAED,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,GAAG,SAAS;gBAC9C,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;;gBAElC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;SACzC;QAED,OAAO;YACH,GAAG,EAAE,UAAU;YACf,GAAG,EAAE,UAAU;SAClB,CAAC;IACN,CAAC;IAED,gBAAgB;IACT,sCAAsC;QACzC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;QAEvC,KAAK,MAAM,gBAAgB,IAAI,C
AAC,IAAI,CAAC,YAAY,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE;YACjE,MAAM,EAAC,WAAW,EAAC,GAAG,eAAe,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;YAE7D,IAAI,WAAW,IAAI,IAAI;gBACnB,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;SACrC;QAED,OAAO,YAAY,CAAC,IAAI,CAAC;IAC7B,CAAC;IAED,gBAAgB;IACT,cAAc;QACjB,OAAO,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,WAAW,IAAI,IAAI,CAAC,sCAAsC,EAAE,CAAC;IAC/G,CAAC;IAED,gBAAgB;IACT,4BAA4B,CAAC,WAAmB,EAAE,MAAc;QACnE,+CAA+C;QAC/C,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,SAAS,EAAE,UAAU,IAAI,CAAC,CAAC;QAChF,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,gBAAgB,IAAI,CAAC,CAAC;QAC3E,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,SAAS,EAAE,UAAU,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC;QACxH,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,SAAS,EAAE,aAAa,IAAI,KAAK,CAAC;QACzF,MAAM,cAAc,GAAG,UAAU,GAAG,OAAO,CAAC;QAE5C,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,GAAG,EAAE,WAAW,IAAI,CAAC,CAAC;QAC9E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,GAAG,EAAE,UAAU,IAAI,CAAC,CAAC;QAC9E,MAAM,YAAY,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;QAErE,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,SAAS,EAAE,YAAY,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC;QACxH,MAAM,cAAc,GAAG,UAAU,GAAG,OAAO,CAAC;QAE5C,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,GAAG,EAAE,UAAU,IAAI,CAAC,CAAC;QAC9E,MAAM,YAAY,GAAG,SAAS,GAAG,SAAS,CAAC;QAE3C,MAAM,cAAc,GAAG,cAAc,GAAG,YAAY,CAAC;QACrD,MAAM,cAAc,GAAG,cAAc,GAAG,YAAY,CAAC;QAErD,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,KAAK;YAC9F,sFAAsF;YACtF,0CAA0C;YAC1C,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,eAAe;YACrC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC;QAC1C,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,KAAK,oBAAoB,CAAC,KAAK;YAChG,sFAAsF;YACtF,0CAA0C;YAC1C,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,eAAe;YACrC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC;QAE
1C,MAAM,cAAc,GAAG,MAAM,GAAG,cAAc,GAAG,WAAW,GAAG,WAAW,CAAC;QAC3E,MAAM,gBAAgB,GAAG,MAAM,GAAG,cAAc,GAAG,WAAW,GAAG,aAAa,CAAC;QAE/E,OAAO,cAAc,GAAG,gBAAgB,CAAC;IAC7C,CAAC;IAED;;;;;;OAMG;IACI,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,YAA0B,EAAE,KAAa;QAC9D,IAAI,aAAa,GAAG,KAAK,CAAC;QAC1B,IAAI,aAAa,IAAI,IAAI;YACrB,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;QAEnD,OAAO,IAAI,YAAY,CAAC,YAAY,EAAE,aAAa,CAAC,CAAC;IACzD,CAAC;CACJ;AAED,SAAS,eAAe,CAAC,UAAmB;IAGxC,IAAI,UAAU,IAAI,IAAI;QAClB,OAAO,EAAC,WAAW,EAAE,SAAS,EAAC,CAAC;IAEpC,MAAM,iBAAiB,GAAG,MAAM,CAAC;IACjC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,iBAAiB,CAAC;QACzC,OAAO,EAAC,WAAW,EAAE,SAAS,EAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,iBAAiB,CAAC,MAAM,CAAC,CAAC;IACnE,MAAM,iBAAiB,GAAG,UAAU,CAAC,KAAK,CACtC,iBAAiB,CAAC,MAAM,EACxB,QAAQ,GAAG,CAAC;QACR,CAAC,CAAC,UAAU,CAAC,MAAM;QACnB,CAAC,CAAC,QAAQ,CACjB,CAAC;IAEF,MAAM,WAAW,GAAG,QAAQ,CAAC,iBAAiB,CAAC,CAAC;IAChD,IAAI,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC;QAC5B,OAAO,EAAC,WAAW,EAAC,CAAC;IAEzB,OAAO,EAAC,WAAW,EAAE,SAAS,EAAC,CAAC;AACpC,CAAC;AAED,SAAS,oBAAoB,CAAC,WAA6B,EAAE,KAAY;IACrE,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,MAAM,UAAU,IAAI,WAAW;QAChC,IAAI,IAAI,mBAAmB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;IAEnD,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,MAAsB,EAAE,KAAY;IAC7D,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,CAAC,sBAAsB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACzE,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC,uBAAuB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAC3E,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC;IAE9C,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI;QACrC,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAElD,MAAM,EAAC,EAAE,EAAE,EAAE,EAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,EAAC,QAAQ,EAAE,SAAS,EAAE,WAAW,EAAC,CAAC,CAAC;IAE9E,IAAI,SAAS,KAAK,CAAC,EAAE;QACjB,IAAI,UAAU,GAAG,QAAQ,CAAC;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE;YAClC,UAAU,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACrC;QAED,OAAO,UAAU,CAAC;KACrB;SAAM;QACH,IAAI,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;QA
CzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE;YAClC,UAAU,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;SACrC;QAED,OAAO,UAAU,CAAC;KACrB;AACL,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAsB,EAAE,EAC9C,QAAQ,EAAE,SAAS,EAAE,WAAW,EAGnC;IACG,qBAAqB;IACrB,6CAA6C;IAC7C,MAAM,EAAE,GAAG;QACP,GAAG,MAAM,CAAC,UAAU;QACpB,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KAC1E,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAExB,kBAAkB;IAClB,6CAA6C;IAC7C,MAAM,EAAE,GAAG;QACP,QAAQ;QACR,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;QAC1C,GAAG,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;KACpC,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE;QAClC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;KACjC;IAED,OAAO;QACH,EAAE;QACF,EAAE;KACL,CAAC;AACN,CAAC"}
@@ -0,0 +1,3 @@
1
+ import retry from "async-retry";
2
+ /** Default `async-retry` options: 10 retries with a backoff factor of 2, timeouts between 1000 and 16000 */ export declare const ggufDefaultFetchRetryOptions: retry.Options;
3
+ /** Default extra allocation size: 1.5MB (1024 * 1024 * 1.5) */ export declare const defaultExtraAllocationSize: number;
@@ -0,0 +1,8 @@
1
/** Default retry options: up to 10 retries with a backoff factor of 2, timeouts between 1000 and 16000 */
export const ggufDefaultFetchRetryOptions = {
    retries: 10,
    factor: 2,
    minTimeout: 1000,
    maxTimeout: 16 * 1000
};
/** Default extra allocation size: 1.5MB */
export const defaultExtraAllocationSize = 1.5 * 1024 * 1024;
8
+ //# sourceMappingURL=consts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"consts.js","sourceRoot":"","sources":["../../src/gguf/consts.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,4BAA4B,GAAkB;IACvD,OAAO,EAAE,EAAE;IACX,MAAM,EAAE,CAAC;IACT,UAAU,EAAE,IAAI;IAChB,UAAU,EAAE,IAAI,GAAG,EAAE;CACf,CAAC;AAEX,MAAM,CAAC,MAAM,0BAA0B,GAAG,IAAI,GAAG,IAAI,GAAG,GAAG,CAAC,CAAC,QAAQ"}
@@ -0,0 +1,3 @@
1
+ /** Error whose message states the expected GGUF magic next to the one that was actually found */ export declare class InvalidGgufMagicError extends Error {
2
+ constructor(expectedGgufMagic: string, actualGgufMagic: string);
3
+ }
@@ -0,0 +1,6 @@
1
/** Error whose message states the expected GGUF magic next to the one that was actually found */
export class InvalidGgufMagicError extends Error {
    /**
     * @param expectedGgufMagic - the magic value that was expected
     * @param actualGgufMagic - the magic value that was found instead
     */
    constructor(expectedGgufMagic, actualGgufMagic) {
        const message = `Invalid GGUF magic. Expected "${expectedGgufMagic}" but got "${actualGgufMagic}".`;
        super(message);
    }
}
6
+ //# sourceMappingURL=InvalidGgufMagicError.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"InvalidGgufMagicError.js","sourceRoot":"","sources":["../../../src/gguf/errors/InvalidGgufMagicError.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,qBAAsB,SAAQ,KAAK;IAC5C,YAAmB,iBAAyB,EAAE,eAAuB;QACjE,KAAK,CAAC,iCAAiC,iBAAiB,cAAc,eAAe,IAAI,CAAC,CAAC;IAC/F,CAAC;CACJ"}
@@ -0,0 +1,4 @@
1
+ /** Error reporting a GGUF value type that is not supported */ export declare class UnsupportedGgufValueTypeError extends Error {
2
+ /** The unsupported value type (set as a non-enumerable property) */ readonly ggufValueType: number;
3
+ constructor(ggufValueType: number);
4
+ }
@@ -0,0 +1,9 @@
1
/** Error reporting a GGUF value type that is not supported */
export class UnsupportedGgufValueTypeError extends Error {
    /** The unsupported value type; kept non-enumerable */
    ggufValueType;
    /**
     * @param ggufValueType - the unsupported value type; it is included in the error message
     */
    constructor(ggufValueType) {
        super(`Unsupported GGUF value type "${ggufValueType}"`);
        // Define the complete descriptor in one step instead of flipping `enumerable` and assigning afterwards:
        // when the field declaration above is not emitted as an own property (e.g. down-levelled output),
        // a descriptor with only `enumerable` would create a non-writable property and the assignment would throw
        Object.defineProperty(this, "ggufValueType", {
            value: ggufValueType,
            writable: true,
            configurable: true,
            enumerable: false
        });
    }
}
9
+ //# sourceMappingURL=UnsupportedGgufValueTypeError.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"UnsupportedGgufValueTypeError.js","sourceRoot":"","sources":["../../../src/gguf/errors/UnsupportedGgufValueTypeError.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,6BAA8B,SAAQ,KAAK;IACpC,aAAa,CAAS;IAEtC,YAAmB,aAAqB;QACpC,KAAK,CAAC,gCAAgC,aAAa,GAAG,CAAC,CAAC;QAExD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,eAAoC,EAAE,EAAC,UAAU,EAAE,KAAK,EAAC,CAAC,CAAC;QAEvF,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACvC,CAAC;CACJ"}
@@ -0,0 +1,33 @@
1
+ /// <reference types="node" />
2
+ import { GgufReadOffset } from "../utils/GgufReadOffset.js";
3
+ /** Maps each fixed-size value type name to the number of bytes to read for it */ export declare const valueTypeToBytesToRead: {
4
+ readonly uint8: 1;
5
+ readonly uint16: 2;
6
+ readonly uint32: 4;
7
+ readonly uint64: 8;
8
+ readonly int8: 1;
9
+ readonly int16: 2;
10
+ readonly int32: 4;
11
+ readonly int64: 8;
12
+ readonly float32: 4;
13
+ readonly float64: 8;
14
+ readonly bool: 1;
15
+ };
16
+ /** Abstract reader: subclasses implement `readByteRange`; the typed `read*` methods each take an offset (a number or a `GgufReadOffset`) and resolve to the read value */ export declare abstract class GgufFileReader {
17
+ protected _buffer: Buffer;
18
+ /** Resolves to `length` bytes starting at `offset`; implemented by subclasses */ abstract readByteRange(offset: number | GgufReadOffset, length: number): Promise<Buffer>;
19
+ readUint8(offset: number | GgufReadOffset): Promise<number>;
20
+ readUint16(offset: number | GgufReadOffset): Promise<number>;
21
+ readUint32(offset: number | GgufReadOffset): Promise<number>;
22
+ /** 64-bit values resolve to a `bigint` */ readUint64(offset: number | GgufReadOffset): Promise<bigint>;
23
+ readInt8(offset: number | GgufReadOffset): Promise<number>;
24
+ readInt16(offset: number | GgufReadOffset): Promise<number>;
25
+ readInt32(offset: number | GgufReadOffset): Promise<number>;
26
+ /** 64-bit values resolve to a `bigint` */ readInt64(offset: number | GgufReadOffset): Promise<bigint>;
27
+ readFloat32(offset: number | GgufReadOffset): Promise<number>;
28
+ readFloat64(offset: number | GgufReadOffset): Promise<number>;
29
+ readBool(offset: number | GgufReadOffset): Promise<boolean>;
30
+ protected _addToBuffer(buffer: Buffer): void;
31
+ private _readByteRangeAndUpdateOffset;
32
+ /** Returns either a number or a bigint - presumably a number whenever the value is within the safe integer range (NOTE(review): confirm against the implementation) */ static castNumberIfSafe(value: bigint): number | bigint;
33
+ }