node-llama-cpp 3.0.0-beta.11 → 3.0.0-beta.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209) hide show
  1. package/README.md +4 -4
  2. package/dist/ChatWrapper.d.ts +1 -0
  3. package/dist/ChatWrapper.js +2 -1
  4. package/dist/ChatWrapper.js.map +1 -1
  5. package/dist/TemplateChatWrapper.d.ts +67 -0
  6. package/dist/TemplateChatWrapper.js +239 -0
  7. package/dist/TemplateChatWrapper.js.map +1 -0
  8. package/dist/bindings/AddonTypes.d.ts +2 -0
  9. package/dist/bindings/Llama.d.ts +1 -2
  10. package/dist/bindings/Llama.js +10 -14
  11. package/dist/bindings/Llama.js.map +1 -1
  12. package/dist/bindings/consts.d.ts +2 -0
  13. package/dist/bindings/consts.js +11 -0
  14. package/dist/bindings/consts.js.map +1 -0
  15. package/dist/bindings/getLlama.d.ts +14 -12
  16. package/dist/bindings/getLlama.js +210 -75
  17. package/dist/bindings/getLlama.js.map +1 -1
  18. package/dist/bindings/types.d.ts +8 -4
  19. package/dist/bindings/types.js +18 -0
  20. package/dist/bindings/types.js.map +1 -1
  21. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  22. package/dist/bindings/utils/asyncEvery.js +15 -0
  23. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  24. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  25. package/dist/bindings/utils/asyncSome.js +27 -0
  26. package/dist/bindings/utils/asyncSome.js.map +1 -0
  27. package/dist/bindings/utils/cloneLlamaCppRepo.js +13 -3
  28. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
  29. package/dist/bindings/utils/compileLLamaCpp.js +31 -3
  30. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  31. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +11 -0
  32. package/dist/bindings/utils/detectAvailableComputeLayers.js +158 -0
  33. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  34. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  35. package/dist/bindings/utils/detectGlibc.js +36 -0
  36. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  37. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  38. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  39. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  40. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +12 -4
  41. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  42. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
  43. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  44. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  45. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  46. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  47. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  48. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  49. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  50. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  51. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  52. package/dist/bindings/utils/hasFileInPath.js +34 -0
  53. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  54. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
  55. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
  56. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
  57. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +13 -0
  58. package/dist/bindings/utils/logDistroInstallInstruction.js +38 -0
  59. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  60. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +9 -2
  61. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +10 -4
  62. package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -1
  63. package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -0
  64. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
  65. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  66. package/dist/bindings/utils/testBindingBinary.js +98 -0
  67. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  68. package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
  69. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  70. package/dist/chatWrappers/GemmaChatWrapper.d.ts +18 -0
  71. package/dist/chatWrappers/GemmaChatWrapper.js +86 -0
  72. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  73. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +3 -0
  74. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  75. package/dist/cli/cli.js +2 -0
  76. package/dist/cli/cli.js.map +1 -1
  77. package/dist/cli/commands/BuildCommand.d.ts +5 -4
  78. package/dist/cli/commands/BuildCommand.js +78 -58
  79. package/dist/cli/commands/BuildCommand.js.map +1 -1
  80. package/dist/cli/commands/DebugCommand.js +12 -15
  81. package/dist/cli/commands/DebugCommand.js.map +1 -1
  82. package/dist/cli/commands/DownloadCommand.d.ts +5 -4
  83. package/dist/cli/commands/DownloadCommand.js +97 -54
  84. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  85. package/dist/cli/commands/InspectCommand.d.ts +7 -0
  86. package/dist/cli/commands/InspectCommand.js +113 -0
  87. package/dist/cli/commands/InspectCommand.js.map +1 -0
  88. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  89. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  90. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  91. package/dist/config.d.ts +3 -2
  92. package/dist/config.js +12 -10
  93. package/dist/config.js.map +1 -1
  94. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  95. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  96. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
  97. package/dist/gguf/GGUFInsights.d.ts +28 -0
  98. package/dist/gguf/GGUFInsights.js +58 -0
  99. package/dist/gguf/GGUFInsights.js.map +1 -0
  100. package/dist/gguf/GGUFMetadata.d.ts +19 -0
  101. package/dist/gguf/GGUFMetadata.js +38 -0
  102. package/dist/gguf/GGUFMetadata.js.map +1 -0
  103. package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +3 -0
  104. package/dist/gguf/errors/InvalidGGUFMagicError.js +6 -0
  105. package/dist/gguf/errors/InvalidGGUFMagicError.js.map +1 -0
  106. package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +3 -0
  107. package/dist/gguf/errors/MetadataNotParsedYetError.js +6 -0
  108. package/dist/gguf/errors/MetadataNotParsedYetError.js.map +1 -0
  109. package/dist/gguf/errors/MissingNodeLlamaError.d.ts +3 -0
  110. package/dist/gguf/errors/MissingNodeLlamaError.js +6 -0
  111. package/dist/gguf/errors/MissingNodeLlamaError.js.map +1 -0
  112. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +5 -0
  113. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +12 -0
  114. package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +1 -0
  115. package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +4 -0
  116. package/dist/gguf/errors/UnsupportedMetadataTypeError.js +8 -0
  117. package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +1 -0
  118. package/dist/gguf/ggufParser/GGUFParser.d.ts +18 -0
  119. package/dist/gguf/ggufParser/GGUFParser.js +123 -0
  120. package/dist/gguf/ggufParser/GGUFParser.js.map +1 -0
  121. package/dist/gguf/ggufParser/GGUFTypes.d.ts +257 -0
  122. package/dist/gguf/ggufParser/GGUFTypes.js +2 -0
  123. package/dist/gguf/ggufParser/GGUFTypes.js.map +1 -0
  124. package/dist/gguf/ggufParser/checkArchitecture.d.ts +14 -0
  125. package/dist/gguf/ggufParser/checkArchitecture.js +74 -0
  126. package/dist/gguf/ggufParser/checkArchitecture.js.map +1 -0
  127. package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +38 -0
  128. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +83 -0
  129. package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +1 -0
  130. package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +14 -0
  131. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +35 -0
  132. package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +1 -0
  133. package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +15 -0
  134. package/dist/gguf/ggufParser/stream/GGUFReadStream.js +40 -0
  135. package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +1 -0
  136. package/dist/index.d.ts +3 -1
  137. package/dist/index.js +3 -1
  138. package/dist/index.js.map +1 -1
  139. package/dist/utils/LlamaText.js +2 -2
  140. package/dist/utils/LlamaText.js.map +1 -1
  141. package/dist/utils/cmake.js +23 -10
  142. package/dist/utils/cmake.js.map +1 -1
  143. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  144. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  145. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  146. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  147. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  148. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  149. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  150. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  151. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  152. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  153. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  154. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  155. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  156. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  157. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  158. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  159. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +1 -1
  160. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  161. package/dist/utils/getBuildDefaults.d.ts +1 -2
  162. package/dist/utils/getBuildDefaults.js +2 -3
  163. package/dist/utils/getBuildDefaults.js.map +1 -1
  164. package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
  165. package/dist/utils/getConsoleLogPrefix.js +2 -2
  166. package/dist/utils/getConsoleLogPrefix.js.map +1 -1
  167. package/dist/utils/mergeUnionTypes.d.ts +6 -0
  168. package/dist/utils/mergeUnionTypes.js +2 -0
  169. package/dist/utils/mergeUnionTypes.js.map +1 -0
  170. package/dist/utils/parseTextTemplate.d.ts +66 -0
  171. package/dist/utils/parseTextTemplate.js +116 -0
  172. package/dist/utils/parseTextTemplate.js.map +1 -0
  173. package/llama/CMakeLists.txt +30 -4
  174. package/llama/addon.cpp +62 -7
  175. package/llama/binariesGithubRelease.json +1 -1
  176. package/llama/gitRelease.bundle +0 -0
  177. package/llama/gpuInfo/cuda-gpu-info.cu +5 -5
  178. package/llama/gpuInfo/cuda-gpu-info.h +2 -2
  179. package/llama/gpuInfo/vulkan-gpu-info.cpp +65 -0
  180. package/llama/gpuInfo/vulkan-gpu-info.h +7 -0
  181. package/llama/llama.cpp.info.json +1 -1
  182. package/llamaBins/linux-arm64/.buildMetadata.json +1 -1
  183. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  184. package/llamaBins/linux-armv7l/.buildMetadata.json +1 -1
  185. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  186. package/llamaBins/linux-x64/.buildMetadata.json +1 -1
  187. package/llamaBins/linux-x64/llama-addon.node +0 -0
  188. package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -1
  189. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  190. package/llamaBins/linux-x64-vulkan/.buildMetadata.json +1 -0
  191. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  192. package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -1
  193. package/llamaBins/mac-arm64-metal/ggml-metal.metal +1382 -142
  194. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  195. package/llamaBins/mac-x64/.buildMetadata.json +1 -1
  196. package/llamaBins/mac-x64/llama-addon.node +0 -0
  197. package/llamaBins/win-x64/.buildMetadata.json +1 -1
  198. package/llamaBins/win-x64/llama-addon.exp +0 -0
  199. package/llamaBins/win-x64/llama-addon.lib +0 -0
  200. package/llamaBins/win-x64/llama-addon.node +0 -0
  201. package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -1
  202. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  203. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  204. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  205. package/llamaBins/win-x64-vulkan/.buildMetadata.json +1 -0
  206. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  207. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  208. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  209. package/package.json +7 -4
@@ -0,0 +1,116 @@
1
+ import { splitText } from "lifecycle-utils";
2
+ /**
3
+ * Parses a text template into a map of parts and their prefixes and suffixes.
4
+ * This parser assumes each part occurs exactly once in the template (parts marked as `optional` may be omitted), and that all parts must occur in the order they are defined.
5
+ * For example,
6
+ * ```typescript
7
+ * const res = parseTextTemplate(
8
+ * "Hello, {{name}}! What is the {{thing}}?",
9
+ * [{
10
+ * key: "name",
11
+ * text: "{{name}}"
12
+ * }, {
13
+ * key: "thing",
14
+ * text: "{{thing}}"
15
+ * }]
16
+ * );
17
+ * expect(res).to.eql({
18
+ * name: {
19
+ * prefix: "Hello, ",
20
+ * suffix: "! What is the "
21
+ * },
22
+ * thing: {
23
+ * prefix: "! What is the ",
24
+ * suffix: "?"
25
+ * }
26
+ * });
27
+ * ```
28
+ *
29
+ * ```typescript
30
+ * const res2 = parseTextTemplate(
31
+ * "What is the {{thing}}?",
32
+ * [{
33
+ * key: "name",
34
+ * text: "{{name}}",
35
+ * optional: true
36
+ * }, {
37
+ * key: "thing",
38
+ * text: "{{thing}}"
39
+ * }]
40
+ * );
41
+ * expect(res2).to.eql({
42
+ * thing: {
43
+ * prefix: "What is the ",
44
+ * suffix: "?"
45
+ * }
46
+ * });
47
+ * ```
48
+ */
49
+ export function parseTextTemplate(template, parts) {
50
+ const result = {};
51
+ const templateParts = splitText(template, parts.map((part) => part.text));
52
+ let partIndex = 0;
53
+ for (let i = 0; i < templateParts.length; i++) {
54
+ const textPart = templateParts[i];
55
+ if (typeof textPart === "string")
56
+ continue;
57
+ for (; partIndex < parts.length; partIndex++) {
58
+ const part = parts[partIndex];
59
+ if (textPart.separator === part.text) {
60
+ const previousItem = i > 0
61
+ ? templateParts[i - 1]
62
+ : null;
63
+ const nextItem = i < templateParts.length - 1
64
+ ? templateParts[i + 1]
65
+ : null;
66
+ result[part.key] = {
67
+ prefix: typeof previousItem === "string"
68
+ ? previousItem
69
+ : "",
70
+ suffix: typeof nextItem === "string"
71
+ ? nextItem
72
+ : ""
73
+ };
74
+ partIndex++;
75
+ break;
76
+ }
77
+ if (part.optional != true) {
78
+ if (result[part.key] != null)
79
+ throw new Error(`Template must contain exactly one "${part.text}"`);
80
+ else if (partIndex > 0) {
81
+ const previousNonOptionalOrFoundPart = parts
82
+ .slice(0, partIndex)
83
+ .reverse()
84
+ .find((p) => (p.optional != true || result[p.key] != null));
85
+ if (previousNonOptionalOrFoundPart != null)
86
+ throw new Error(`Template must contain "${part.text}" after "${previousNonOptionalOrFoundPart.text}"`);
87
+ throw new Error(`Template must contain "${part.text}" at the beginning`);
88
+ }
89
+ else
90
+ throw new Error(`Template must contain "${part.text}" at the beginning`);
91
+ }
92
+ else
93
+ result[part.key] = undefined;
94
+ }
95
+ }
96
+ for (; partIndex < parts.length; partIndex++) {
97
+ const part = parts[partIndex];
98
+ if (part.optional == true) {
99
+ result[part.key] = undefined;
100
+ continue;
101
+ }
102
+ if (partIndex > 0) {
103
+ const previousNonOptionalOrFoundPart = parts
104
+ .slice(0, partIndex)
105
+ .reverse()
106
+ .find((p) => (p.optional != true || result[p.key] != null));
107
+ if (previousNonOptionalOrFoundPart != null)
108
+ throw new Error(`Template must contain "${part.text}" after "${previousNonOptionalOrFoundPart.text}"`);
109
+ throw new Error(`Template must contain "${part.text}" at the beginning`);
110
+ }
111
+ else
112
+ throw new Error(`Template must contain "${part.text}" at the beginning`);
113
+ }
114
+ return result;
115
+ }
116
+ //# sourceMappingURL=parseTextTemplate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parseTextTemplate.js","sourceRoot":"","sources":["../../src/utils/parseTextTemplate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,SAAS,EAAC,MAAM,iBAAiB,CAAC;AAG1C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8CG;AACH,MAAM,UAAU,iBAAiB,CAC7B,QAAgB,EAAE,KAAY;IAE9B,MAAM,MAAM,GAKR,EAAE,CAAC;IAEP,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAE1E,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QAC3C,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;QAElC,IAAI,OAAO,QAAQ,KAAK,QAAQ;YAC5B,SAAS;QAEb,OAAO,SAAS,GAAG,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE;YAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;YAE9B,IAAI,QAAQ,CAAC,SAAS,KAAK,IAAI,CAAC,IAAI,EAAE;gBAClC,MAAM,YAAY,GAAG,CAAC,GAAG,CAAC;oBACtB,CAAC,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC;oBACtB,CAAC,CAAC,IAAI,CAAC;gBACX,MAAM,QAAQ,GAAG,CAAC,GAAG,aAAa,CAAC,MAAM,GAAG,CAAC;oBACzC,CAAC,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,CAAC;oBACtB,CAAC,CAAC,IAAI,CAAC;gBAEX,MAAM,CAAC,IAAI,CAAC,GAA2B,CAAC,GAAG;oBACvC,MAAM,EAAE,OAAO,YAAY,KAAK,QAAQ;wBACpC,CAAC,CAAC,YAAY;wBACd,CAAC,CAAC,EAAE;oBACR,MAAM,EAAE,OAAO,QAAQ,KAAK,QAAQ;wBAChC,CAAC,CAAC,QAAQ;wBACV,CAAC,CAAC,EAAE;iBACX,CAAC;gBACF,SAAS,EAAE,CAAC;gBACZ,MAAM;aACT;YAED,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;gBACvB,IAAI,MAAM,CAAC,IAAI,CAAC,GAA2B,CAAC,IAAI,IAAI;oBAChD,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;qBACnE,IAAI,SAAS,GAAG,CAAC,EAAE;oBACpB,MAAM,8BAA8B,GAAG,KAAK;yBACvC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC;yBACnB,OAAO,EAAE;yBACT,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,GAA2B,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;oBAExF,IAAI,8BAA8B,IAAI,IAAI;wBACtC,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,CAAC,IAAI,YAAY,8BAA8B,CAAC,IAAI,GAAG,CAAC,CAAC;oBAE3G,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,CAAC,IAAI,oBAAoB,CAAC,CAAC;iBAC5E;;oBACG,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,CAAC,IAAI,oBAAoB,CAAC,CAAC;aAChF;;gBACG,MAAM,CAAC,IAAI,CAAC,GAA2B,CAAC,GAAG,SAAS,CAAC;SAC5D;KACJ;IAED,O
AAO,SAAS,GAAG,KAAK,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;QAE9B,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACvB,MAAM,CAAC,IAAI,CAAC,GAA2B,CAAC,GAAG,SAAS,CAAC;YACrD,SAAS;SACZ;QAED,IAAI,SAAS,GAAG,CAAC,EAAE;YACf,MAAM,8BAA8B,GAAG,KAAK;iBACvC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC;iBACnB,OAAO,EAAE;iBACT,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,GAA2B,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;YAExF,IAAI,8BAA8B,IAAI,IAAI;gBACtC,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,CAAC,IAAI,YAAY,8BAA8B,CAAC,IAAI,GAAG,CAAC,CAAC;YAE3G,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,CAAC,IAAI,oBAAoB,CAAC,CAAC;SAC5E;;YACG,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,CAAC,IAAI,oBAAoB,CAAC,CAAC;KAChF;IAED,OAAO,MAA0C,CAAC;AACtD,CAAC"}
@@ -3,9 +3,15 @@ cmake_minimum_required(VERSION 3.13)
3
3
  project("llama-addon" C CXX)
4
4
 
5
5
  if (MSVC)
6
- # add_compile_options(/EHsc)
6
+ if (LLAMA_STATIC)
7
+ add_link_options(-static)
8
+ if (MINGW)
9
+ add_link_options(-static-libgcc -static-libstdc++)
10
+ endif()
11
+ endif()
12
+ # add_compile_options(/EHsc)
7
13
  else()
8
- add_compile_options(-fexceptions)
14
+ add_compile_options(-fexceptions)
9
15
  endif()
10
16
 
11
17
  add_definitions(-DNAPI_VERSION=7)
@@ -54,7 +60,27 @@ if (LLAMA_CUBLAS)
54
60
  endif()
55
61
  endif()
56
62
  else()
57
- message(WARNING "cuBLAS not found. Not using it for GPU info")
63
+ message(FATAL_ERROR "cuBLAS was not found")
64
+ endif()
65
+ endif()
66
+
67
+ if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
68
+ find_package(Vulkan)
69
+ if (Vulkan_FOUND)
70
+ if (LLAMA_VULKAN)
71
+ message(STATUS "Using Vulkan for GPU info")
72
+ elseif (LLAMA_KOMPUTE)
73
+ message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
74
+ endif()
75
+
76
+ set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h)
77
+ set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp)
78
+
79
+ add_compile_definitions(GPU_INFO_USE_VULKAN)
80
+
81
+ set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} Vulkan::Vulkan)
82
+ else()
83
+ message(FATAL_ERROR "Vulkan was not found")
58
84
  endif()
59
85
  endif()
60
86
 
@@ -81,7 +107,7 @@ if (LLAMA_HIPBLAS)
81
107
 
82
108
  set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} gpu-info-rocm)
83
109
  else()
84
- message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
110
+ message(FATAL_ERROR "hipBLAS or HIP was not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
85
111
  endif()
86
112
  endif()
87
113
 
package/llama/addon.cpp CHANGED
@@ -12,6 +12,9 @@
12
12
  #ifdef GPU_INFO_USE_CUBLAS
13
13
  # include "gpuInfo/cuda-gpu-info.h"
14
14
  #endif
15
+ #ifdef GPU_INFO_USE_VULKAN
16
+ # include "gpuInfo/vulkan-gpu-info.h"
17
+ #endif
15
18
  #ifdef GPU_INFO_USE_METAL
16
19
  # include "gpuInfo/metal-gpu-info.h"
17
20
  #endif
@@ -35,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction =
35
38
  AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
36
39
  bool addonJsLoggerCallbackSet = false;
37
40
  int addonLoggerLogLevel = 5;
41
+ bool backendInitialized = false;
38
42
 
39
43
  std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
40
44
  std::vector<char> result(8, 0);
@@ -51,10 +55,15 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
51
55
  }
52
56
 
53
57
  #ifdef GPU_INFO_USE_CUBLAS
54
- void lodCudaError(const char* message) {
58
+ void logCudaError(const char* message) {
55
59
  addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
56
60
  }
57
61
  #endif
62
+ #ifdef GPU_INFO_USE_VULKAN
63
+ void logVulkanWarning(const char* message) {
64
+ addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
65
+ }
66
+ #endif
58
67
 
59
68
  Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
60
69
  uint64_t total = 0;
@@ -63,7 +72,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
63
72
  #ifdef GPU_INFO_USE_CUBLAS
64
73
  size_t cudaDeviceTotal = 0;
65
74
  size_t cudaDeviceUsed = 0;
66
- bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
75
+ bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
67
76
 
68
77
  if (cudeGetInfoSuccess) {
69
78
  total += cudaDeviceTotal;
@@ -71,6 +80,17 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
71
80
  }
72
81
  #endif
73
82
 
83
+ #ifdef GPU_INFO_USE_VULKAN
84
+ uint64_t vulkanDeviceTotal = 0;
85
+ uint64_t vulkanDeviceUsed = 0;
86
+ const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
87
+
88
+ if (vulkanDeviceSupportsMemoryBudgetExtension) {
89
+ total += vulkanDeviceTotal;
90
+ used += vulkanDeviceUsed;
91
+ }
92
+ #endif
93
+
74
94
  #ifdef GPU_INFO_USE_METAL
75
95
  uint64_t metalDeviceTotal = 0;
76
96
  uint64_t metalDeviceUsed = 0;
@@ -87,6 +107,22 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
87
107
  return result;
88
108
  }
89
109
 
110
+ Napi::Value getGpuType(const Napi::CallbackInfo& info) {
111
+ #ifdef GPU_INFO_USE_CUBLAS
112
+ return Napi::String::New(info.Env(), "cuda");
113
+ #endif
114
+
115
+ #ifdef GPU_INFO_USE_VULKAN
116
+ return Napi::String::New(info.Env(), "vulkan");
117
+ #endif
118
+
119
+ #ifdef GPU_INFO_USE_METAL
120
+ return Napi::String::New(info.Env(), "metal");
121
+ #endif
122
+
123
+ return info.Env().Undefined();
124
+ }
125
+
90
126
  static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
91
127
  auto tokenType = llama_token_get_type(model, token);
92
128
 
@@ -100,7 +136,7 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
100
136
  static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
101
137
  auto tokenType = llama_token_get_type(model, token);
102
138
 
103
- if (tokenType != LLAMA_TOKEN_TYPE_CONTROL) {
139
+ if (tokenType != LLAMA_TOKEN_TYPE_CONTROL && tokenType != LLAMA_TOKEN_TYPE_USER_DEFINED) {
104
140
  return Napi::Number::From(info.Env(), -1);
105
141
  }
106
142
 
@@ -641,7 +677,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
641
677
  int32_t endPos = info[2].As<Napi::Number>().Int32Value();
642
678
  int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
643
679
 
644
- llama_kv_cache_seq_shift(ctx, sequenceId, startPos, endPos, shiftDelta);
680
+ llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
645
681
 
646
682
  return info.Env().Undefined();
647
683
  }
@@ -950,7 +986,7 @@ void addonCallJsLogCallback(
950
986
  called = false;
951
987
  }
952
988
  }
953
-
989
+
954
990
  if (!called && data != nullptr) {
955
991
  if (data->logLevelNumber == 2) {
956
992
  fputs(data->stringStream->str().c_str(), stderr);
@@ -1046,20 +1082,39 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1046
1082
  return info.Env().Undefined();
1047
1083
  }
1048
1084
 
1085
+ Napi::Value addonInit(const Napi::CallbackInfo& info) {
1086
+ if (!backendInitialized) {
1087
+ llama_backend_init();
1088
+ backendInitialized = true;
1089
+ }
1090
+
1091
+ llama_log_set(addonLlamaCppLogCallback, nullptr);
1092
+
1093
+ return info.Env().Undefined();
1094
+ }
1095
+
1096
+ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1097
+ if (backendInitialized) {
1098
+ llama_backend_free();
1099
+ backendInitialized = false;
1100
+ }
1101
+ }
1102
+
1049
1103
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
1050
- llama_backend_init();
1051
1104
  exports.DefineProperties({
1052
1105
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1053
1106
  Napi::PropertyDescriptor::Function("setLogger", setLogger),
1054
1107
  Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1055
1108
  Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1109
+ Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1110
+ Napi::PropertyDescriptor::Function("init", addonInit),
1056
1111
  });
1057
1112
  AddonModel::init(exports);
1058
1113
  AddonGrammar::init(exports);
1059
1114
  AddonGrammarEvaluationState::init(exports);
1060
1115
  AddonContext::init(exports);
1061
1116
 
1062
- llama_log_set(addonLlamaCppLogCallback, nullptr);
1117
+ exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
1063
1118
 
1064
1119
  return exports;
1065
1120
  }
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b2174"
2
+ "release": "b2329"
3
3
  }
Binary file
@@ -15,9 +15,9 @@
15
15
  #endif
16
16
 
17
17
 
18
- typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
18
+ typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
19
19
 
20
- bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
20
+ bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
21
21
  int current_device;
22
22
  auto getDeviceResult = cudaGetDevice(&current_device);
23
23
 
@@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa
40
40
  return true;
41
41
  }
42
42
 
43
- bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
43
+ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
44
44
  gpuInfoSetCudaDevice(device, errorLogCallback);
45
45
 
46
46
  size_t freeMem;
@@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo
58
58
  return true;
59
59
  }
60
60
 
61
- int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
61
+ int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) {
62
62
  int deviceCount;
63
63
  auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
64
64
 
@@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
70
70
  return deviceCount;
71
71
  }
72
72
 
73
- bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
73
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
74
74
  int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
75
75
 
76
76
  if (deviceCount < 0) {
@@ -2,6 +2,6 @@
2
2
 
3
3
  #include <stddef.h>
4
4
 
5
- typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
5
+ typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
6
6
 
7
- bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);
7
+ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback);
@@ -0,0 +1,65 @@
1
+ #include <stddef.h>
2
+
3
+ #include <vulkan/vulkan.hpp>
4
+
5
+ typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
6
+
7
+ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
8
+ vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
9
+ vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
10
+ vk::Instance instance = vk::createInstance(createInfo);
11
+
12
+ auto physicalDevices = instance.enumeratePhysicalDevices();
13
+
14
+ size_t usedMem = 0;
15
+ size_t totalMem = 0;
16
+
17
+ for (size_t i = 0; i < physicalDevices.size(); i++) {
18
+ vk::PhysicalDevice physicalDevice = physicalDevices[i];
19
+ vk::PhysicalDeviceMemoryProperties memProps = physicalDevice.getMemoryProperties();
20
+ vk::PhysicalDeviceProperties deviceProps = physicalDevice.getProperties();
21
+
22
+ if (deviceProps.deviceType == vk::PhysicalDeviceType::eCpu) {
23
+ // ignore CPU devices, as we don't want to count RAM from the CPU as VRAM
24
+ continue;
25
+ }
26
+
27
+ std::vector<vk::ExtensionProperties> extensionProperties = physicalDevice.enumerateDeviceExtensionProperties();
28
+ bool memoryBudgetExtensionSupported =
29
+ std::any_of(
30
+ extensionProperties.begin(),
31
+ extensionProperties.end(),
32
+ [](const vk::ExtensionProperties& ext) { return std::string(ext.extensionName.data()) == VK_EXT_MEMORY_BUDGET_EXTENSION_NAME;}
33
+ );
34
+
35
+ if (memoryBudgetExtensionSupported) {
36
+ vk::PhysicalDeviceMemoryBudgetPropertiesEXT memoryBudgetProperties;
37
+ vk::PhysicalDeviceMemoryProperties2 memProps2 = {};
38
+ memProps2.pNext = &memoryBudgetProperties;
39
+
40
+ physicalDevice.getMemoryProperties2(&memProps2);
41
+
42
+ for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
43
+ if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
44
+ totalMem += memProps.memoryHeaps[i].size;
45
+ usedMem += memoryBudgetProperties.heapUsage[i];
46
+ break;
47
+ }
48
+ }
49
+ } else {
50
+ // VK_EXT_memory_budget extension is not supported, so we cannot determine used memory
51
+ warningLogCallback(
52
+ (
53
+ "Vulkan VK_EXT_memory_budget extension not supported for device \"" +
54
+ std::string(deviceProps.deviceName.data()) + "\", so VRAM info cannot be determained for it"
55
+ )
56
+ .c_str()
57
+ );
58
+ return false;
59
+ }
60
+ }
61
+
62
+ *total = totalMem;
63
+ *used = usedMem;
64
+ return true;
65
+ }
@@ -0,0 +1,7 @@
1
+ #pragma once
2
+
3
+ #include <stddef.h>
4
+
5
+ typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
6
+
7
+ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
@@ -1,4 +1,4 @@
1
1
  {
2
- "tag": "b2174",
2
+ "tag": "b2329",
3
3
  "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
4
  }
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2329"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2329"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2329"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2329"}}}
@@ -0,0 +1 @@
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2329"}}}
@@ -1 +1 @@
1
- {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
1
+ {"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2329"}}}