node-llama-cpp 1.3.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +100 -28
  2. package/dist/ChatPromptWrapper.d.ts +3 -0
  3. package/dist/ChatPromptWrapper.js.map +1 -1
  4. package/dist/chatWrappers/ChatMLPromptWrapper.d.ts +11 -0
  5. package/dist/chatWrappers/ChatMLPromptWrapper.js +19 -0
  6. package/dist/chatWrappers/ChatMLPromptWrapper.js.map +1 -0
  7. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +1 -0
  8. package/dist/chatWrappers/EmptyChatPromptWrapper.js +1 -0
  9. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +1 -1
  10. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +11 -1
  11. package/dist/chatWrappers/GeneralChatPromptWrapper.js +28 -4
  12. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +1 -1
  13. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +4 -1
  14. package/dist/chatWrappers/LlamaChatPromptWrapper.js +9 -5
  15. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +1 -1
  16. package/dist/chatWrappers/createChatWrapperByBos.d.ts +2 -0
  17. package/dist/chatWrappers/createChatWrapperByBos.js +14 -0
  18. package/dist/chatWrappers/createChatWrapperByBos.js.map +1 -0
  19. package/dist/cli/commands/BuildCommand.d.ts +3 -1
  20. package/dist/cli/commands/BuildCommand.js +24 -2
  21. package/dist/cli/commands/BuildCommand.js.map +1 -1
  22. package/dist/cli/commands/ChatCommand.d.ts +7 -1
  23. package/dist/cli/commands/ChatCommand.js +87 -12
  24. package/dist/cli/commands/ChatCommand.js.map +1 -1
  25. package/dist/cli/commands/ClearCommand.js +1 -1
  26. package/dist/cli/commands/ClearCommand.js.map +1 -1
  27. package/dist/cli/commands/DownloadCommand.d.ts +4 -1
  28. package/dist/cli/commands/DownloadCommand.js +71 -70
  29. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  30. package/dist/cli/commands/OnPostInstallCommand.js +4 -2
  31. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  32. package/dist/config.d.ts +5 -0
  33. package/dist/config.js +11 -1
  34. package/dist/config.js.map +1 -1
  35. package/dist/index.d.ts +5 -1
  36. package/dist/index.js +4 -1
  37. package/dist/index.js.map +1 -1
  38. package/dist/llamaEvaluator/LlamaBins.d.ts +3 -3
  39. package/dist/llamaEvaluator/LlamaBins.js +2 -2
  40. package/dist/llamaEvaluator/LlamaBins.js.map +1 -1
  41. package/dist/llamaEvaluator/LlamaChatSession.d.ts +7 -2
  42. package/dist/llamaEvaluator/LlamaChatSession.js +51 -11
  43. package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
  44. package/dist/llamaEvaluator/LlamaContext.d.ts +31 -2
  45. package/dist/llamaEvaluator/LlamaContext.js +74 -7
  46. package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
  47. package/dist/llamaEvaluator/LlamaGrammar.d.ts +14 -0
  48. package/dist/llamaEvaluator/LlamaGrammar.js +30 -0
  49. package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -0
  50. package/dist/llamaEvaluator/LlamaModel.d.ts +49 -1
  51. package/dist/llamaEvaluator/LlamaModel.js +25 -9
  52. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  53. package/dist/types.d.ts +1 -0
  54. package/dist/types.js +2 -0
  55. package/dist/types.js.map +1 -0
  56. package/dist/utils/binariesGithubRelease.d.ts +6 -0
  57. package/dist/utils/binariesGithubRelease.js +15 -0
  58. package/dist/utils/binariesGithubRelease.js.map +1 -0
  59. package/dist/utils/compileLLamaCpp.d.ts +3 -1
  60. package/dist/utils/compileLLamaCpp.js +34 -4
  61. package/dist/utils/compileLLamaCpp.js.map +1 -1
  62. package/dist/utils/getBin.d.ts +18 -4
  63. package/dist/utils/getBin.js +4 -2
  64. package/dist/utils/getBin.js.map +1 -1
  65. package/dist/utils/getGrammarsFolder.d.ts +1 -0
  66. package/dist/utils/getGrammarsFolder.js +18 -0
  67. package/dist/utils/getGrammarsFolder.js.map +1 -0
  68. package/dist/utils/getTextCompletion.d.ts +3 -0
  69. package/dist/utils/getTextCompletion.js +12 -0
  70. package/dist/utils/getTextCompletion.js.map +1 -0
  71. package/dist/utils/removeNullFields.d.ts +1 -0
  72. package/dist/utils/removeNullFields.js +9 -0
  73. package/dist/utils/removeNullFields.js.map +1 -0
  74. package/dist/utils/spawnCommand.d.ts +2 -1
  75. package/dist/utils/spawnCommand.js +2 -2
  76. package/dist/utils/spawnCommand.js.map +1 -1
  77. package/llama/addon.cpp +180 -42
  78. package/llama/binariesGithubRelease.json +3 -0
  79. package/llama/binding.gyp +6 -3
  80. package/llama/grammars/README.md +91 -0
  81. package/llama/grammars/arithmetic.gbnf +6 -0
  82. package/llama/grammars/chess.gbnf +13 -0
  83. package/llama/grammars/japanese.gbnf +7 -0
  84. package/llama/grammars/json.gbnf +25 -0
  85. package/llama/grammars/list.gbnf +4 -0
  86. package/llamaBins/linux-arm64-16.node +0 -0
  87. package/llamaBins/linux-arm64-17.node +0 -0
  88. package/llamaBins/linux-arm64-18.node +0 -0
  89. package/llamaBins/linux-arm64-19.node +0 -0
  90. package/llamaBins/linux-arm64-20.node +0 -0
  91. package/llamaBins/linux-armv7l-16.node +0 -0
  92. package/llamaBins/linux-armv7l-17.node +0 -0
  93. package/llamaBins/linux-armv7l-18.node +0 -0
  94. package/llamaBins/linux-armv7l-19.node +0 -0
  95. package/llamaBins/linux-armv7l-20.node +0 -0
  96. package/llamaBins/linux-ppc64le-16.node +0 -0
  97. package/llamaBins/linux-ppc64le-17.node +0 -0
  98. package/llamaBins/linux-ppc64le-18.node +0 -0
  99. package/llamaBins/linux-ppc64le-19.node +0 -0
  100. package/llamaBins/linux-ppc64le-20.node +0 -0
  101. package/llamaBins/linux-x64-16.node +0 -0
  102. package/llamaBins/linux-x64-17.node +0 -0
  103. package/llamaBins/linux-x64-18.node +0 -0
  104. package/llamaBins/linux-x64-19.node +0 -0
  105. package/llamaBins/linux-x64-20.node +0 -0
  106. package/llamaBins/mac-arm64-16.node +0 -0
  107. package/llamaBins/mac-arm64-17.node +0 -0
  108. package/llamaBins/mac-arm64-18.node +0 -0
  109. package/llamaBins/mac-arm64-19.node +0 -0
  110. package/llamaBins/mac-arm64-20.node +0 -0
  111. package/llamaBins/mac-x64-16.node +0 -0
  112. package/llamaBins/mac-x64-17.node +0 -0
  113. package/llamaBins/mac-x64-18.node +0 -0
  114. package/llamaBins/mac-x64-19.node +0 -0
  115. package/llamaBins/mac-x64-20.node +0 -0
  116. package/llamaBins/win-x64-16.node +0 -0
  117. package/llamaBins/win-x64-17.node +0 -0
  118. package/llamaBins/win-x64-18.node +0 -0
  119. package/llamaBins/win-x64-19.node +0 -0
  120. package/llamaBins/win-x64-20.node +0 -0
  121. package/package.json +12 -6
@@ -7,14 +7,44 @@ import { clearLlamaBuild } from "./clearLlamaBuild.js";
7
7
  import { setUsedBinFlag } from "./usedBinFlag.js";
8
8
  import { spawnCommand } from "./spawnCommand.js";
9
9
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
- export async function compileLlamaCpp({ arch = process.arch, nodeTarget = process.version, setUsedBingFlag = true }) {
10
+ export async function compileLlamaCpp({ arch = process.arch, nodeTarget = process.version, setUsedBingFlag = true, metal = false, cuda = false }) {
11
11
  try {
12
12
  if (!(await fs.exists(llamaCppDirectory))) {
13
13
  throw new Error(`"${llamaCppDirectory}" directory does not exist`);
14
14
  }
15
+ const gypDefines = ["GGML_USE_K_QUANTS", "NAPI_CPP_EXCEPTIONS"];
16
+ if ((metal && process.platform === "darwin") || process.env.LLAMA_METAL === "1")
17
+ gypDefines.push("LLAMA_METAL=1");
18
+ if (cuda || process.env.LLAMA_CUBLAS === "1")
19
+ gypDefines.push("LLAMA_CUBLAS=1");
20
+ if (process.env.LLAMA_MPI === "1")
21
+ gypDefines.push("LLAMA_MPI=1");
22
+ if (process.env.LLAMA_OPENBLAS === "1")
23
+ gypDefines.push("LLAMA_OPENBLAS=1");
24
+ if (process.env.LLAMA_BLAS_VENDOR != null)
25
+ gypDefines.push("LLAMA_BLAS_VENDOR=" + process.env.LLAMA_BLAS_VENDOR);
26
+ if (process.env.LLAMA_CUDA_FORCE_DMMV != null)
27
+ gypDefines.push("LLAMA_CUDA_FORCE_DMMV=" + process.env.LLAMA_CUDA_FORCE_DMMV);
28
+ if (process.env.LLAMA_CUDA_DMMV_X != null)
29
+ gypDefines.push("LLAMA_CUDA_DMMV_X=" + process.env.LLAMA_CUDA_DMMV_X);
30
+ if (process.env.LLAMA_CUDA_MMV_Y != null)
31
+ gypDefines.push("LLAMA_CUDA_MMV_Y=" + process.env.LLAMA_CUDA_MMV_Y);
32
+ if (process.env.LLAMA_CUDA_F16 != null)
33
+ gypDefines.push("LLAMA_CUDA_F16=" + process.env.LLAMA_CUDA_F16);
34
+ if (process.env.LLAMA_CUDA_KQUANTS_ITER != null)
35
+ gypDefines.push("LLAMA_CUDA_KQUANTS_ITER=" + process.env.LLAMA_CUDA_KQUANTS_ITER);
36
+ if (process.env.LLAMA_HIPBLAS === "1")
37
+ gypDefines.push("LLAMA_HIPBLAS=1");
38
+ if (process.env.LLAMA_CLBLAST === "1")
39
+ gypDefines.push("LLAMA_CLBLAST=1");
40
+ const nodeGypEnv = {
41
+ ...process.env,
42
+ "CMAKE_CURRENT_SOURCE_DIR": llamaCppDirectory,
43
+ "GYP_DEFINES": gypDefines.join(" ")
44
+ };
15
45
  await clearLlamaBuild();
16
- await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget], __dirname);
17
- await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget, "--", "-f", "compile_commands_json"], __dirname);
46
+ await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget], __dirname, nodeGypEnv);
47
+ await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget, "--", "-f", "compile_commands_json"], __dirname, nodeGypEnv);
18
48
  if (await fs.exists(path.join(llamaDirectory, "Release", "compile_commands.json"))) {
19
49
  await fs.move(path.join(llamaDirectory, "Release", "compile_commands.json"), path.join(llamaDirectory, "compile_commands.json"));
20
50
  }
@@ -23,7 +53,7 @@ export async function compileLlamaCpp({ arch = process.arch, nodeTarget = proces
23
53
  }
24
54
  await fs.remove(path.join(llamaDirectory, "Release"));
25
55
  await fs.remove(path.join(llamaDirectory, "Debug"));
26
- await spawnCommand("npm", ["run", "-s", "node-gyp-llama-build", "--", "--arch=" + arch, "--target=" + nodeTarget], __dirname);
56
+ await spawnCommand("npm", ["run", "-s", "node-gyp-llama-build", "--", "--arch=" + arch, "--target=" + nodeTarget], __dirname, nodeGypEnv);
27
57
  if (setUsedBingFlag) {
28
58
  await setUsedBinFlag("localBuildFromSource");
29
59
  }
@@ -1 +1 @@
1
- {"version":3,"file":"compileLLamaCpp.js","sourceRoot":"","sources":["../../src/utils/compileLLamaCpp.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAE,cAAc,EAAC,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAC,eAAe,EAAC,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,EAAC,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,OAAO,EAAE,eAAe,GAAG,IAAI,EAE/G;IACG,IAAI;QACA,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,EAAE;YACvC,MAAM,IAAI,KAAK,CAAC,IAAI,iBAAiB,4BAA4B,CAAC,CAAC;SACtE;QAED,MAAM,eAAe,EAAE,CAAC;QAExB,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,CAAC,EAAE,SAAS,CAAC,CAAC;QAErI,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,uBAAuB,CAAC,EAAE,SAAS,CAAC,CAAC;QAE1K,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,EAAE,uBAAuB,CAAC,CAAC,EAAE;YAChF,MAAM,EAAE,CAAC,IAAI,CACT,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,EAAE,uBAAuB,CAAC,EAC7D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,uBAAuB,CAAC,CACrD,CAAC;SACL;aAAM,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,uBAAuB,CAAC,CAAC,EAAE;YACrF,MAAM,EAAE,CAAC,IAAI,CACT,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,uBAAuB,CAAC,EAC3D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,uBAAuB,CAAC,CACrD,CAAC;SACL;QAED,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC,CAAC;QACtD,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;QAGpD,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,CAAC,EAAE,SAAS,CAAC,CAAC;QAE9H,IAAI,eAAe,EAAE;YACjB,MAAM,cAAc,CAAC,sBAAsB,CAAC,CAAC;SAChD;KACJ;IAAC,OAAO,GAAG,EAAE;QACV,IAAI,eAAe;YACf,MAAM,cAAc,CAAC,kBAAkB,CAAC,CAAC;QAE7C,MAAM,GAAG,CAAC;KACb;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,6BAA6B;IAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;IAE/F,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC3B,OAAO,UAAU,CAAC;IAEtB,OAAO,IAAI,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"compileLLamaCpp.js","sourceRoot":"","sources":["../../src/utils/compileLLamaCpp.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAE,cAAc,EAAC,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAC,eAAe,EAAC,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,EAClC,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,OAAO,EAAE,eAAe,GAAG,IAAI,EAAE,KAAK,GAAG,KAAK,EAAE,IAAI,GAAG,KAAK,EAGzG;IACG,IAAI;QACA,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,EAAE;YACvC,MAAM,IAAI,KAAK,CAAC,IAAI,iBAAiB,4BAA4B,CAAC,CAAC;SACtE;QAED,MAAM,UAAU,GAAG,CAAC,mBAAmB,EAAE,qBAAqB,CAAC,CAAC;QAEhE,IAAI,CAAC,KAAK,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAClH,IAAI,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,YAAY,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAChF,IAAI,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAClE,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC5E,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,oBAAoB,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QACjH,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,wBAAwB,GAAG,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QAC7H,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,oBAAoB,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QACjH,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,mBAAmB,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAC9G,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QACxG,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,0BAA0B,GAAG,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACnI,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAC1E,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAE1E,MAAM,UAAU,GAAsB;YAClC,GAAG,OAAO,CAAC,GAAG;YACd,0BAA0B,EAAE,iBAAiB;YAC7C,aAAa,EAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC;SACtC,CAAC;QAEF,MAAM,eAAe,EAAE,CAAC;QAExB,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAEjJ,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,uBAAuB,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAEtL,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,EAAE,uBAAuB,CAAC,CAAC,EAAE;YAChF,MAAM,EAAE,CAAC,IAAI,CACT,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,EAAE,uBAAuB,CAAC,EAC7D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,uBAAuB,CAAC,CACrD,CAAC;SACL;aAAM,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,uBAAuB,CAAC,CAAC,EAAE;YACrF,MAAM,EAAE,CAAC,IAAI,CACT,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,uBAAuB,CAAC,EAC3D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,uBAAuB,CAAC,CACrD,CAAC;SACL;QAED,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC,CAAC;QACtD,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;QAGpD,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAE1I,IAAI,eAAe,EAAE;YACjB,MAAM,cAAc,CAAC,sBAAsB,CAAC,CAAC;SAChD;KACJ;IAAC,OAAO,GAAG,EAAE;QACV,IAAI,eAAe;YACf,MAAM,cAAc,CAAC,kBAAkB,CAAC,CAAC;QAE7C,MAAM,GAAG,CAAC;KACb;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,6BAA6B;IAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;IAE/F,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC3B,OAAO,UAAU,CAAC;IAEtB,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -3,9 +3,8 @@ export declare function loadBin(): Promise<LlamaCppNodeModule>;
3
3
  export type LlamaCppNodeModule = {
4
4
  LLAMAModel: LLAMAModel;
5
5
  LLAMAContext: LLAMAContext;
6
- tokenBos(): number;
6
+ LLAMAGrammar: LLAMAGrammar;
7
7
  systemInfo(): string;
8
- tokenEos(): number;
9
8
  };
10
9
  export type LLAMAModel = {
11
10
  new (modelPath: string, params: {
@@ -20,11 +19,26 @@ export type LLAMAModel = {
20
19
  useMmap?: boolean;
21
20
  useMlock?: boolean;
22
21
  embedding?: boolean;
22
+ temperature?: number;
23
+ topK?: number;
24
+ topP?: number;
23
25
  }): LLAMAModel;
24
26
  };
25
27
  export type LLAMAContext = {
26
- new (model: LLAMAModel): LLAMAContext;
28
+ new (model: LLAMAModel, params?: {
29
+ grammar?: LLAMAGrammar;
30
+ }): LLAMAContext;
27
31
  encode(text: string): Uint32Array;
28
- eval(tokens: Uint32Array, restrictions?: Uint32Array): Promise<number>;
32
+ eval(tokens: Uint32Array): Promise<number>;
29
33
  decode(tokens: Uint32Array): string;
34
+ tokenBos(): number;
35
+ tokenEos(): number;
36
+ tokenNl(): number;
37
+ getContextSize(): number;
38
+ getTokenString(token: number): string;
39
+ };
40
+ export type LLAMAGrammar = {
41
+ new (grammarPath: string, params?: {
42
+ printGrammar?: boolean;
43
+ }): LLAMAGrammar;
30
44
  };
@@ -3,7 +3,7 @@ import * as console from "console";
3
3
  import path from "path";
4
4
  import process from "process";
5
5
  import fs from "fs-extra";
6
- import { defaultLlamaCppGitHubRepo, defaultLlamaCppRelease, defaultSkipDownload, llamaBinsDirectory } from "../config.js";
6
+ import { defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultSkipDownload, llamaBinsDirectory } from "../config.js";
7
7
  import { DownloadLlamaCppCommand } from "../cli/commands/DownloadCommand.js";
8
8
  import { getUsedBinFlag } from "./usedBinFlag.js";
9
9
  import { getCompiledLlamaCppBinaryPath } from "./compileLLamaCpp.js";
@@ -56,7 +56,9 @@ export async function loadBin() {
56
56
  else {
57
57
  await DownloadLlamaCppCommand({
58
58
  repo: defaultLlamaCppGitHubRepo,
59
- release: defaultLlamaCppRelease
59
+ release: defaultLlamaCppRelease,
60
+ metal: defaultLlamaCppMetalSupport,
61
+ cuda: defaultLlamaCppCudaSupport
60
62
  });
61
63
  const modulePath = await getCompiledLlamaCppBinaryPath();
62
64
  if (modulePath == null) {
@@ -1 +1 @@
1
- {"version":3,"file":"getBin.js","sourceRoot":"","sources":["../../src/utils/getBin.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,QAAQ,CAAC;AACrC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,yBAAyB,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,kBAAkB,EAAC,MAAM,cAAc,CAAC;AACxH,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,6BAA6B,EAAC,MAAM,sBAAsB,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE/C,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACpC,MAAM,gBAAgB,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IACrE,MAAM,iBAAiB,GAAG,CAAC,gBAAgB,EAAE,gBAAgB,GAAG,CAAC,CAAC,CAAC;IAEnE,SAAS,UAAU,CAAC,QAAgB,EAAE,IAAY,EAAE,WAAmB;QACnE,OAAO,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,QAAQ,IAAI,IAAI,IAAI,WAAW,OAAO,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,UAAU,WAAW,CAAC,QAAgB,EAAE,IAAY,EAAE,YAAsB;QAC7E,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE;YACpC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;YAExD,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC;gBACxB,OAAO,OAAO,CAAC;SACtB;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,KAAK,UAAU,OAAO;QAClB,QAAQ,OAAO,CAAC,QAAQ,EAAE;YACtB,KAAK,OAAO,CAAC;YACb,KAAK,QAAQ;gBACT,OAAO,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;YAE/D,KAAK,OAAO,CAAC;YACb,KAAK,SAAS;gBACV,OAAO,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;YAEjE,KAAK,QAAQ;gBACT,OAAO,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;SAClE;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,OAAO,MAAM,OAAO,EAAE,CAAC;AAC3B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO;IACzB,MAAM,YAAY,GAAG,MAAM,cAAc,EAAE,CAAC;IAE5C,IAAI,YAAY,KAAK,kBAAkB,EAAE;QACrC,MAAM,eAAe,GAAG,MAAM,kBAAkB,EAAE,CAAC;QAEnD,IAAI,eAAe,IAAI,IAAI,EAAE;YACzB,OAAO,CAAC,IAAI,CAAC,wEAAwE,CAAC,CAAC;SAC1F;aAAM;YACH,OAAO,OAAO,CAAC,eAAe,CAAC,CAAC;SACnC;KACJ;IAED,MAAM,UAAU,GAAG,MAAM,6BAA6B,EAAE,CAAC;IAEzD,IAAI,UAAU,IAAI,IAAI,EAAE;QACpB,IAAI,mBAAmB,EAAE;YACrB,MAAM,IAAI,KAAK,CAAC,oFAAoF,CAAC,CAAC;SACzG;aAAM;YACH,MAAM,uBAAuB,CAAC;gBAC1B,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,sBAAsB;aAClC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,MAAM,6BAA6B,EAAE,CAAC;YAEzD,IAAI,UAAU,IAAI,IAAI,EAAE;gBACpB,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;aAC/D;YAED,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC;SAC9B;KACJ;IAED,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC;AAC/B,CAAC"}
1
+ {"version":3,"file":"getBin.js","sourceRoot":"","sources":["../../src/utils/getBin.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,QAAQ,CAAC;AACrC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EACH,0BAA0B,EAAE,yBAAyB,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,mBAAmB,EAC/H,kBAAkB,EACrB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,6BAA6B,EAAC,MAAM,sBAAsB,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE/C,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACpC,MAAM,gBAAgB,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IACrE,MAAM,iBAAiB,GAAG,CAAC,gBAAgB,EAAE,gBAAgB,GAAG,CAAC,CAAC,CAAC;IAEnE,SAAS,UAAU,CAAC,QAAgB,EAAE,IAAY,EAAE,WAAmB;QACnE,OAAO,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,QAAQ,IAAI,IAAI,IAAI,WAAW,OAAO,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,UAAU,WAAW,CAAC,QAAgB,EAAE,IAAY,EAAE,YAAsB;QAC7E,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE;YACpC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;YAExD,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC;gBACxB,OAAO,OAAO,CAAC;SACtB;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,KAAK,UAAU,OAAO;QAClB,QAAQ,OAAO,CAAC,QAAQ,EAAE;YACtB,KAAK,OAAO,CAAC;YACb,KAAK,QAAQ;gBACT,OAAO,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;YAE/D,KAAK,OAAO,CAAC;YACb,KAAK,SAAS;gBACV,OAAO,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;YAEjE,KAAK,QAAQ;gBACT,OAAO,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;SAClE;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,OAAO,MAAM,OAAO,EAAE,CAAC;AAC3B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO;IACzB,MAAM,YAAY,GAAG,MAAM,cAAc,EAAE,CAAC;IAE5C,IAAI,YAAY,KAAK,kBAAkB,EAAE;QACrC,MAAM,eAAe,GAAG,MAAM,kBAAkB,EAAE,CAAC;QAEnD,IAAI,eAAe,IAAI,IAAI,EAAE;YACzB,OAAO,CAAC,IAAI,CAAC,wEAAwE,CAAC,CAAC;SAC1F;aAAM;YACH,OAAO,OAAO,CAAC,eAAe,CAAC,CAAC;SACnC;KACJ;IAED,MAAM,UAAU,GAAG,MAAM,6BAA6B,EAAE,CAAC;IAEzD,IAAI,UAAU,IAAI,IAAI,EAAE;QACpB,IAAI,mBAAmB,EAAE;YACrB,MAAM,IAAI,KAAK,CAAC,oFAAoF,CAAC,CAAC;SACzG;aAAM;YACH,MAAM,uBAAuB,CAAC;gBAC1B,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,sBAAsB;gBAC/B,KAAK,EAAE,2BAA2B;gBAClC,IAAI,EAAE,0BAA0B;aACnC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,MAAM,6BAA6B,EAAE,CAAC;YAEzD,IAAI,UAAU,IAAI,IAAI,EAAE;gBACpB,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;aAC/D;YAED,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC;SAC9B;KACJ;IAED,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC;AAC/B,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function getGrammarsFolder(): Promise<string>;
@@ -0,0 +1,18 @@
1
+ import fs from "fs-extra";
2
+ import { llamaBinsGrammarsDirectory, llamaCppGrammarsDirectory } from "../config.js";
3
+ import { getUsedBinFlag } from "./usedBinFlag.js";
4
+ export async function getGrammarsFolder() {
5
+ const usedBingFlag = await getUsedBinFlag();
6
+ if (usedBingFlag === "localBuildFromSource") {
7
+ if (await fs.exists(llamaCppGrammarsDirectory))
8
+ return llamaCppGrammarsDirectory;
9
+ }
10
+ else if (usedBingFlag === "prebuiltBinaries") {
11
+ if (await fs.exists(llamaBinsGrammarsDirectory))
12
+ return llamaBinsGrammarsDirectory;
13
+ else if (await fs.exists(llamaCppGrammarsDirectory))
14
+ return llamaCppGrammarsDirectory;
15
+ }
16
+ throw new Error("Grammars folder not found");
17
+ }
18
+ //# sourceMappingURL=getGrammarsFolder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"getGrammarsFolder.js","sourceRoot":"","sources":["../../src/utils/getGrammarsFolder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,0BAA0B,EAAE,yBAAyB,EAAC,MAAM,cAAc,CAAC;AACnF,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACnC,MAAM,YAAY,GAAG,MAAM,cAAc,EAAE,CAAC;IAE5C,IAAI,YAAY,KAAK,sBAAsB,EAAE;QACzC,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC;YAC1C,OAAO,yBAAyB,CAAC;KACxC;SAAM,IAAI,YAAY,KAAK,kBAAkB,EAAE;QAC5C,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,0BAA0B,CAAC;YAC3C,OAAO,0BAA0B,CAAC;aACjC,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC;YAC/C,OAAO,yBAAyB,CAAC;KACxC;IAED,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;AACjD,CAAC"}
@@ -0,0 +1,3 @@
1
+ export declare function getTextCompletion(text: null, fullText: string | string[]): null;
2
+ export declare function getTextCompletion(text: string, fullText: string | string[]): string | null;
3
+ export declare function getTextCompletion(text: string | null, fullText: string | string[]): string | null;
@@ -0,0 +1,12 @@
1
+ export function getTextCompletion(text, fullText) {
2
+ if (text == null) {
3
+ return null;
4
+ }
5
+ const fullTexts = typeof fullText === "string" ? [fullText] : fullText;
6
+ for (const fullText of fullTexts) {
7
+ if (fullText.startsWith(text))
8
+ return fullText.slice(text.length);
9
+ }
10
+ return null;
11
+ }
12
+ //# sourceMappingURL=getTextCompletion.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"getTextCompletion.js","sourceRoot":"","sources":["../../src/utils/getTextCompletion.ts"],"names":[],"mappings":"AAGA,MAAM,UAAU,iBAAiB,CAAC,IAAmB,EAAE,QAA2B;IAC9E,IAAI,IAAI,IAAI,IAAI,EAAE;QACd,OAAO,IAAI,CAAC;KACf;IAED,MAAM,SAAS,GAAG,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;IAEvE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE;QAC9B,IAAI,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC;YACzB,OAAO,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;KAC1C;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function removeNullFields<T extends object>(obj: T): T;
@@ -0,0 +1,9 @@
1
+ export function removeNullFields(obj) {
2
+ const newObj = Object.assign({}, obj);
3
+ for (const key in obj) {
4
+ if (newObj[key] == null)
5
+ delete newObj[key];
6
+ }
7
+ return newObj;
8
+ }
9
+ //# sourceMappingURL=removeNullFields.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"removeNullFields.js","sourceRoot":"","sources":["../../src/utils/removeNullFields.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,gBAAgB,CAAmB,GAAM;IACrD,MAAM,MAAM,GAAM,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE;QACnB,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI;YACnB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;KAC1B;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
@@ -1 +1,2 @@
1
- export declare function spawnCommand(command: string, args: string[], cwd: string): Promise<void>;
1
+ /// <reference types="node" />
2
+ export declare function spawnCommand(command: string, args: string[], cwd: string, env?: NodeJS.ProcessEnv): Promise<void>;
@@ -1,5 +1,5 @@
1
1
  import spawn from "cross-spawn";
2
- export function spawnCommand(command, args, cwd) {
2
+ export function spawnCommand(command, args, cwd, env = process.env) {
3
3
  function getCommandString() {
4
4
  let res = command;
5
5
  for (const arg of args) {
@@ -16,7 +16,7 @@ export function spawnCommand(command, args, cwd) {
16
16
  const child = spawn(command, args, {
17
17
  stdio: "inherit",
18
18
  cwd,
19
- env: process.env,
19
+ env,
20
20
  detached: false,
21
21
  windowsHide: true
22
22
  });
@@ -1 +1 @@
1
- {"version":3,"file":"spawnCommand.js","sourceRoot":"","sources":["../../src/utils/spawnCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,aAAa,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,OAAe,EAAE,IAAc,EAAE,GAAW;IACrE,SAAS,gBAAgB;QACrB,IAAI,GAAG,GAAG,OAAO,CAAC;QAElB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACpB,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBACnB,GAAG,IAAI,KAAK,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;aAC7C;iBAAM;gBACH,GAAG,IAAI,IAAI,GAAG,EAAE,CAAC;aACpB;SACJ;QAED,OAAO,GAAG,CAAC;IACf,CAAC;IAED,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACzC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,EAAE,IAAI,EAAE;YAC/B,KAAK,EAAE,SAAS;YAChB,GAAG;YACH,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SACpB,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACtB,IAAI,IAAI,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC;;gBAEV,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,qBAAqB,IAAI,EAAE,CAAC,CAAC,CAAC;QACpF,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;QAC9F,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE;YACrB,IAAI,IAAI,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC;;gBAEV,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,qBAAqB,IAAI,EAAE,CAAC,CAAC,CAAC;QACpF,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC"}
1
+ {"version":3,"file":"spawnCommand.js","sourceRoot":"","sources":["../../src/utils/spawnCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,aAAa,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,OAAe,EAAE,IAAc,EAAE,GAAW,EAAE,GAAG,GAAG,OAAO,CAAC,GAAG;IACxF,SAAS,gBAAgB;QACrB,IAAI,GAAG,GAAG,OAAO,CAAC;QAElB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACpB,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBACnB,GAAG,IAAI,KAAK,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;aAC7C;iBAAM;gBACH,GAAG,IAAI,IAAI,GAAG,EAAE,CAAC;aACpB;SACJ;QAED,OAAO,GAAG,CAAC;IACf,CAAC;IAED,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACzC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,EAAE,IAAI,EAAE;YAC/B,KAAK,EAAE,SAAS;YAChB,GAAG;YACH,GAAG;YACH,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SACpB,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACtB,IAAI,IAAI,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC;;gBAEV,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,qBAAqB,IAAI,EAAE,CAAC,CAAC,CAAC;QACpF,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;QAC9F,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE;YACrB,IAAI,IAAI,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC;;gBAEV,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,qBAAqB,IAAI,EAAE,CAAC,CAAC,CAAC;QACpF,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC"}
package/llama/addon.cpp CHANGED
@@ -3,18 +3,26 @@
3
3
  #include <sstream>
4
4
  #include <vector>
5
5
 
6
+ #include "common.h"
6
7
  #include "llama.h"
8
+ #include "common/grammar-parser.h"
7
9
  #include "napi.h"
8
10
 
9
11
  class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
10
12
  public:
11
13
  llama_context_params params;
12
14
  llama_model* model;
15
+ float temperature;
16
+ int32_t top_k;
17
+ float top_p;
13
18
 
14
19
  LLAMAModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAModel>(info) {
15
20
  params = llama_context_default_params();
16
21
  params.seed = -1;
17
22
  params.n_ctx = 4096;
23
+ temperature = 0.0f;
24
+ top_k = 40;
25
+ top_p = 0.95f;
18
26
 
19
27
  // Get the model path
20
28
  std::string modelPath = info[0].As<Napi::String>().Utf8Value();
@@ -65,8 +73,21 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
65
73
  if (options.Has("embedding")) {
66
74
  params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
67
75
  }
76
+
77
+ if (options.Has("temperature")) {
78
+ temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
79
+ }
80
+
81
+ if (options.Has("topK")) {
82
+ top_k = options.Get("topK").As<Napi::Number>().Int32Value();
83
+ }
84
+
85
+ if (options.Has("topP")) {
86
+ top_p = options.Get("topP").As<Napi::Number>().FloatValue();
87
+ }
68
88
  }
69
89
 
90
+ llama_backend_init(false);
70
91
  model = llama_load_model_from_file(modelPath.c_str(), params);
71
92
 
72
93
  if (model == NULL) {
@@ -84,20 +105,84 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
84
105
  }
85
106
  };
86
107
 
108
+ class LLAMAGrammar : public Napi::ObjectWrap<LLAMAGrammar> {
109
+ public:
110
+ grammar_parser::parse_state parsed_grammar;
111
+ llama_grammar *grammar = nullptr;
112
+
113
+ LLAMAGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAGrammar>(info) {
114
+ // Get the model path
115
+ std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
116
+ bool should_print_grammar = false;
117
+
118
+ if (info.Length() > 1 && info[1].IsObject()) {
119
+ Napi::Object options = info[1].As<Napi::Object>();
120
+
121
+ if (options.Has("printGrammar")) {
122
+ should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
123
+ }
124
+ }
125
+
126
+ parsed_grammar = grammar_parser::parse(grammarCode.c_str());
127
+ // will be empty (default) if there are parse errors
128
+ if (parsed_grammar.rules.empty()) {
129
+ Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
130
+ return;
131
+ }
132
+
133
+ if (should_print_grammar) {
134
+ grammar_parser::print_grammar(stderr, parsed_grammar);
135
+ }
136
+
137
+ std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
138
+ grammar = llama_grammar_init(
139
+ grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
140
+ }
141
+
142
+ ~LLAMAGrammar() {
143
+ if (grammar != nullptr) {
144
+ llama_grammar_free(grammar);
145
+ grammar = nullptr;
146
+ }
147
+ }
148
+
149
+ static void init(Napi::Object exports) {
150
+ exports.Set("LLAMAGrammar", DefineClass(exports.Env(), "LLAMAGrammar", {}));
151
+ }
152
+ };
153
+
87
154
  class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
88
155
  public:
89
156
  LLAMAModel* model;
90
157
  llama_context* ctx;
158
+ LLAMAGrammar* grammar;
159
+ bool use_grammar = false;
160
+
91
161
  LLAMAContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAContext>(info) {
92
162
  model = Napi::ObjectWrap<LLAMAModel>::Unwrap(info[0].As<Napi::Object>());
93
163
  model->Ref();
94
164
  ctx = llama_new_context_with_model(model->model, model->params);
95
165
  Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
166
+
167
+ if (info.Length() > 1 && info[1].IsObject()) {
168
+ Napi::Object options = info[1].As<Napi::Object>();
169
+
170
+ if (options.Has("grammar")) {
171
+ grammar = Napi::ObjectWrap<LLAMAGrammar>::Unwrap(options.Get("grammar").As<Napi::Object>());
172
+ grammar->Ref();
173
+ use_grammar = true;
174
+ }
175
+ }
96
176
  }
97
177
  ~LLAMAContext() {
98
178
  Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
99
179
  llama_free(ctx);
100
180
  model->Unref();
181
+
182
+ if (use_grammar) {
183
+ grammar->Unref();
184
+ use_grammar = false;
185
+ }
101
186
  }
102
187
  Napi::Value Encode(const Napi::CallbackInfo& info) {
103
188
  std::string text = info[0].As<Napi::String>().Utf8Value();
@@ -124,16 +209,42 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
124
209
 
125
210
  // Decode each token and accumulate the result.
126
211
  for (size_t i = 0; i < tokens.ElementLength(); i++) {
127
- const char* str = llama_token_to_str(ctx, (llama_token)tokens[i]);
128
- if (str == nullptr) {
129
- Napi::Error::New(info.Env(), "Invalid token").ThrowAsJavaScriptException();
130
- return info.Env().Undefined();
131
- }
132
- ss << str;
212
+ const std::string piece = llama_token_to_piece(ctx, (llama_token)tokens[i]);
213
+
214
+ if (piece.empty()) {
215
+ continue;
216
+ }
217
+
218
+ ss << piece;
133
219
  }
134
220
 
135
221
  return Napi::String::New(info.Env(), ss.str());
136
222
  }
223
+ Napi::Value TokenBos(const Napi::CallbackInfo& info) {
224
+ return Napi::Number::From(info.Env(), llama_token_bos(ctx));
225
+ }
226
+ Napi::Value TokenEos(const Napi::CallbackInfo& info) {
227
+ return Napi::Number::From(info.Env(), llama_token_eos(ctx));
228
+ }
229
+ Napi::Value TokenNl(const Napi::CallbackInfo& info) {
230
+ return Napi::Number::From(info.Env(), llama_token_nl(ctx));
231
+ }
232
+ Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
233
+ return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
234
+ }
235
+ Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
236
+ int token = info[0].As<Napi::Number>().Int32Value();
237
+ std::stringstream ss;
238
+
239
+ const char* str = llama_token_get_text(ctx, token);
240
+ if (str == nullptr) {
241
+ return info.Env().Undefined();
242
+ }
243
+
244
+ ss << str;
245
+
246
+ return Napi::String::New(info.Env(), ss.str());
247
+ }
137
248
  Napi::Value Eval(const Napi::CallbackInfo& info);
138
249
  static void init(Napi::Object exports) {
139
250
  exports.Set("LLAMAContext",
@@ -142,6 +253,11 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
142
253
  {
143
254
  InstanceMethod("encode", &LLAMAContext::Encode),
144
255
  InstanceMethod("decode", &LLAMAContext::Decode),
256
+ InstanceMethod("tokenBos", &LLAMAContext::TokenBos),
257
+ InstanceMethod("tokenEos", &LLAMAContext::TokenEos),
258
+ InstanceMethod("tokenNl", &LLAMAContext::TokenNl),
259
+ InstanceMethod("getContextSize", &LLAMAContext::GetContextSize),
260
+ InstanceMethod("getTokenString", &LLAMAContext::GetTokenString),
145
261
  InstanceMethod("eval", &LLAMAContext::Eval),
146
262
  }));
147
263
  }
@@ -151,7 +267,6 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
151
267
  class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
152
268
  LLAMAContext* ctx;
153
269
  std::vector<llama_token> tokens;
154
- std::vector<llama_token> restriction;
155
270
  llama_token result;
156
271
 
157
272
  public:
@@ -160,13 +275,6 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
160
275
  Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
161
276
  this->tokens.reserve(tokens.ElementLength());
162
277
  for (size_t i = 0; i < tokens.ElementLength(); i++) { this->tokens.push_back(static_cast<llama_token>(tokens[i])); }
163
-
164
- if (info.Length() > 1 && info[1].IsTypedArray()) {
165
- Napi::Uint32Array restriction = info[1].As<Napi::Uint32Array>();
166
- this->restriction.reserve(restriction.ElementLength());
167
- for (size_t i = 0; i < restriction.ElementLength(); i++) { this->restriction.push_back(static_cast<llama_token>(restriction[i])); }
168
- std::sort(this->restriction.begin(), this->restriction.end());
169
- }
170
278
  }
171
279
  ~LLAMAContextEvalWorker() { ctx->Unref(); }
172
280
  using Napi::AsyncWorker::Queue;
@@ -175,39 +283,72 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
175
283
  protected:
176
284
  void Execute() {
177
285
  // Perform the evaluation using llama_eval.
178
- int r = llama_eval(ctx->ctx, tokens.data(), tokens.size(), llama_get_kv_cache_token_count(ctx->ctx), 6);
286
+ int r = llama_eval(ctx->ctx, tokens.data(), int(tokens.size()), llama_get_kv_cache_token_count(ctx->ctx), 6);
179
287
  if (r != 0) {
180
288
  SetError("Eval has failed");
181
289
  return;
182
290
  }
183
291
 
292
+ llama_token new_token_id = 0;
293
+
184
294
  // Select the best prediction.
185
- float* logits = llama_get_logits(ctx->ctx);
186
- int n_vocab = llama_n_vocab(ctx->ctx);
187
- llama_token re;
188
- if (restriction.empty()) {
189
- float max = logits[0];
190
- re = 0;
191
- for (llama_token id = 1; id < n_vocab; id++) {
192
- float logit = logits[id];
193
- if (logit > max) {
194
- max = logit;
195
- re = id;
196
- }
295
+ auto logits = llama_get_logits(ctx->ctx);
296
+ auto n_vocab = llama_n_vocab(ctx->ctx);
297
+
298
+ std::vector<llama_token_data> candidates;
299
+ candidates.reserve(n_vocab);
300
+
301
+ for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
302
+ candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
303
+ }
304
+
305
+ llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
306
+
307
+ float originalEosLogit = 0;
308
+ auto eos_token = llama_token_eos(ctx->ctx);
309
+
310
+ for (auto& candidate : candidates) {
311
+ if (candidate.id == eos_token) {
312
+ originalEosLogit = candidate.logit;
313
+ break;
197
314
  }
198
- } else {
199
- float max = logits[restriction[0]];
200
- re = 0;
201
- for (size_t i = 1; i < restriction.size(); i++) {
202
- llama_token id = restriction[i];
203
- float logit = logits[id];
204
- if (logit > max) {
205
- max = logit;
206
- re = id;
207
- }
315
+ }
316
+
317
+ if (ctx->use_grammar) {
318
+ llama_sample_grammar(ctx->ctx, &candidates_p, (ctx->grammar)->grammar);
319
+ }
320
+
321
+ for (auto& candidate : candidates) {
322
+ if (candidate.id == eos_token) {
323
+ candidate.logit = originalEosLogit;
324
+ break;
208
325
  }
209
326
  }
210
- result = re;
327
+
328
+ if ((ctx->model)->temperature <= 0) {
329
+ new_token_id = llama_sample_token_greedy(ctx->ctx , &candidates_p);
330
+ } else {
331
+ const int32_t top_k = (ctx->model)->top_k <= 0 ? llama_n_vocab(ctx->ctx) : (ctx->model)->top_k;
332
+ const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
333
+ const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
334
+ const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
335
+ const float top_p = (ctx->model)->top_p; // Top p sampling - 1.0 = disabled
336
+
337
+ // Temperature sampling
338
+ size_t min_keep = std::max(1, n_probs);
339
+ llama_sample_top_k(ctx->ctx, &candidates_p, top_k, min_keep);
340
+ llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
341
+ llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
342
+ llama_sample_top_p(ctx->ctx, &candidates_p, top_p, min_keep);
343
+ llama_sample_temperature(ctx->ctx, &candidates_p, (ctx->model)->temperature);;
344
+ new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
345
+ }
346
+
347
+ if (new_token_id != eos_token && ctx->use_grammar) {
348
+ llama_grammar_accept_token(ctx->ctx, (ctx->grammar)->grammar, new_token_id);
349
+ }
350
+
351
+ result = new_token_id;
211
352
  }
212
353
  void OnOK() {
213
354
  Napi::Env env = Napi::AsyncWorker::Env();
@@ -223,18 +364,15 @@ Napi::Value LLAMAContext::Eval(const Napi::CallbackInfo& info) {
223
364
  return worker->Promise();
224
365
  }
225
366
 
226
- Napi::Value tokenBos(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_token_bos()); }
227
- Napi::Value tokenEos(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_token_eos()); }
228
367
  Napi::Value systemInfo(const Napi::CallbackInfo& info) { return Napi::String::From(info.Env(), llama_print_system_info()); }
229
368
 
230
369
  Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
231
370
  llama_backend_init(false);
232
371
  exports.DefineProperties({
233
- Napi::PropertyDescriptor::Function("tokenBos", tokenBos),
234
- Napi::PropertyDescriptor::Function("tokenEos", tokenEos),
235
372
  Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
236
373
  });
237
374
  LLAMAModel::init(exports);
375
+ LLAMAGrammar::init(exports);
238
376
  LLAMAContext::init(exports);
239
377
  return exports;
240
378
  }
@@ -0,0 +1,3 @@
1
+ {
2
+ "release": "b1107"
3
+ }
package/llama/binding.gyp CHANGED
@@ -7,15 +7,18 @@
7
7
  "llama.cpp/ggml.c",
8
8
  "llama.cpp/ggml-alloc.c",
9
9
  "llama.cpp/k_quants.c",
10
- "llama.cpp/llama.cpp"
10
+ "llama.cpp/llama.cpp",
11
+ "llama.cpp/common/common.cpp",
12
+ "llama.cpp/common/grammar-parser.cpp"
11
13
  ],
12
14
  "include_dirs": [
13
15
  "<!@(node -p \"require('node-addon-api').include\")",
14
- "llama.cpp"
16
+ "llama.cpp",
17
+ "llama.cpp/common"
15
18
  ],
16
19
  "cflags": ["-fexceptions"],
17
20
  "cflags_cc": ["-fexceptions"],
18
- "defines": [ "GGML_USE_K_QUANTS", "NAPI_CPP_EXCEPTIONS" ],
21
+ "defines": ["GGML_USE_K_QUANTS", "NAPI_CPP_EXCEPTIONS"],
19
22
  "msvs_settings": {
20
23
  "VCCLCompilerTool": { "AdditionalOptions": [ '/arch:AVX2', '/EHsc' ] }
21
24
  }