node-llama-cpp 1.3.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -28
- package/dist/ChatPromptWrapper.d.ts +3 -0
- package/dist/ChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLPromptWrapper.d.ts +11 -0
- package/dist/chatWrappers/ChatMLPromptWrapper.js +19 -0
- package/dist/chatWrappers/ChatMLPromptWrapper.js.map +1 -0
- package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +1 -0
- package/dist/chatWrappers/EmptyChatPromptWrapper.js +1 -0
- package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +11 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.js +28 -4
- package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +4 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.js +9 -5
- package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/createChatWrapperByBos.d.ts +2 -0
- package/dist/chatWrappers/createChatWrapperByBos.js +14 -0
- package/dist/chatWrappers/createChatWrapperByBos.js.map +1 -0
- package/dist/cli/commands/BuildCommand.d.ts +3 -1
- package/dist/cli/commands/BuildCommand.js +24 -2
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +7 -1
- package/dist/cli/commands/ChatCommand.js +87 -12
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.js +1 -1
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +4 -1
- package/dist/cli/commands/DownloadCommand.js +71 -70
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +4 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/config.d.ts +5 -0
- package/dist/config.js +11 -1
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +3 -3
- package/dist/llamaEvaluator/LlamaBins.js +2 -2
- package/dist/llamaEvaluator/LlamaBins.js.map +1 -1
- package/dist/llamaEvaluator/LlamaChatSession.d.ts +7 -2
- package/dist/llamaEvaluator/LlamaChatSession.js +51 -11
- package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
- package/dist/llamaEvaluator/LlamaContext.d.ts +31 -2
- package/dist/llamaEvaluator/LlamaContext.js +74 -7
- package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +14 -0
- package/dist/llamaEvaluator/LlamaGrammar.js +30 -0
- package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -0
- package/dist/llamaEvaluator/LlamaModel.d.ts +49 -1
- package/dist/llamaEvaluator/LlamaModel.js +25 -9
- package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/binariesGithubRelease.d.ts +6 -0
- package/dist/utils/binariesGithubRelease.js +15 -0
- package/dist/utils/binariesGithubRelease.js.map +1 -0
- package/dist/utils/compileLLamaCpp.d.ts +3 -1
- package/dist/utils/compileLLamaCpp.js +34 -4
- package/dist/utils/compileLLamaCpp.js.map +1 -1
- package/dist/utils/getBin.d.ts +18 -4
- package/dist/utils/getBin.js +4 -2
- package/dist/utils/getBin.js.map +1 -1
- package/dist/utils/getGrammarsFolder.d.ts +1 -0
- package/dist/utils/getGrammarsFolder.js +18 -0
- package/dist/utils/getGrammarsFolder.js.map +1 -0
- package/dist/utils/getTextCompletion.d.ts +3 -0
- package/dist/utils/getTextCompletion.js +12 -0
- package/dist/utils/getTextCompletion.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +1 -0
- package/dist/utils/removeNullFields.js +9 -0
- package/dist/utils/removeNullFields.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +2 -1
- package/dist/utils/spawnCommand.js +2 -2
- package/dist/utils/spawnCommand.js.map +1 -1
- package/llama/addon.cpp +180 -42
- package/llama/binariesGithubRelease.json +3 -0
- package/llama/binding.gyp +6 -3
- package/llama/grammars/README.md +91 -0
- package/llama/grammars/arithmetic.gbnf +6 -0
- package/llama/grammars/chess.gbnf +13 -0
- package/llama/grammars/japanese.gbnf +7 -0
- package/llama/grammars/json.gbnf +25 -0
- package/llama/grammars/list.gbnf +4 -0
- package/llamaBins/linux-arm64-16.node +0 -0
- package/llamaBins/linux-arm64-17.node +0 -0
- package/llamaBins/linux-arm64-18.node +0 -0
- package/llamaBins/linux-arm64-19.node +0 -0
- package/llamaBins/linux-arm64-20.node +0 -0
- package/llamaBins/linux-armv7l-16.node +0 -0
- package/llamaBins/linux-armv7l-17.node +0 -0
- package/llamaBins/linux-armv7l-18.node +0 -0
- package/llamaBins/linux-armv7l-19.node +0 -0
- package/llamaBins/linux-armv7l-20.node +0 -0
- package/llamaBins/linux-ppc64le-16.node +0 -0
- package/llamaBins/linux-ppc64le-17.node +0 -0
- package/llamaBins/linux-ppc64le-18.node +0 -0
- package/llamaBins/linux-ppc64le-19.node +0 -0
- package/llamaBins/linux-ppc64le-20.node +0 -0
- package/llamaBins/linux-x64-16.node +0 -0
- package/llamaBins/linux-x64-17.node +0 -0
- package/llamaBins/linux-x64-18.node +0 -0
- package/llamaBins/linux-x64-19.node +0 -0
- package/llamaBins/linux-x64-20.node +0 -0
- package/llamaBins/mac-arm64-16.node +0 -0
- package/llamaBins/mac-arm64-17.node +0 -0
- package/llamaBins/mac-arm64-18.node +0 -0
- package/llamaBins/mac-arm64-19.node +0 -0
- package/llamaBins/mac-arm64-20.node +0 -0
- package/llamaBins/mac-x64-16.node +0 -0
- package/llamaBins/mac-x64-17.node +0 -0
- package/llamaBins/mac-x64-18.node +0 -0
- package/llamaBins/mac-x64-19.node +0 -0
- package/llamaBins/mac-x64-20.node +0 -0
- package/llamaBins/win-x64-16.node +0 -0
- package/llamaBins/win-x64-17.node +0 -0
- package/llamaBins/win-x64-18.node +0 -0
- package/llamaBins/win-x64-19.node +0 -0
- package/llamaBins/win-x64-20.node +0 -0
- package/package.json +12 -6
|
@@ -7,14 +7,44 @@ import { clearLlamaBuild } from "./clearLlamaBuild.js";
|
|
|
7
7
|
import { setUsedBinFlag } from "./usedBinFlag.js";
|
|
8
8
|
import { spawnCommand } from "./spawnCommand.js";
|
|
9
9
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
|
-
export async function compileLlamaCpp({ arch = process.arch, nodeTarget = process.version, setUsedBingFlag = true }) {
|
|
10
|
+
export async function compileLlamaCpp({ arch = process.arch, nodeTarget = process.version, setUsedBingFlag = true, metal = false, cuda = false }) {
|
|
11
11
|
try {
|
|
12
12
|
if (!(await fs.exists(llamaCppDirectory))) {
|
|
13
13
|
throw new Error(`"${llamaCppDirectory}" directory does not exist`);
|
|
14
14
|
}
|
|
15
|
+
const gypDefines = ["GGML_USE_K_QUANTS", "NAPI_CPP_EXCEPTIONS"];
|
|
16
|
+
if ((metal && process.platform === "darwin") || process.env.LLAMA_METAL === "1")
|
|
17
|
+
gypDefines.push("LLAMA_METAL=1");
|
|
18
|
+
if (cuda || process.env.LLAMA_CUBLAS === "1")
|
|
19
|
+
gypDefines.push("LLAMA_CUBLAS=1");
|
|
20
|
+
if (process.env.LLAMA_MPI === "1")
|
|
21
|
+
gypDefines.push("LLAMA_MPI=1");
|
|
22
|
+
if (process.env.LLAMA_OPENBLAS === "1")
|
|
23
|
+
gypDefines.push("LLAMA_OPENBLAS=1");
|
|
24
|
+
if (process.env.LLAMA_BLAS_VENDOR != null)
|
|
25
|
+
gypDefines.push("LLAMA_BLAS_VENDOR=" + process.env.LLAMA_BLAS_VENDOR);
|
|
26
|
+
if (process.env.LLAMA_CUDA_FORCE_DMMV != null)
|
|
27
|
+
gypDefines.push("LLAMA_CUDA_FORCE_DMMV=" + process.env.LLAMA_CUDA_FORCE_DMMV);
|
|
28
|
+
if (process.env.LLAMA_CUDA_DMMV_X != null)
|
|
29
|
+
gypDefines.push("LLAMA_CUDA_DMMV_X=" + process.env.LLAMA_CUDA_DMMV_X);
|
|
30
|
+
if (process.env.LLAMA_CUDA_MMV_Y != null)
|
|
31
|
+
gypDefines.push("LLAMA_CUDA_MMV_Y=" + process.env.LLAMA_CUDA_MMV_Y);
|
|
32
|
+
if (process.env.LLAMA_CUDA_F16 != null)
|
|
33
|
+
gypDefines.push("LLAMA_CUDA_F16=" + process.env.LLAMA_CUDA_F16);
|
|
34
|
+
if (process.env.LLAMA_CUDA_KQUANTS_ITER != null)
|
|
35
|
+
gypDefines.push("LLAMA_CUDA_KQUANTS_ITER=" + process.env.LLAMA_CUDA_KQUANTS_ITER);
|
|
36
|
+
if (process.env.LLAMA_HIPBLAS === "1")
|
|
37
|
+
gypDefines.push("LLAMA_HIPBLAS=1");
|
|
38
|
+
if (process.env.LLAMA_CLBLAST === "1")
|
|
39
|
+
gypDefines.push("LLAMA_CLBLAST=1");
|
|
40
|
+
const nodeGypEnv = {
|
|
41
|
+
...process.env,
|
|
42
|
+
"CMAKE_CURRENT_SOURCE_DIR": llamaCppDirectory,
|
|
43
|
+
"GYP_DEFINES": gypDefines.join(" ")
|
|
44
|
+
};
|
|
15
45
|
await clearLlamaBuild();
|
|
16
|
-
await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget], __dirname);
|
|
17
|
-
await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget, "--", "-f", "compile_commands_json"], __dirname);
|
|
46
|
+
await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget], __dirname, nodeGypEnv);
|
|
47
|
+
await spawnCommand("npm", ["run", "-s", "node-gyp-llama", "--", "configure", "--arch=" + arch, "--target=" + nodeTarget, "--", "-f", "compile_commands_json"], __dirname, nodeGypEnv);
|
|
18
48
|
if (await fs.exists(path.join(llamaDirectory, "Release", "compile_commands.json"))) {
|
|
19
49
|
await fs.move(path.join(llamaDirectory, "Release", "compile_commands.json"), path.join(llamaDirectory, "compile_commands.json"));
|
|
20
50
|
}
|
|
@@ -23,7 +53,7 @@ export async function compileLlamaCpp({ arch = process.arch, nodeTarget = proces
|
|
|
23
53
|
}
|
|
24
54
|
await fs.remove(path.join(llamaDirectory, "Release"));
|
|
25
55
|
await fs.remove(path.join(llamaDirectory, "Debug"));
|
|
26
|
-
await spawnCommand("npm", ["run", "-s", "node-gyp-llama-build", "--", "--arch=" + arch, "--target=" + nodeTarget], __dirname);
|
|
56
|
+
await spawnCommand("npm", ["run", "-s", "node-gyp-llama-build", "--", "--arch=" + arch, "--target=" + nodeTarget], __dirname, nodeGypEnv);
|
|
27
57
|
if (setUsedBingFlag) {
|
|
28
58
|
await setUsedBinFlag("localBuildFromSource");
|
|
29
59
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compileLLamaCpp.js","sourceRoot":"","sources":["../../src/utils/compileLLamaCpp.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAE,cAAc,EAAC,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAC,eAAe,EAAC,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,
|
|
1
|
+
{"version":3,"file":"compileLLamaCpp.js","sourceRoot":"","sources":["../../src/utils/compileLLamaCpp.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAE,cAAc,EAAC,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAC,eAAe,EAAC,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,EAClC,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,OAAO,EAAE,eAAe,GAAG,IAAI,EAAE,KAAK,GAAG,KAAK,EAAE,IAAI,GAAG,KAAK,EAGzG;IACG,IAAI;QACA,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,EAAE;YACvC,MAAM,IAAI,KAAK,CAAC,IAAI,iBAAiB,4BAA4B,CAAC,CAAC;SACtE;QAED,MAAM,UAAU,GAAG,CAAC,mBAAmB,EAAE,qBAAqB,CAAC,CAAC;QAEhE,IAAI,CAAC,KAAK,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAClH,IAAI,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,YAAY,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAChF,IAAI,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAClE,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC5E,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,oBAAoB,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QACjH,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,wBAAwB,GAAG,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;QAC7H,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,oBAAoB,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QACjH,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,mBAAmB,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QAC9G,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QACxG,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,IAAI;YAAE,UAAU,CAAC,IAAI,CAAC,0BAA0B,GAAG,OAAO,CAAC,G
AAG,CAAC,uBAAuB,CAAC,CAAC;QACnI,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAC1E,IAAI,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,GAAG;YAAE,UAAU,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAE1E,MAAM,UAAU,GAAsB;YAClC,GAAG,OAAO,CAAC,GAAG;YACd,0BAA0B,EAAE,iBAAiB;YAC7C,aAAa,EAAE,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC;SACtC,CAAC;QAEF,MAAM,eAAe,EAAE,CAAC;QAExB,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAEjJ,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,uBAAuB,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAEtL,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,EAAE,uBAAuB,CAAC,CAAC,EAAE;YAChF,MAAM,EAAE,CAAC,IAAI,CACT,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,EAAE,uBAAuB,CAAC,EAC7D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,uBAAuB,CAAC,CACrD,CAAC;SACL;aAAM,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,uBAAuB,CAAC,CAAC,EAAE;YACrF,MAAM,EAAE,CAAC,IAAI,CACT,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,uBAAuB,CAAC,EAC3D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,uBAAuB,CAAC,CACrD,CAAC;SACL;QAED,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC,CAAC;QACtD,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC,CAAC;QAGpD,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,SAAS,GAAG,IAAI,EAAE,WAAW,GAAG,UAAU,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAE1I,IAAI,eAAe,EAAE;YACjB,MAAM,cAAc,CAAC,sBAAsB,CAAC,CAAC;SAChD;KACJ;IAAC,OAAO,GAAG,EAAE;QACV,IAAI,eAAe;YACf,MAAM,cAAc,CAAC,kBAAkB,CAAC,CAAC;QAE7C,MAAM,GAAG,CAAC;KACb;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,6BAA6B;IAC/C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;IAE/F,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC;QAC3B,OAAO,UAAU,CAAC;IAEtB,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
package/dist/utils/getBin.d.ts
CHANGED
|
@@ -3,9 +3,8 @@ export declare function loadBin(): Promise<LlamaCppNodeModule>;
|
|
|
3
3
|
export type LlamaCppNodeModule = {
|
|
4
4
|
LLAMAModel: LLAMAModel;
|
|
5
5
|
LLAMAContext: LLAMAContext;
|
|
6
|
-
|
|
6
|
+
LLAMAGrammar: LLAMAGrammar;
|
|
7
7
|
systemInfo(): string;
|
|
8
|
-
tokenEos(): number;
|
|
9
8
|
};
|
|
10
9
|
export type LLAMAModel = {
|
|
11
10
|
new (modelPath: string, params: {
|
|
@@ -20,11 +19,26 @@ export type LLAMAModel = {
|
|
|
20
19
|
useMmap?: boolean;
|
|
21
20
|
useMlock?: boolean;
|
|
22
21
|
embedding?: boolean;
|
|
22
|
+
temperature?: number;
|
|
23
|
+
topK?: number;
|
|
24
|
+
topP?: number;
|
|
23
25
|
}): LLAMAModel;
|
|
24
26
|
};
|
|
25
27
|
export type LLAMAContext = {
|
|
26
|
-
new (model: LLAMAModel
|
|
28
|
+
new (model: LLAMAModel, params?: {
|
|
29
|
+
grammar?: LLAMAGrammar;
|
|
30
|
+
}): LLAMAContext;
|
|
27
31
|
encode(text: string): Uint32Array;
|
|
28
|
-
eval(tokens: Uint32Array
|
|
32
|
+
eval(tokens: Uint32Array): Promise<number>;
|
|
29
33
|
decode(tokens: Uint32Array): string;
|
|
34
|
+
tokenBos(): number;
|
|
35
|
+
tokenEos(): number;
|
|
36
|
+
tokenNl(): number;
|
|
37
|
+
getContextSize(): number;
|
|
38
|
+
getTokenString(token: number): string;
|
|
39
|
+
};
|
|
40
|
+
// Constructor shape of the native `LLAMAGrammar` class exported by the addon.
// `params.printGrammar` is optional; the addon treats it as false when absent.
// NOTE(review): the native constructor in llama/addon.cpp passes the first argument
// straight to `grammar_parser::parse`, so it looks like this string is the grammar
// text itself rather than a file path — confirm and consider renaming `grammarPath`.
export type LLAMAGrammar = {
|
|
41
|
+
new (grammarPath: string, params?: {
|
|
42
|
+
printGrammar?: boolean;
|
|
43
|
+
}): LLAMAGrammar;
|
|
30
44
|
};
|
package/dist/utils/getBin.js
CHANGED
|
@@ -3,7 +3,7 @@ import * as console from "console";
|
|
|
3
3
|
import path from "path";
|
|
4
4
|
import process from "process";
|
|
5
5
|
import fs from "fs-extra";
|
|
6
|
-
import { defaultLlamaCppGitHubRepo, defaultLlamaCppRelease, defaultSkipDownload, llamaBinsDirectory } from "../config.js";
|
|
6
|
+
import { defaultLlamaCppCudaSupport, defaultLlamaCppGitHubRepo, defaultLlamaCppMetalSupport, defaultLlamaCppRelease, defaultSkipDownload, llamaBinsDirectory } from "../config.js";
|
|
7
7
|
import { DownloadLlamaCppCommand } from "../cli/commands/DownloadCommand.js";
|
|
8
8
|
import { getUsedBinFlag } from "./usedBinFlag.js";
|
|
9
9
|
import { getCompiledLlamaCppBinaryPath } from "./compileLLamaCpp.js";
|
|
@@ -56,7 +56,9 @@ export async function loadBin() {
|
|
|
56
56
|
else {
|
|
57
57
|
await DownloadLlamaCppCommand({
|
|
58
58
|
repo: defaultLlamaCppGitHubRepo,
|
|
59
|
-
release: defaultLlamaCppRelease
|
|
59
|
+
release: defaultLlamaCppRelease,
|
|
60
|
+
metal: defaultLlamaCppMetalSupport,
|
|
61
|
+
cuda: defaultLlamaCppCudaSupport
|
|
60
62
|
});
|
|
61
63
|
const modulePath = await getCompiledLlamaCppBinaryPath();
|
|
62
64
|
if (modulePath == null) {
|
package/dist/utils/getBin.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"getBin.js","sourceRoot":"","sources":["../../src/utils/getBin.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,QAAQ,CAAC;AACrC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,
|
|
1
|
+
{"version":3,"file":"getBin.js","sourceRoot":"","sources":["../../src/utils/getBin.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,QAAQ,CAAC;AACrC,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EACH,0BAA0B,EAAE,yBAAyB,EAAE,2BAA2B,EAAE,sBAAsB,EAAE,mBAAmB,EAC/H,kBAAkB,EACrB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAC,6BAA6B,EAAC,MAAM,sBAAsB,CAAC;AAEnE,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE/C,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACpC,MAAM,gBAAgB,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IACrE,MAAM,iBAAiB,GAAG,CAAC,gBAAgB,EAAE,gBAAgB,GAAG,CAAC,CAAC,CAAC;IAEnE,SAAS,UAAU,CAAC,QAAgB,EAAE,IAAY,EAAE,WAAmB;QACnE,OAAO,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,GAAG,QAAQ,IAAI,IAAI,IAAI,WAAW,OAAO,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,UAAU,WAAW,CAAC,QAAgB,EAAE,IAAY,EAAE,YAAsB;QAC7E,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE;YACpC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;YAExD,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC;gBACxB,OAAO,OAAO,CAAC;SACtB;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,KAAK,UAAU,OAAO;QAClB,QAAQ,OAAO,CAAC,QAAQ,EAAE;YACtB,KAAK,OAAO,CAAC;YACb,KAAK,QAAQ;gBACT,OAAO,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;YAE/D,KAAK,OAAO,CAAC;YACb,KAAK,SAAS;gBACV,OAAO,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;YAEjE,KAAK,QAAQ;gBACT,OAAO,WAAW,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;SAClE;QAED,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,OAAO,MAAM,OAAO,EAAE,CAAC;AAC3B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO;IACzB,MAAM,YAAY,GAAG,MAAM,cAAc,EAAE,CAAC;IAE5C,IAAI,YAAY,KAAK,kBAAkB,EAAE;QACrC,MAAM,eAAe,GAAG,MAAM,kBAAkB,EAAE,CAAC;QAEnD,IAAI,eAAe,IAAI,IAAI,EAAE;YACzB,OAAO,CAAC,IAAI,CAAC,wEAAwE,CAAC,CAAC;SAC1F;aAAM;YACH,OAAO,OAAO,CAAC,eAAe,CAAC,CAAC;SACnC;KACJ;IAED,MAAM,UAAU,GAAG,MAAM,6BAA6B,EAAE,CAAC;IAEzD,IAAI,UAAU,IAAI,IAAI,EAAE;QACpB,IAAI,mBAAmB,EAAE;YACrB,MAAM,IAAI,KAAK,C
AAC,oFAAoF,CAAC,CAAC;SACzG;aAAM;YACH,MAAM,uBAAuB,CAAC;gBAC1B,IAAI,EAAE,yBAAyB;gBAC/B,OAAO,EAAE,sBAAsB;gBAC/B,KAAK,EAAE,2BAA2B;gBAClC,IAAI,EAAE,0BAA0B;aACnC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,MAAM,6BAA6B,EAAE,CAAC;YAEzD,IAAI,UAAU,IAAI,IAAI,EAAE;gBACpB,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;aAC/D;YAED,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC;SAC9B;KACJ;IAED,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC;AAC/B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function getGrammarsFolder(): Promise<string>;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import fs from "fs-extra";
|
|
2
|
+
import { llamaBinsGrammarsDirectory, llamaCppGrammarsDirectory } from "../config.js";
|
|
3
|
+
import { getUsedBinFlag } from "./usedBinFlag.js";
|
|
4
|
+
/**
 * Resolve the directory that holds the bundled grammar files.
 *
 * The lookup order depends on the value returned by `getUsedBinFlag()`:
 *  - "localBuildFromSource": only `llamaCppGrammarsDirectory` is considered.
 *  - "prebuiltBinaries": `llamaBinsGrammarsDirectory` first, then
 *    `llamaCppGrammarsDirectory`.
 *  - any other value: nothing is considered.
 *
 * @returns {Promise<string>} the first candidate directory that exists
 * @throws {Error} "Grammars folder not found" when no candidate exists
 */
export async function getGrammarsFolder() {
    const binSource = await getUsedBinFlag();
    let candidateDirectories = [];
    switch (binSource) {
        case "localBuildFromSource":
            candidateDirectories = [llamaCppGrammarsDirectory];
            break;
        case "prebuiltBinaries":
            candidateDirectories = [llamaBinsGrammarsDirectory, llamaCppGrammarsDirectory];
            break;
    }
    // Probe sequentially so that later candidates are only checked when the
    // earlier ones are missing.
    for (const directory of candidateDirectories) {
        if (await fs.exists(directory)) {
            return directory;
        }
    }
    throw new Error("Grammars folder not found");
}
|
|
18
|
+
//# sourceMappingURL=getGrammarsFolder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getGrammarsFolder.js","sourceRoot":"","sources":["../../src/utils/getGrammarsFolder.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,0BAA0B,EAAE,yBAAyB,EAAC,MAAM,cAAc,CAAC;AACnF,OAAO,EAAC,cAAc,EAAC,MAAM,kBAAkB,CAAC;AAEhD,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACnC,MAAM,YAAY,GAAG,MAAM,cAAc,EAAE,CAAC;IAE5C,IAAI,YAAY,KAAK,sBAAsB,EAAE;QACzC,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC;YAC1C,OAAO,yBAAyB,CAAC;KACxC;SAAM,IAAI,YAAY,KAAK,kBAAkB,EAAE;QAC5C,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,0BAA0B,CAAC;YAC3C,OAAO,0BAA0B,CAAC;aACjC,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC;YAC/C,OAAO,yBAAyB,CAAC;KACxC;IAED,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;AACjD,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export declare function getTextCompletion(text: null, fullText: string | string[]): null;
|
|
2
|
+
export declare function getTextCompletion(text: string, fullText: string | string[]): string | null;
|
|
3
|
+
export declare function getTextCompletion(text: string | null, fullText: string | string[]): string | null;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Return the part of a candidate text that follows the given prefix.
 *
 * @param {string | null} text - prefix to look for; `null`/`undefined` yields `null`
 * @param {string | string[]} fullText - one candidate or a list of candidates,
 *     checked in order
 * @returns {string | null} the remainder of the first candidate that starts with
 *     `text` (an empty string when the candidate equals `text`), or `null` when
 *     no candidate starts with it
 */
export function getTextCompletion(text, fullText) {
    if (text == null)
        return null;
    const candidates = typeof fullText === "string" ? [fullText] : fullText;
    const firstMatch = candidates.find((candidate) => candidate.startsWith(text));
    return firstMatch === undefined
        ? null
        : firstMatch.slice(text.length);
}
|
|
12
|
+
//# sourceMappingURL=getTextCompletion.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"getTextCompletion.js","sourceRoot":"","sources":["../../src/utils/getTextCompletion.ts"],"names":[],"mappings":"AAGA,MAAM,UAAU,iBAAiB,CAAC,IAAmB,EAAE,QAA2B;IAC9E,IAAI,IAAI,IAAI,IAAI,EAAE;QACd,OAAO,IAAI,CAAC;KACf;IAED,MAAM,SAAS,GAAG,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;IAEvE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE;QAC9B,IAAI,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC;YACzB,OAAO,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;KAC1C;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function removeNullFields<T extends object>(obj: T): T;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"removeNullFields.js","sourceRoot":"","sources":["../../src/utils/removeNullFields.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,gBAAgB,CAAmB,GAAM;IACrD,MAAM,MAAM,GAAM,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE;QACnB,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI;YACnB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;KAC1B;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
export declare function spawnCommand(command: string, args: string[], cwd: string, env?: NodeJS.ProcessEnv): Promise<void>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import spawn from "cross-spawn";
|
|
2
|
-
export function spawnCommand(command, args, cwd) {
|
|
2
|
+
export function spawnCommand(command, args, cwd, env = process.env) {
|
|
3
3
|
function getCommandString() {
|
|
4
4
|
let res = command;
|
|
5
5
|
for (const arg of args) {
|
|
@@ -16,7 +16,7 @@ export function spawnCommand(command, args, cwd) {
|
|
|
16
16
|
const child = spawn(command, args, {
|
|
17
17
|
stdio: "inherit",
|
|
18
18
|
cwd,
|
|
19
|
-
env
|
|
19
|
+
env,
|
|
20
20
|
detached: false,
|
|
21
21
|
windowsHide: true
|
|
22
22
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"spawnCommand.js","sourceRoot":"","sources":["../../src/utils/spawnCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,aAAa,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,OAAe,EAAE,IAAc,EAAE,GAAW;
|
|
1
|
+
{"version":3,"file":"spawnCommand.js","sourceRoot":"","sources":["../../src/utils/spawnCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,aAAa,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,OAAe,EAAE,IAAc,EAAE,GAAW,EAAE,GAAG,GAAG,OAAO,CAAC,GAAG;IACxF,SAAS,gBAAgB;QACrB,IAAI,GAAG,GAAG,OAAO,CAAC;QAElB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;YACpB,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;gBACnB,GAAG,IAAI,KAAK,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;aAC7C;iBAAM;gBACH,GAAG,IAAI,IAAI,GAAG,EAAE,CAAC;aACpB;SACJ;QAED,OAAO,GAAG,CAAC;IACf,CAAC;IAED,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACzC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,EAAE,IAAI,EAAE;YAC/B,KAAK,EAAE,SAAS;YAChB,GAAG;YACH,GAAG;YACH,QAAQ,EAAE,KAAK;YACf,WAAW,EAAE,IAAI;SACpB,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YACtB,IAAI,IAAI,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC;;gBAEV,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,qBAAqB,IAAI,EAAE,CAAC,CAAC,CAAC;QACpF,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC1B,KAAK,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;QAC9F,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE;YACrB,IAAI,IAAI,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC;;gBAEV,MAAM,CAAC,IAAI,KAAK,CAAC,WAAW,gBAAgB,EAAE,qBAAqB,IAAI,EAAE,CAAC,CAAC,CAAC;QACpF,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC"}
|
package/llama/addon.cpp
CHANGED
|
@@ -3,18 +3,26 @@
|
|
|
3
3
|
#include <sstream>
|
|
4
4
|
#include <vector>
|
|
5
5
|
|
|
6
|
+
#include "common.h"
|
|
6
7
|
#include "llama.h"
|
|
8
|
+
#include "common/grammar-parser.h"
|
|
7
9
|
#include "napi.h"
|
|
8
10
|
|
|
9
11
|
class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
|
|
10
12
|
public:
|
|
11
13
|
llama_context_params params;
|
|
12
14
|
llama_model* model;
|
|
15
|
+
float temperature;
|
|
16
|
+
int32_t top_k;
|
|
17
|
+
float top_p;
|
|
13
18
|
|
|
14
19
|
LLAMAModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAModel>(info) {
|
|
15
20
|
params = llama_context_default_params();
|
|
16
21
|
params.seed = -1;
|
|
17
22
|
params.n_ctx = 4096;
|
|
23
|
+
temperature = 0.0f;
|
|
24
|
+
top_k = 40;
|
|
25
|
+
top_p = 0.95f;
|
|
18
26
|
|
|
19
27
|
// Get the model path
|
|
20
28
|
std::string modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
@@ -65,8 +73,21 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
|
|
|
65
73
|
if (options.Has("embedding")) {
|
|
66
74
|
params.embedding = options.Get("embedding").As<Napi::Boolean>().Value();
|
|
67
75
|
}
|
|
76
|
+
|
|
77
|
+
if (options.Has("temperature")) {
|
|
78
|
+
temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
if (options.Has("topK")) {
|
|
82
|
+
top_k = options.Get("topK").As<Napi::Number>().Int32Value();
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
if (options.Has("topP")) {
|
|
86
|
+
top_p = options.Get("topP").As<Napi::Number>().FloatValue();
|
|
87
|
+
}
|
|
68
88
|
}
|
|
69
89
|
|
|
90
|
+
llama_backend_init(false);
|
|
70
91
|
model = llama_load_model_from_file(modelPath.c_str(), params);
|
|
71
92
|
|
|
72
93
|
if (model == NULL) {
|
|
@@ -84,20 +105,84 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
|
|
|
84
105
|
}
|
|
85
106
|
};
|
|
86
107
|
|
|
108
|
+
/**
 * N-API wrapper that owns a llama.cpp sampling grammar.
 *
 * JavaScript usage: `new LLAMAGrammar(grammarCode, { printGrammar?: boolean })`.
 * The first argument is passed directly to `grammar_parser::parse`, i.e. it is
 * the grammar source text. When `printGrammar` is true the parsed grammar is
 * written to stderr.
 *
 * If construction fails a JavaScript exception is raised and `grammar` stays
 * `nullptr`.
 */
class LLAMAGrammar : public Napi::ObjectWrap<LLAMAGrammar> {
  public:
  grammar_parser::parse_state parsed_grammar;
  llama_grammar *grammar = nullptr;

  LLAMAGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAGrammar>(info) {
    // Get the grammar source code
    std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
    bool should_print_grammar = false;

    if (info.Length() > 1 && info[1].IsObject()) {
      Napi::Object options = info[1].As<Napi::Object>();

      if (options.Has("printGrammar")) {
        should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
      }
    }

    parsed_grammar = grammar_parser::parse(grammarCode.c_str());
    // will be empty (default) if there are parse errors
    if (parsed_grammar.rules.empty()) {
      Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
      return;
    }

    if (should_print_grammar) {
      grammar_parser::print_grammar(stderr, parsed_grammar);
    }

    // A grammar can parse successfully without defining "root". Look the rule up
    // explicitly instead of using .at(), whose std::out_of_range is not a
    // Napi::Error and therefore would not be turned into a JavaScript exception.
    const auto root_rule = parsed_grammar.symbol_ids.find("root");
    if (root_rule == parsed_grammar.symbol_ids.end()) {
      Napi::Error::New(info.Env(), "Grammar is missing a \"root\" rule").ThrowAsJavaScriptException();
      return;
    }

    std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
    grammar = llama_grammar_init(
        grammar_rules.data(), grammar_rules.size(), root_rule->second);
  }

  ~LLAMAGrammar() {
    // grammar is still nullptr when the constructor bailed out early.
    if (grammar != nullptr) {
      llama_grammar_free(grammar);
      grammar = nullptr;
    }
  }

  // Registers the class on the addon's exports; it exposes no instance methods.
  static void init(Napi::Object exports) {
    exports.Set("LLAMAGrammar", DefineClass(exports.Env(), "LLAMAGrammar", {}));
  }
};
|
|
153
|
+
|
|
87
154
|
class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
88
155
|
public:
|
|
89
156
|
LLAMAModel* model;
|
|
90
157
|
llama_context* ctx;
|
|
158
|
+
LLAMAGrammar* grammar;
|
|
159
|
+
bool use_grammar = false;
|
|
160
|
+
|
|
91
161
|
LLAMAContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<LLAMAContext>(info) {
|
|
92
162
|
model = Napi::ObjectWrap<LLAMAModel>::Unwrap(info[0].As<Napi::Object>());
|
|
93
163
|
model->Ref();
|
|
94
164
|
ctx = llama_new_context_with_model(model->model, model->params);
|
|
95
165
|
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
166
|
+
|
|
167
|
+
if (info.Length() > 1 && info[1].IsObject()) {
|
|
168
|
+
Napi::Object options = info[1].As<Napi::Object>();
|
|
169
|
+
|
|
170
|
+
if (options.Has("grammar")) {
|
|
171
|
+
grammar = Napi::ObjectWrap<LLAMAGrammar>::Unwrap(options.Get("grammar").As<Napi::Object>());
|
|
172
|
+
grammar->Ref();
|
|
173
|
+
use_grammar = true;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
96
176
|
}
|
|
97
177
|
~LLAMAContext() {
|
|
98
178
|
Napi::MemoryManagement::AdjustExternalMemory(Env(), -(int64_t)llama_get_state_size(ctx));
|
|
99
179
|
llama_free(ctx);
|
|
100
180
|
model->Unref();
|
|
181
|
+
|
|
182
|
+
if (use_grammar) {
|
|
183
|
+
grammar->Unref();
|
|
184
|
+
use_grammar = false;
|
|
185
|
+
}
|
|
101
186
|
}
|
|
102
187
|
Napi::Value Encode(const Napi::CallbackInfo& info) {
|
|
103
188
|
std::string text = info[0].As<Napi::String>().Utf8Value();
|
|
@@ -124,16 +209,42 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
124
209
|
|
|
125
210
|
// Decode each token and accumulate the result.
|
|
126
211
|
for (size_t i = 0; i < tokens.ElementLength(); i++) {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
212
|
+
const std::string piece = llama_token_to_piece(ctx, (llama_token)tokens[i]);
|
|
213
|
+
|
|
214
|
+
if (piece.empty()) {
|
|
215
|
+
continue;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
ss << piece;
|
|
133
219
|
}
|
|
134
220
|
|
|
135
221
|
return Napi::String::New(info.Env(), ss.str());
|
|
136
222
|
}
|
|
223
|
+
Napi::Value TokenBos(const Napi::CallbackInfo& info) {
    // Return llama_token_bos() for this context to JS as a number.
    const auto bosToken = llama_token_bos(ctx);
    return Napi::Number::From(info.Env(), bosToken);
}
|
|
226
|
+
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
    // Return llama_token_eos() for this context to JS as a number.
    const auto eosToken = llama_token_eos(ctx);
    return Napi::Number::From(info.Env(), eosToken);
}
|
|
229
|
+
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
    // Return llama_token_nl() for this context to JS as a number.
    const auto newlineToken = llama_token_nl(ctx);
    return Napi::Number::From(info.Env(), newlineToken);
}
|
|
232
|
+
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
    // Return llama_n_ctx() for this context to JS as a number.
    const auto contextSize = llama_n_ctx(ctx);
    return Napi::Number::From(info.Env(), contextSize);
}
|
|
235
|
+
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
    // info[0]: token id as a JS number.
    // Returns the text llama_token_get_text reports for that token, or
    // `undefined` when it reports none (null pointer).
    const int token = info[0].As<Napi::Number>().Int32Value();

    const char* text = llama_token_get_text(ctx, token);
    if (text == nullptr) {
        return info.Env().Undefined();
    }

    // Build the JS string straight from the C string; no intermediate
    // stream/std::string copy is needed.
    return Napi::String::New(info.Env(), text);
}
|
|
137
248
|
Napi::Value Eval(const Napi::CallbackInfo& info);
|
|
138
249
|
static void init(Napi::Object exports) {
|
|
139
250
|
exports.Set("LLAMAContext",
|
|
@@ -142,6 +253,11 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
142
253
|
{
|
|
143
254
|
InstanceMethod("encode", &LLAMAContext::Encode),
|
|
144
255
|
InstanceMethod("decode", &LLAMAContext::Decode),
|
|
256
|
+
InstanceMethod("tokenBos", &LLAMAContext::TokenBos),
|
|
257
|
+
InstanceMethod("tokenEos", &LLAMAContext::TokenEos),
|
|
258
|
+
InstanceMethod("tokenNl", &LLAMAContext::TokenNl),
|
|
259
|
+
InstanceMethod("getContextSize", &LLAMAContext::GetContextSize),
|
|
260
|
+
InstanceMethod("getTokenString", &LLAMAContext::GetTokenString),
|
|
145
261
|
InstanceMethod("eval", &LLAMAContext::Eval),
|
|
146
262
|
}));
|
|
147
263
|
}
|
|
@@ -151,7 +267,6 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
151
267
|
class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
|
|
152
268
|
LLAMAContext* ctx;
|
|
153
269
|
std::vector<llama_token> tokens;
|
|
154
|
-
std::vector<llama_token> restriction;
|
|
155
270
|
llama_token result;
|
|
156
271
|
|
|
157
272
|
public:
|
|
@@ -160,13 +275,6 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
|
|
|
160
275
|
Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
|
|
161
276
|
this->tokens.reserve(tokens.ElementLength());
|
|
162
277
|
for (size_t i = 0; i < tokens.ElementLength(); i++) { this->tokens.push_back(static_cast<llama_token>(tokens[i])); }
|
|
163
|
-
|
|
164
|
-
if (info.Length() > 1 && info[1].IsTypedArray()) {
|
|
165
|
-
Napi::Uint32Array restriction = info[1].As<Napi::Uint32Array>();
|
|
166
|
-
this->restriction.reserve(restriction.ElementLength());
|
|
167
|
-
for (size_t i = 0; i < restriction.ElementLength(); i++) { this->restriction.push_back(static_cast<llama_token>(restriction[i])); }
|
|
168
|
-
std::sort(this->restriction.begin(), this->restriction.end());
|
|
169
|
-
}
|
|
170
278
|
}
|
|
171
279
|
~LLAMAContextEvalWorker() {
    // Release a reference on the context wrapper this worker operates on.
    ctx->Unref();
}
|
|
172
280
|
using Napi::AsyncWorker::Queue;
|
|
@@ -175,39 +283,72 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
|
|
|
175
283
|
protected:
|
|
176
284
|
void Execute() {
|
|
177
285
|
// Perform the evaluation using llama_eval.
|
|
178
|
-
int r = llama_eval(ctx->ctx, tokens.data(), tokens.size(), llama_get_kv_cache_token_count(ctx->ctx), 6);
|
|
286
|
+
int r = llama_eval(ctx->ctx, tokens.data(), int(tokens.size()), llama_get_kv_cache_token_count(ctx->ctx), 6);
|
|
179
287
|
if (r != 0) {
|
|
180
288
|
SetError("Eval has failed");
|
|
181
289
|
return;
|
|
182
290
|
}
|
|
183
291
|
|
|
292
|
+
llama_token new_token_id = 0;
|
|
293
|
+
|
|
184
294
|
// Select the best prediction.
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
295
|
+
auto logits = llama_get_logits(ctx->ctx);
|
|
296
|
+
auto n_vocab = llama_n_vocab(ctx->ctx);
|
|
297
|
+
|
|
298
|
+
std::vector<llama_token_data> candidates;
|
|
299
|
+
candidates.reserve(n_vocab);
|
|
300
|
+
|
|
301
|
+
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
302
|
+
candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
306
|
+
|
|
307
|
+
float originalEosLogit = 0;
|
|
308
|
+
auto eos_token = llama_token_eos(ctx->ctx);
|
|
309
|
+
|
|
310
|
+
for (auto& candidate : candidates) {
|
|
311
|
+
if (candidate.id == eos_token) {
|
|
312
|
+
originalEosLogit = candidate.logit;
|
|
313
|
+
break;
|
|
197
314
|
}
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
if (ctx->use_grammar) {
|
|
318
|
+
llama_sample_grammar(ctx->ctx, &candidates_p, (ctx->grammar)->grammar);
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
for (auto& candidate : candidates) {
|
|
322
|
+
if (candidate.id == eos_token) {
|
|
323
|
+
candidate.logit = originalEosLogit;
|
|
324
|
+
break;
|
|
208
325
|
}
|
|
209
326
|
}
|
|
210
|
-
|
|
327
|
+
|
|
328
|
+
if ((ctx->model)->temperature <= 0) {
|
|
329
|
+
new_token_id = llama_sample_token_greedy(ctx->ctx , &candidates_p);
|
|
330
|
+
} else {
|
|
331
|
+
const int32_t top_k = (ctx->model)->top_k <= 0 ? llama_n_vocab(ctx->ctx) : (ctx->model)->top_k;
|
|
332
|
+
const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
|
|
333
|
+
const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
|
|
334
|
+
const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
|
|
335
|
+
const float top_p = (ctx->model)->top_p; // Top p sampling - 1.0 = disabled
|
|
336
|
+
|
|
337
|
+
// Temperature sampling
|
|
338
|
+
size_t min_keep = std::max(1, n_probs);
|
|
339
|
+
llama_sample_top_k(ctx->ctx, &candidates_p, top_k, min_keep);
|
|
340
|
+
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
341
|
+
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
342
|
+
llama_sample_top_p(ctx->ctx, &candidates_p, top_p, min_keep);
|
|
343
|
+
llama_sample_temperature(ctx->ctx, &candidates_p, (ctx->model)->temperature);;
|
|
344
|
+
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
if (new_token_id != eos_token && ctx->use_grammar) {
|
|
348
|
+
llama_grammar_accept_token(ctx->ctx, (ctx->grammar)->grammar, new_token_id);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
result = new_token_id;
|
|
211
352
|
}
|
|
212
353
|
void OnOK() {
|
|
213
354
|
Napi::Env env = Napi::AsyncWorker::Env();
|
|
@@ -223,18 +364,15 @@ Napi::Value LLAMAContext::Eval(const Napi::CallbackInfo& info) {
|
|
|
223
364
|
return worker->Promise();
|
|
224
365
|
}
|
|
225
366
|
|
|
226
|
-
Napi::Value tokenBos(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_token_bos()); }
|
|
227
|
-
Napi::Value tokenEos(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_token_eos()); }
|
|
228
367
|
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
    // Hand the text returned by llama_print_system_info() to JS as a string.
    const char* description = llama_print_system_info();
    return Napi::String::From(info.Env(), description);
}
|
|
229
368
|
|
|
230
369
|
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
    // Addon entry point: initialize the llama backend, then populate and
    // return the module's exports object.
    llama_backend_init(false);

    // Free function exposed directly on the module.
    const Napi::PropertyDescriptor systemInfoProperty =
        Napi::PropertyDescriptor::Function("systemInfo", systemInfo);
    exports.DefineProperties({ systemInfoProperty });

    // Wrapper classes, registered in the same order as before.
    LLAMAModel::init(exports);
    LLAMAGrammar::init(exports);
    LLAMAContext::init(exports);

    return exports;
}
|
package/llama/binding.gyp
CHANGED
|
@@ -7,15 +7,18 @@
|
|
|
7
7
|
"llama.cpp/ggml.c",
|
|
8
8
|
"llama.cpp/ggml-alloc.c",
|
|
9
9
|
"llama.cpp/k_quants.c",
|
|
10
|
-
"llama.cpp/llama.cpp"
|
|
10
|
+
"llama.cpp/llama.cpp",
|
|
11
|
+
"llama.cpp/common/common.cpp",
|
|
12
|
+
"llama.cpp/common/grammar-parser.cpp"
|
|
11
13
|
],
|
|
12
14
|
"include_dirs": [
|
|
13
15
|
"<!@(node -p \"require('node-addon-api').include\")",
|
|
14
|
-
"llama.cpp"
|
|
16
|
+
"llama.cpp",
|
|
17
|
+
"llama.cpp/common"
|
|
15
18
|
],
|
|
16
19
|
"cflags": ["-fexceptions"],
|
|
17
20
|
"cflags_cc": ["-fexceptions"],
|
|
18
|
-
"defines": [
|
|
21
|
+
"defines": ["GGML_USE_K_QUANTS", "NAPI_CPP_EXCEPTIONS"],
|
|
19
22
|
"msvs_settings": {
|
|
20
23
|
"VCCLCompilerTool": { "AdditionalOptions": [ '/arch:AVX2', '/EHsc' ] }
|
|
21
24
|
}
|