node-llama-cpp 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/dist/chatWrappers/LlamaChatPromptWrapper.js +1 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +2 -1
- package/dist/cli/commands/DownloadCommand.js +12 -2
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/config.d.ts +1 -0
- package/dist/config.js +3 -1
- package/dist/config.js.map +1 -1
- package/dist/llamaEvaluator/LlamaContext.d.ts +1 -1
- package/dist/llamaEvaluator/LlamaContext.js +5 -5
- package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +15 -3
- package/dist/llamaEvaluator/LlamaModel.js +3 -3
- package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
- package/dist/utils/binariesGithubRelease.d.ts +6 -0
- package/dist/utils/binariesGithubRelease.js +15 -0
- package/dist/utils/binariesGithubRelease.js.map +1 -0
- package/dist/utils/getBin.d.ts +4 -3
- package/llama/addon.cpp +44 -41
- package/llama/binariesGithubRelease.json +3 -0
- package/llamaBins/linux-arm64-16.node +0 -0
- package/llamaBins/linux-arm64-17.node +0 -0
- package/llamaBins/linux-arm64-18.node +0 -0
- package/llamaBins/linux-arm64-19.node +0 -0
- package/llamaBins/linux-arm64-20.node +0 -0
- package/llamaBins/linux-armv7l-16.node +0 -0
- package/llamaBins/linux-armv7l-17.node +0 -0
- package/llamaBins/linux-armv7l-18.node +0 -0
- package/llamaBins/linux-armv7l-19.node +0 -0
- package/llamaBins/linux-armv7l-20.node +0 -0
- package/llamaBins/linux-ppc64le-16.node +0 -0
- package/llamaBins/linux-ppc64le-17.node +0 -0
- package/llamaBins/linux-ppc64le-18.node +0 -0
- package/llamaBins/linux-ppc64le-19.node +0 -0
- package/llamaBins/linux-ppc64le-20.node +0 -0
- package/llamaBins/linux-x64-16.node +0 -0
- package/llamaBins/linux-x64-17.node +0 -0
- package/llamaBins/linux-x64-18.node +0 -0
- package/llamaBins/linux-x64-19.node +0 -0
- package/llamaBins/linux-x64-20.node +0 -0
- package/llamaBins/mac-arm64-16.node +0 -0
- package/llamaBins/mac-arm64-17.node +0 -0
- package/llamaBins/mac-arm64-18.node +0 -0
- package/llamaBins/mac-arm64-19.node +0 -0
- package/llamaBins/mac-arm64-20.node +0 -0
- package/llamaBins/mac-x64-16.node +0 -0
- package/llamaBins/mac-x64-17.node +0 -0
- package/llamaBins/mac-x64-18.node +0 -0
- package/llamaBins/mac-x64-19.node +0 -0
- package/llamaBins/mac-x64-20.node +0 -0
- package/llamaBins/win-x64-16.node +0 -0
- package/llamaBins/win-x64-17.node +0 -0
- package/llamaBins/win-x64-18.node +0 -0
- package/llamaBins/win-x64-19.node +0 -0
- package/llamaBins/win-x64-20.node +0 -0
- package/package.json +3 -4
package/README.md
CHANGED
@@ -25,12 +25,12 @@ To disable this behavior set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNLOAD`
 ```typescript
 import {fileURLToPath} from "url";
 import path from "path";
-import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
+import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

 const __dirname = path.dirname(fileURLToPath(import.meta.url));

 const model = new LlamaModel({
-    modelPath: path.join(__dirname, "models", "
+    modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
 });
 const context = new LlamaContext({model});
 const session = new LlamaChatSession({context});
@@ -54,7 +54,7 @@ console.log("AI: " + a2);
 ```typescript
 import {fileURLToPath} from "url";
 import path from "path";
-import {LlamaModel, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
+import {LlamaModel, LlamaContext, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";

 const __dirname = path.dirname(fileURLToPath(import.meta.url));

@@ -73,7 +73,7 @@ export class MyCustomChatPromptWrapper extends ChatPromptWrapper {
 }

 const model = new LlamaModel({
-    modelPath: path.join(__dirname, "models", "
+    modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf"),
     promptWrapper: new MyCustomChatPromptWrapper() // by default, LlamaChatPromptWrapper is used
 })
 const context = new LlamaContext({model});
@@ -98,12 +98,12 @@ console.log("AI: " + a2);
 ```typescript
 import {fileURLToPath} from "url";
 import path from "path";
-import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
+import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

 const __dirname = path.dirname(fileURLToPath(import.meta.url));

 const model = new LlamaModel({
-    modelPath: path.join(__dirname, "models", "
+    modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
 });

 const context = new LlamaContext({model});
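Pieced together from the README hunks above, a minimal end-to-end sketch of the 2.0.0 API. The question text and the `session.prompt()` call are assumptions filled in from the surrounding README context (the `a1`/`a2` answers are visible in the hunk headers):

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const model = new LlamaModel({
    modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
});
const context = new LlamaContext({model}); // new in 2.0.0: the context is imported and created explicitly
const session = new LlamaChatSession({context});

const q1 = "Hi there, how are you?"; // hypothetical prompt
console.log("User: " + q1);

const a1 = await session.prompt(q1); // prompt() assumed from the README's Q&A flow
console.log("AI: " + a1);
```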
package/dist/chatWrappers/LlamaChatPromptWrapper.js
CHANGED
@@ -2,7 +2,7 @@ import { ChatPromptWrapper } from "../ChatPromptWrapper.js";
 // source: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
 export class LlamaChatPromptWrapper extends ChatPromptWrapper {
     wrapPrompt(prompt, { systemPrompt, promptIndex }) {
-        if (promptIndex === 0) {
+        if (promptIndex === 0 && systemPrompt != "") {
             return "<s>[INST] <<SYS>>\n" + systemPrompt + "\n<</SYS>>\n\n" + prompt + " [/INST]\n\n";
         }
         else {
package/dist/chatWrappers/LlamaChatPromptWrapper.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"LlamaChatPromptWrapper.js","sourceRoot":"","sources":["../../src/chatWrappers/LlamaChatPromptWrapper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAE1D,mEAAmE;AACnE,MAAM,OAAO,sBAAuB,SAAQ,iBAAiB;IACzC,UAAU,CAAC,MAAc,EAAE,EAAC,YAAY,EAAE,WAAW,EAA8C;QAC/G,IAAI,WAAW,KAAK,CAAC,EAAE;
+{"version":3,"file":"LlamaChatPromptWrapper.js","sourceRoot":"","sources":["../../src/chatWrappers/LlamaChatPromptWrapper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAE1D,mEAAmE;AACnE,MAAM,OAAO,sBAAuB,SAAQ,iBAAiB;IACzC,UAAU,CAAC,MAAc,EAAE,EAAC,YAAY,EAAE,WAAW,EAA8C;QAC/G,IAAI,WAAW,KAAK,CAAC,IAAI,YAAY,IAAI,EAAE,EAAE;YACzC,OAAO,qBAAqB,GAAG,YAAY,GAAG,gBAAgB,GAAG,MAAM,GAAG,cAAc,CAAC;SAC5F;aAAM;YACH,OAAO,YAAY,GAAG,MAAM,GAAG,cAAc,CAAC;SACjD;IACL,CAAC;IAEe,cAAc;QAC1B,OAAO,CAAC,eAAe,CAAC,CAAC;IAC7B,CAAC;CACJ"}
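The guard added above means an empty system prompt no longer produces an empty `<<SYS>>` block on the first turn. A quick sketch of the behavior change, assuming `LlamaChatPromptWrapper` is exported from the package root (the README comment above refers to it as the default wrapper):

```typescript
import {LlamaChatPromptWrapper} from "node-llama-cpp";

const wrapper = new LlamaChatPromptWrapper();

// First prompt with a system prompt: the Llama 2 <<SYS>> block is emitted as before
wrapper.wrapPrompt("Hi", {systemPrompt: "You are helpful.", promptIndex: 0});
// -> "<s>[INST] <<SYS>>\nYou are helpful.\n<</SYS>>\n\nHi [/INST]\n\n"

// First prompt with an empty system prompt: 2.0.0 now takes the plain [INST] path instead
wrapper.wrapPrompt("Hi", {systemPrompt: "", promptIndex: 0});
```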
package/dist/cli/commands/DownloadCommand.d.ts
CHANGED
@@ -5,7 +5,8 @@ type DownloadCommandArgs = {
     arch?: string;
     nodeTarget?: string;
     skipBuild?: boolean;
+    updateBinariesReleaseMetadata?: boolean;
 };
 export declare const DownloadCommand: CommandModule<object, DownloadCommandArgs>;
-export declare function DownloadLlamaCppCommand({ repo, release, arch, nodeTarget, skipBuild }: DownloadCommandArgs): Promise<void>;
+export declare function DownloadLlamaCppCommand({ repo, release, arch, nodeTarget, skipBuild, updateBinariesReleaseMetadata }: DownloadCommandArgs): Promise<void>;
 export {};
package/dist/cli/commands/DownloadCommand.js
CHANGED
@@ -10,6 +10,7 @@ import { defaultLlamaCppGitHubRepo, defaultLlamaCppRelease, llamaCppDirectory, tempDownloadDirectory } from "../../config.js";
 import { compileLlamaCpp } from "../../utils/compileLLamaCpp.js";
 import withOra from "../../utils/withOra.js";
 import { clearTempFolder } from "../../utils/clearTempFolder.js";
+import { setBinariesGithubRelease } from "../../utils/binariesGithubRelease.js";
 export const DownloadCommand = {
     command: "download",
     describe: "Download a release of llama.cpp and compile it",
@@ -23,7 +24,7 @@ export const DownloadCommand = {
             .option("release", {
             type: "string",
             default: defaultLlamaCppRelease,
-            description: "The tag of the llama.cpp release to download. Can also be set via the NODE_LLAMA_CPP_REPO_RELEASE environment variable"
+            description: "The tag of the llama.cpp release to download. Set to \"latest\" to download the latest release. Can also be set via the NODE_LLAMA_CPP_REPO_RELEASE environment variable"
         })
             .option("arch", {
             type: "string",
@@ -37,11 +38,17 @@ export const DownloadCommand = {
             type: "boolean",
             default: false,
             description: "Skip building llama.cpp after downloading it"
+        })
+            .option("updateBinariesReleaseMetadata", {
+            type: "boolean",
+            hidden: true,
+            default: false,
+            description: "Update the binariesGithubRelease.json file with the release of llama.cpp that was downloaded"
         });
     },
     handler: DownloadLlamaCppCommand
 };
-export async function DownloadLlamaCppCommand({ repo, release, arch, nodeTarget, skipBuild }) {
+export async function DownloadLlamaCppCommand({ repo, release, arch, nodeTarget, skipBuild, updateBinariesReleaseMetadata }) {
     const octokit = new Octokit();
     const [githubOwner, githubRepo] = repo.split("/");
     console.log(`${chalk.yellow("Repo:")} ${repo}`);
@@ -120,6 +127,9 @@ export async function DownloadLlamaCppCommand({ repo, release, arch, nodeTarget,
             setUsedBingFlag: true
         });
     }
+    if (updateBinariesReleaseMetadata) {
+        await setBinariesGithubRelease(githubRelease.data.tag_name);
+    }
     console.log();
     console.log();
     console.log(`${chalk.yellow("Repo:")} ${repo}`);
package/dist/cli/commands/DownloadCommand.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"DownloadCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/DownloadCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B,OAAO,EAAC,OAAO,EAAC,MAAM,SAAS,CAAC;AAChC,OAAO,KAAK,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACxD,OAAO,WAAW,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,SAAS,MAAM,iBAAiB,CAAC;AACxC,OAAO,EAAC,yBAAyB,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,qBAAqB,EAAC,MAAM,iBAAiB,CAAC;AAC5H,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,OAAO,MAAM,wBAAwB,CAAC;AAC7C,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;
+{"version":3,"file":"DownloadCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/DownloadCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAE7B,OAAO,EAAC,OAAO,EAAC,MAAM,SAAS,CAAC;AAChC,OAAO,KAAK,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACxD,OAAO,WAAW,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,SAAS,MAAM,iBAAiB,CAAC;AACxC,OAAO,EAAC,yBAAyB,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,qBAAqB,EAAC,MAAM,iBAAiB,CAAC;AAC5H,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,OAAO,MAAM,wBAAwB,CAAC;AAC7C,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAC,wBAAwB,EAAC,MAAM,sCAAsC,CAAC;AAW9E,MAAM,CAAC,MAAM,eAAe,GAA+C;IACvE,OAAO,EAAE,UAAU;IACnB,QAAQ,EAAE,gDAAgD;IAC1D,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,MAAM,CAAC,MAAM,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,yBAAyB;YAClC,WAAW,EAAE,iIAAiI;SACjJ,CAAC;aACD,MAAM,CAAC,SAAS,EAAE;YACf,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,sBAAsB;YAC/B,WAAW,EAAE,0KAA0K;SAC1L,CAAC;aACD,MAAM,CAAC,MAAM,EAAE;YACZ,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,2CAA2C;SAC3D,CAAC;aACD,MAAM,CAAC,YAAY,EAAE;YAClB,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,gEAAgE;SAChF,CAAC;aACD,MAAM,CAAC,WAAW,EAAE;YACjB,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,8CAA8C;SAC9D,CAAC;aACD,MAAM,CAAC,+BAA+B,EAAE;YACrC,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,IAAI;YACZ,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,8FAA8F;SAC9G,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,uBAAuB;CACnC,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,EAAC,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,6BAA6B,EAAsB;IAC1I,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAC9B,MAAM,CAAC,WAAW,EAAE,UAAU,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAElD,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,EAAE,CAAC;IAKd,IAAI,aAAa,GAA6B,IAAI,CAAC;IACnD,IAAI,MAAc,CAAC;IACnB,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC;QAC9C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC;QAC7C,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,gCAAgC,CAAC;KACrD,EAAE,KAAK,IAAI,EAAE;QACV,IAAI;YACA,IAAI,OAAO,KAAK,QAAQ,EAAE;gBACtB,aAAa,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC;oBACtD,KAAK,EAAE,WAAW;oBAClB,IAAI,EAAE,UAAU;iBACnB,CAAC,CAAC;aACN;iBAAM;gBACH,aAAa,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC;oBACrD,KAAK,EAAE,WAAW;oBAClB,IAAI,EAAE,UAAU;oBAChB,GAAG,EAAE,OAAO;iBACf,CAAC,CAAC;aACN;SACJ;QAAC,OAAO,GAAG,EAAE;YACV,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,GAAG,CAAC,CAAC;SAChE;QAED,IAAI,aAAa,IAAI,IAAI,EAAE;YACvB,MAAM,IAAI,KAAK,CAAC,2BAA2B,OAAO,SAAS,IAAI,GAAG,CAAC,CAAC;SACvE;QAED,IAAI,aAAa,CAAC,IAAI,EAAE,WAAW,IAAI,IAAI,EAAE;YACzC,MAAM,IAAI,KAAK,CAAC,6CAA6C,OAAO,SAAS,IAAI,GAAG,CAAC,CAAC;SACzF;QAED,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC;YACnE,KAAK,EAAE,WAAW;YAClB,IAAI,EAAE,UAAU;YAChB,GAAG,EAAE,aAAc,CAAC,IAAI,CAAC,gBAAgB;SAC5C,CAAC,CAAC;QAEH,IAAI,cAAc,CAAC,GAAG,IAAI,IAAI;YAC1B,MAAM,IAAI,KAAK,CAAC,8CAA8C,OAAO,SAAS,IAAI,GAAG,CAAC,CAAC;QAE3F,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,MAAM,eAAe,EAAE,CAAC;IAGxB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC,CAAC;IAChD,MAAM,EAAE,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAC1C,MAAM,YAAY,CAAC,MAAO,EAAE,eAAe,EAAE,qBAAqB,CAAC,CAAC;IAEpE,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,uCAAuC,CAAC;QAC5D,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,sCAAsC,CAAC;QAC3D,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;KACpE,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,+BAA+B,CAAC;QACpD,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,8BAA8B,CAAC;QACnD,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,sCAAsC,CAAC;KAC3D,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,wBAAwB,CAAC,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,eAAe,CAAC,EAAE,iBAAiB,CAAC,CAAC;IACzG,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC;QAC/C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC;QAC9C,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC;KACvD,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,eAAe,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,SAAS,EAAE;QACZ,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC;QAC/C,MAAM,eAAe,CAAC;YAClB,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YAC7B,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;YAC/C,eAAe,EAAE,IAAI;SACxB,CAAC,CAAC;KACN;IAED,IAAI,6BAA6B,EAAE;QAC/B,MAAM,wBAAwB,CAAC,aAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;KAChE;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;AACrC,CAAC;AAGD,KAAK,UAAU,YAAY,CAAC,GAAW,EAAE,QAAgB,EAAE,SAAiB;IACxE,MAAM,QAAQ,GAAG,IAAI,gBAAgB,CAAC,GAAG,EAAE,SAAS,EAAE;QAClD,QAAQ,EAAE,QAAQ;QAClB,KAAK,EAAE;YACH,UAAU,EAAE,EAAE;YACd,KAAK,EAAE,IAAI,GAAG,CAAC;SAClB;KACJ,CAAC,CAAC;IAEH,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC;QACpC,eAAe,EAAE,KAAK;QACtB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,IAAI;QACjB,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,KAAK,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE;KACzI,EAAE,WAAW,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;IACvC,WAAW,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE;QACtB,KAAK,EAAE,EAAE;QACT,QAAQ,EAAE,QAAQ;KACrB,CAAC,CAAC;IAEH,QAAQ,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,KAAK,EAAE,EAAE;QAC9B,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,GAAG,GAAG,EAAE;YAC3E,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;SACpH,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;QACpB,WAAW,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACxB,WAAW,CAAC,IAAI,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,4CAA4C;IAC5C,gEAAgE;IAChE,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAE/B,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC;AAC3B,CAAC;AAED,KAAK,UAAU,wBAAwB,CAAC,WAAmB,EAAE,SAAiB;IAC1E,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,KAAK,CAAC,EAAC,IAAI,EAAE,WAAW,EAAC,CAAC,CAAC;IAErD,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,OAAO,EAAE,CAAC;IACpC,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEpD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE;QACxC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,cAAc,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,oBAAoB,GAAG,iBAAiB,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QACxE,iBAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,oBAAoB,GAAG,CAAC,CAAC,CAAC;KACnE;IAED,MAAM,sBAAsB,GAAG,CAAC,GAAG,iBAAiB,CAAC,IAAI,EAAE,CAAC;SACvD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAE,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC;SACrE,KAAK,EAAE,CAAC;IAEb,IAAI,sBAAsB,IAAI,IAAI;QAC9B,MAAM,IAAI,KAAK,CAAC,kEAAkE,CAAC,CAAC;IAExF,MAAM,GAAG,CAAC,OAAO,CAAC,sBAAsB,EAAE,SAAS,CAAC,CAAC;AACzD,CAAC"}
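Two behavior changes meet in this file: the release option now understands "latest", and the new hidden flag persists the downloaded tag via setBinariesGithubRelease. A sketch of how "latest" can be resolved through the GitHub API, reconstructed loosely from the bundled source map (the Octokit methods are the real SDK calls; the surrounding code is illustrative):

```typescript
import {Octokit} from "octokit";

const octokit = new Octokit();
const [githubOwner, githubRepo] = "ggerganov/llama.cpp".split("/");

const release: string = "latest"; // illustrative; normally comes from the CLI option

// "latest" asks GitHub for the newest release; anything else is treated as a tag
const githubRelease = release === "latest"
    ? await octokit.rest.repos.getLatestRelease({owner: githubOwner, repo: githubRepo})
    : await octokit.rest.repos.getReleaseByTag({owner: githubOwner, repo: githubRepo, tag: release});

console.log(githubRelease.data.tag_name); // the tag that will actually be downloaded
```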
package/dist/config.d.ts
CHANGED
@@ -3,6 +3,7 @@ export declare const llamaBinsDirectory: string;
 export declare const llamaCppDirectory: string;
 export declare const tempDownloadDirectory: string;
 export declare const usedBinFlagJsonPath: string;
+export declare const binariesGithubReleasePath: string;
 export declare const defaultLlamaCppGitHubRepo: string;
 export declare const defaultLlamaCppRelease: string;
 export declare const defaultSkipDownload: boolean;
package/dist/config.js
CHANGED
@@ -3,6 +3,7 @@ import * as path from "path";
 import * as os from "os";
 import envVar from "env-var";
 import * as uuid from "uuid";
+import { getBinariesGithubRelease } from "./utils/binariesGithubRelease.js";
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const env = envVar.from(process.env);
 export const llamaDirectory = path.join(__dirname, "..", "llama");
@@ -10,11 +11,12 @@ export const llamaBinsDirectory = path.join(__dirname, "..", "llamaBins");
 export const llamaCppDirectory = path.join(llamaDirectory, "llama.cpp");
 export const tempDownloadDirectory = path.join(os.tmpdir(), "node-llama-cpp", uuid.v4());
 export const usedBinFlagJsonPath = path.join(llamaDirectory, "usedBin.json");
+export const binariesGithubReleasePath = path.join(llamaDirectory, "binariesGithubRelease.json");
 export const defaultLlamaCppGitHubRepo = env.get("NODE_LLAMA_CPP_REPO")
     .default("ggerganov/llama.cpp")
     .asString();
 export const defaultLlamaCppRelease = env.get("NODE_LLAMA_CPP_REPO_RELEASE")
-    .default(
+    .default(await getBinariesGithubRelease())
     .asString();
 export const defaultSkipDownload = env.get("NODE_LLAMA_CPP_SKIP_DOWNLOAD")
     .default("false")
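The default release is no longer a hard-coded tag: config.js now resolves it with top-level await from the bundled binariesGithubRelease.json, and the NODE_LLAMA_CPP_REPO_RELEASE environment variable still wins when set. A self-contained sketch of the same env-var pattern; the fallback function and tag are hypothetical stand-ins:

```typescript
import envVar from "env-var";

const env = envVar.from(process.env);

// Hypothetical stand-in for getBinariesGithubRelease(), which reads the bundled JSON file
async function getFallbackRelease(): Promise<string> {
    return "b1000"; // hypothetical tag
}

// Same shape as config.js: the awaited fallback is only used when the variable is unset
export const defaultLlamaCppRelease = env.get("NODE_LLAMA_CPP_REPO_RELEASE")
    .default(await getFallbackRelease())
    .asString();
```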
package/dist/config.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;
+{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAC,wBAAwB,EAAC,MAAM,kCAAkC,CAAC;AAE1E,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;AAGrC,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AAClE,MAAM,CAAC,MAAM,kBAAkB,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;AAC1E,MAAM,CAAC,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,WAAW,CAAC,CAAC;AACxE,MAAM,CAAC,MAAM,qBAAqB,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gBAAgB,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;AACzF,MAAM,CAAC,MAAM,mBAAmB,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,cAAc,CAAC,CAAC;AAC7E,MAAM,CAAC,MAAM,yBAAyB,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,4BAA4B,CAAC,CAAC;AAEjG,MAAM,CAAC,MAAM,yBAAyB,GAAG,GAAG,CAAC,GAAG,CAAC,qBAAqB,CAAC;KAClE,OAAO,CAAC,qBAAqB,CAAC;KAC9B,QAAQ,EAAE,CAAC;AAChB,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACvE,OAAO,CAAC,MAAM,wBAAwB,EAAE,CAAC;KACzC,QAAQ,EAAE,CAAC;AAChB,MAAM,CAAC,MAAM,mBAAmB,GAAG,GAAG,CAAC,GAAG,CAAC,8BAA8B,CAAC;KACrE,OAAO,CAAC,OAAO,CAAC;KAChB,MAAM,EAAE,CAAC;AACd,MAAM,CAAC,MAAM,uBAAuB,GAAG,+FAA+F;IAClI,+HAA+H;IAC/H,mFAAmF,CAAC"}
package/dist/llamaEvaluator/LlamaContext.d.ts
CHANGED
@@ -8,5 +8,5 @@ export declare class LlamaContext {
     });
     encode(text: string): Uint32Array;
     decode(tokens: Uint32Array): string;
-    evaluate(tokens: Uint32Array
+    evaluate(tokens: Uint32Array): AsyncGenerator<number, void, unknown>;
 }
package/dist/llamaEvaluator/LlamaContext.js
CHANGED
@@ -1,4 +1,4 @@
-import { LLAMAContext
+import { LLAMAContext } from "./LlamaBins.js";
 export class LlamaContext {
     _ctx;
     _prependBos;
@@ -12,20 +12,20 @@ export class LlamaContext {
     decode(tokens) {
         return this._ctx.decode(tokens);
     }
-    async *evaluate(tokens
+    async *evaluate(tokens) {
         let evalTokens = tokens;
         if (this._prependBos) {
             const tokenArray = Array.from(tokens);
-            tokenArray.unshift(
+            tokenArray.unshift(this._ctx.tokenBos());
             evalTokens = Uint32Array.from(tokenArray);
             this._prependBos = false;
         }
         // eslint-disable-next-line no-constant-condition
         while (true) {
             // Evaluate to get the next token.
-            const nextToken = await this._ctx.eval(evalTokens
+            const nextToken = await this._ctx.eval(evalTokens);
             // the assistant finished answering
-            if (nextToken ===
+            if (nextToken === this._ctx.tokenEos())
                 break;
             yield nextToken;
             // Create tokens for the next eval.
package/dist/llamaEvaluator/LlamaContext.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,
+{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAG5C,MAAM,OAAO,YAAY;IACJ,IAAI,CAAe;IAC5B,WAAW,CAAU;IAE7B,YAAmB,EAAC,KAAK,EAAE,UAAU,GAAG,IAAI,EAA4C;QACpF,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAAmB;QAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAEM,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB;QACtC,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,WAAW,EAAE;YAClB,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACtC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEzC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;SAC5B;QAED,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAEnD,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CAEJ"}
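The evaluate() generator (typed above as AsyncGenerator<number, void, unknown>) now prepends the context's BOS token itself and stops when the context's EOS token comes back. A minimal sketch of driving it directly with encode()/decode(); the prompt text and the 32-token cap are illustrative:

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {LlamaModel, LlamaContext} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const model = new LlamaModel({
    modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
});
const context = new LlamaContext({model});

const tokens = context.encode("The quick brown fox");
const generated: number[] = [];

for await (const token of context.evaluate(tokens)) {
    generated.push(token);
    if (generated.length >= 32) break; // illustrative cap; evaluate() otherwise runs until EOS
}

console.log(context.decode(Uint32Array.from(generated)));
```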
package/dist/llamaEvaluator/LlamaModel.d.ts
CHANGED
@@ -7,7 +7,7 @@ export declare class LlamaModel {
     * @param {number | null} [options.seed] - If null, a random seed will be used
     * @param {number} [options.contextSize] - text context size
     * @param {number} [options.batchSize] - prompt processing batch size
-    * @param {number} [options.
+    * @param {number} [options.gpuLayers] - number of layers to store in VRAM
     * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
     * @param {boolean} [options.f16Kv] - use fp16 for KV cache
     * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
@@ -16,18 +16,30 @@ export declare class LlamaModel {
     * @param {boolean} [options.useMlock] - force system to keep model in RAM
     * @param {boolean} [options.embedding] - embedding mode only
     */
-    constructor({ modelPath, seed, contextSize, batchSize,
+    constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
+        /** path to the model on the filesystem */
         modelPath: string;
+        /** If null, a random seed will be used */
         seed?: number | null;
+        /** text context size */
         contextSize?: number;
+        /** prompt processing batch size */
         batchSize?: number;
-
+        /** number of layers to store in VRAM */
+        gpuLayers?: number;
+        /** if true, reduce VRAM usage at the cost of performance */
         lowVram?: boolean;
+        /** use fp16 for KV cache */
         f16Kv?: boolean;
+        /** the llama_eval() call computes all logits, not just the last one */
         logitsAll?: boolean;
+        /** only load the vocabulary, no weights */
         vocabOnly?: boolean;
+        /** use mmap if possible */
         useMmap?: boolean;
+        /** force system to keep model in RAM */
         useMlock?: boolean;
+        /** embedding mode only */
         embedding?: boolean;
     });
     static get systemInfo(): string;
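With the constructor options now fully spelled out in the typings, GPU offloading is configured per model. A sketch combining several of the documented options; the numeric values are illustrative, not recommendations:

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {LlamaModel} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const model = new LlamaModel({
    modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf"),
    contextSize: 4096, // text context size
    gpuLayers: 32,     // number of layers to store in VRAM
    useMlock: true     // force system to keep model in RAM
});

console.log(LlamaModel.systemInfo); // static getter declared in the last context line above
```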
package/dist/llamaEvaluator/LlamaModel.js
CHANGED
@@ -10,7 +10,7 @@ export class LlamaModel {
     * @param {number | null} [options.seed] - If null, a random seed will be used
     * @param {number} [options.contextSize] - text context size
     * @param {number} [options.batchSize] - prompt processing batch size
-    * @param {number} [options.
+    * @param {number} [options.gpuLayers] - number of layers to store in VRAM
     * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
     * @param {boolean} [options.f16Kv] - use fp16 for KV cache
     * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
@@ -19,12 +19,12 @@ export class LlamaModel {
     * @param {boolean} [options.useMlock] - force system to keep model in RAM
     * @param {boolean} [options.embedding] - embedding mode only
     */
-    constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize,
+    constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
         this._model = new LLAMAModel(modelPath, removeNullFields({
             seed: seed != null ? Math.max(-1, seed) : undefined,
             contextSize,
             batchSize,
-
+            gpuLayers,
             lowVram,
             f16Kv,
             logitsAll,
package/dist/llamaEvaluator/LlamaModel.js.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;OAgBG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,
+{"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;OAgBG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EACpE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAqCrE;QACG,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,gBAAgB,CAAC;YACrD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,OAAO;YACP,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;YACP,QAAQ;YACR,SAAS;SACZ,CAAC,CAAC,CAAC;IACR,CAAC;IAEM,MAAM,KAAK,UAAU;QACxB,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC;IACrC,CAAC;CACJ;AAED,SAAS,gBAAgB,CAAmB,GAAM;IAC9C,MAAM,MAAM,GAAM,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE;QACnB,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI;YACnB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;KAC1B;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
package/dist/utils/binariesGithubRelease.js
ADDED
@@ -0,0 +1,15 @@
+import fs from "fs-extra";
+import { binariesGithubReleasePath } from "../config.js";
+export async function getBinariesGithubRelease() {
+    const binariesGithubRelease = await fs.readJson(binariesGithubReleasePath);
+    return binariesGithubRelease.release;
+}
+export async function setBinariesGithubRelease(release) {
+    const binariesGithubReleaseJson = {
+        release: release
+    };
+    await fs.writeJson(binariesGithubReleasePath, binariesGithubReleaseJson, {
+        spaces: 4
+    });
+}
+//# sourceMappingURL=binariesGithubRelease.js.map
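This new helper pair is what connects the hidden --updateBinariesReleaseMetadata CLI flag to the default-release logic in config.js: the build pipeline records which llama.cpp tag the bundled binaries were built from, and config.js reads it back at import time. A sketch of the round trip; the deep dist import path and the tag value are assumptions:

```typescript
import {getBinariesGithubRelease, setBinariesGithubRelease} from "node-llama-cpp/dist/utils/binariesGithubRelease.js";

// Writes {"release": "b1000"} (4-space indented) to llama/binariesGithubRelease.json
await setBinariesGithubRelease("b1000"); // hypothetical tag

console.log(await getBinariesGithubRelease()); // -> "b1000"
```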
package/dist/utils/binariesGithubRelease.js.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"binariesGithubRelease.js","sourceRoot":"","sources":["../../src/utils/binariesGithubRelease.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,yBAAyB,EAAC,MAAM,cAAc,CAAC;AAMvD,MAAM,CAAC,KAAK,UAAU,wBAAwB;IAC1C,MAAM,qBAAqB,GAA8B,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC,CAAC;IAEtG,OAAO,qBAAqB,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,OAA6C;IACxF,MAAM,yBAAyB,GAA8B;QACzD,OAAO,EAAE,OAAO;KACnB,CAAC;IAEF,MAAM,EAAE,CAAC,SAAS,CAAC,yBAAyB,EAAE,yBAAyB,EAAE;QACrE,MAAM,EAAE,CAAC;KACZ,CAAC,CAAC;AACP,CAAC"}
package/dist/utils/getBin.d.ts
CHANGED
@@ -3,9 +3,7 @@ export declare function loadBin(): Promise<LlamaCppNodeModule>;
 export type LlamaCppNodeModule = {
     LLAMAModel: LLAMAModel;
     LLAMAContext: LLAMAContext;
-    tokenBos(): number;
     systemInfo(): string;
-    tokenEos(): number;
 };
 export type LLAMAModel = {
     new (modelPath: string, params: {
@@ -25,6 +23,9 @@ export type LLAMAModel = {
 export type LLAMAContext = {
     new (model: LLAMAModel): LLAMAContext;
     encode(text: string): Uint32Array;
-    eval(tokens: Uint32Array
+    eval(tokens: Uint32Array): Promise<number>;
     decode(tokens: Uint32Array): string;
+    tokenBos(): number;
+    tokenEos(): number;
+    getMaxContextSize(): number;
 };
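Note that tokenBos() and tokenEos() moved from module-level functions to methods on LLAMAContext (matching the addon.cpp change below, where the C++ implementations now take the context), and getMaxContextSize() is new. A sketch against these typings; the deep import path and the empty params object are assumptions:

```typescript
import {loadBin} from "node-llama-cpp/dist/utils/getBin.js"; // assumed deep import

const {LLAMAModel, LLAMAContext} = await loadBin();

const model = new LLAMAModel("models/codellama-13b.Q3_K_M.gguf", {});
const ctx = new LLAMAContext(model);

console.log(ctx.tokenBos());          // BOS token id, now per context
console.log(ctx.tokenEos());          // EOS token id, now per context
console.log(ctx.getMaxContextSize()); // new in 2.0.0
```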
package/llama/addon.cpp
CHANGED
@@ -34,8 +34,8 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
         params.n_batch = options.Get("batchSize").As<Napi::Number>().Int32Value();
     }
 
-    if (options.Has("
-        params.n_gpu_layers = options.Get("
+    if (options.Has("gpuLayers")) {
+        params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
     }
 
     if (options.Has("lowVram")) {
@@ -67,6 +67,7 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
         }
     }
 
+    llama_backend_init(false);
     model = llama_load_model_from_file(modelPath.c_str(), params);
 
     if (model == NULL) {
@@ -124,7 +125,18 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
 
     // Decode each token and accumulate the result.
     for (size_t i = 0; i < tokens.ElementLength(); i++) {
-
+        // source: https://github.com/ggerganov/llama.cpp/blob/232caf3c1581a6cb023571780ff41dc2d66d1ca0/llama.cpp#L799-L811
+        std::vector<char> result(8, 0);
+        const int n_tokens = llama_token_to_str(ctx, (llama_token)tokens[i], result.data(), result.size());
+        if (n_tokens < 0) {
+            result.resize(-n_tokens);
+            int check = llama_token_to_str(ctx, (llama_token)tokens[i], result.data(), result.size());
+            GGML_ASSERT(check == -n_tokens);
+        } else {
+            result.resize(n_tokens);
+        }
+
+        const char* str = result.data();
         if (str == nullptr) {
             Napi::Error::New(info.Env(), "Invalid token").ThrowAsJavaScriptException();
             return info.Env().Undefined();
@@ -134,6 +146,15 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
 
         return Napi::String::New(info.Env(), ss.str());
     }
+    Napi::Value TokenBos(const Napi::CallbackInfo& info) {
+        return Napi::Number::From(info.Env(), llama_token_bos(ctx));
+    }
+    Napi::Value TokenEos(const Napi::CallbackInfo& info) {
+        return Napi::Number::From(info.Env(), llama_token_eos(ctx));
+    }
+    Napi::Value GetMaxContextSize(const Napi::CallbackInfo& info) {
+        return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
+    }
     Napi::Value Eval(const Napi::CallbackInfo& info);
     static void init(Napi::Object exports) {
         exports.Set("LLAMAContext",
@@ -142,6 +163,9 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
         {
             InstanceMethod("encode", &LLAMAContext::Encode),
             InstanceMethod("decode", &LLAMAContext::Decode),
+            InstanceMethod("tokenBos", &LLAMAContext::TokenBos),
+            InstanceMethod("tokenEos", &LLAMAContext::TokenEos),
+            InstanceMethod("getMaxContextSize", &LLAMAContext::GetMaxContextSize),
             InstanceMethod("eval", &LLAMAContext::Eval),
         }));
     }
@@ -151,7 +175,6 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
 class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
     LLAMAContext* ctx;
     std::vector<llama_token> tokens;
-    std::vector<llama_token> restriction;
     llama_token result;
 
 public:
@@ -160,13 +183,6 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
         Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
         this->tokens.reserve(tokens.ElementLength());
         for (size_t i = 0; i < tokens.ElementLength(); i++) { this->tokens.push_back(static_cast<llama_token>(tokens[i])); }
-
-        if (info.Length() > 1 && info[1].IsTypedArray()) {
-            Napi::Uint32Array restriction = info[1].As<Napi::Uint32Array>();
-            this->restriction.reserve(restriction.ElementLength());
-            for (size_t i = 0; i < restriction.ElementLength(); i++) { this->restriction.push_back(static_cast<llama_token>(restriction[i])); }
-            std::sort(this->restriction.begin(), this->restriction.end());
-        }
     }
     ~LLAMAContextEvalWorker() { ctx->Unref(); }
     using Napi::AsyncWorker::Queue;
@@ -175,39 +191,30 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
 protected:
     void Execute() {
         // Perform the evaluation using llama_eval.
-        int r = llama_eval(ctx->ctx, tokens.data(), tokens.size(), llama_get_kv_cache_token_count(ctx->ctx), 6);
+        int r = llama_eval(ctx->ctx, tokens.data(), int(tokens.size()), llama_get_kv_cache_token_count(ctx->ctx), 6);
         if (r != 0) {
             SetError("Eval has failed");
            return;
         }
 
+        llama_token new_token_id = 0;
+
         // Select the best prediction.
-
-
-
-
-
-
-
-
-            if (logit > max) {
-                max = logit;
-                re = id;
-            }
-        }
-        } else {
-            float max = logits[restriction[0]];
-            re = 0;
-            for (size_t i = 1; i < restriction.size(); i++) {
-                llama_token id = restriction[i];
-                float logit = logits[id];
-                if (logit > max) {
-                    max = logit;
-                    re = id;
-                }
-            }
+        auto logits = llama_get_logits(ctx->ctx);
+        auto n_vocab = llama_n_vocab(ctx->ctx);
+
+        std::vector<llama_token_data> candidates;
+        candidates.reserve(n_vocab);
+
+        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
+            candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
         }
-
+
+        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
+
+        new_token_id = llama_sample_token_greedy(ctx->ctx , &candidates_p);
+
+        result = new_token_id;
     }
     void OnOK() {
         Napi::Env env = Napi::AsyncWorker::Env();
@@ -223,15 +230,11 @@ Napi::Value LLAMAContext::Eval(const Napi::CallbackInfo& info) {
     return worker->Promise();
 }
 
-Napi::Value tokenBos(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_token_bos()); }
-Napi::Value tokenEos(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_token_eos()); }
 Napi::Value systemInfo(const Napi::CallbackInfo& info) { return Napi::String::From(info.Env(), llama_print_system_info()); }
 
 Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
     llama_backend_init(false);
     exports.DefineProperties({
-        Napi::PropertyDescriptor::Function("tokenBos", tokenBos),
-        Napi::PropertyDescriptor::Function("tokenEos", tokenEos),
         Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
     });
     LLAMAModel::init(exports);
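The Execute() rewrite above drops the hand-rolled argmax (and the old optional token-restriction argument to eval) in favor of llama.cpp's own llama_sample_token_greedy over a candidates array. Functionally it is still a greedy argmax over the vocabulary logits; a TypeScript sketch of the equivalent selection:

```typescript
// Equivalent of the greedy selection above: return the token id with the highest logit.
function sampleTokenGreedy(logits: Float32Array): number {
    let best = 0;
    for (let tokenId = 1; tokenId < logits.length; tokenId++) {
        if (logits[tokenId] > logits[best])
            best = tokenId;
    }
    return best;
}

console.log(sampleTokenGreedy(Float32Array.from([0.1, 2.5, -1.0]))); // -> 1
```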
package/llamaBins/*.node
CHANGED
Binary files (35 prebuilt binaries, listed above; contents not shown)
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "node-llama-cpp",
-  "version": "
+  "version": "2.0.0",
   "description": "node.js bindings for llama.cpp",
   "main": "dist/index.js",
   "type": "module",
@@ -68,15 +68,14 @@
     "node-gyp",
     "prebuilt-binaries",
     "llm",
-    "
-    "ggmlv3",
+    "gguf",
     "raspberry-pi",
     "self-hosted",
     "local",
     "catai"
   ],
   "author": "Gilad S.",
-  "license": "
+  "license": "MIT",
   "bugs": {
     "url": "https://github.com/withcatai/node-llama-cpp/issues"
   },