node-llama-cpp 1.3.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +100 -28
- package/dist/ChatPromptWrapper.d.ts +3 -0
- package/dist/ChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLPromptWrapper.d.ts +11 -0
- package/dist/chatWrappers/ChatMLPromptWrapper.js +19 -0
- package/dist/chatWrappers/ChatMLPromptWrapper.js.map +1 -0
- package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +1 -0
- package/dist/chatWrappers/EmptyChatPromptWrapper.js +1 -0
- package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +11 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.js +28 -4
- package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +4 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.js +9 -5
- package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +1 -1
- package/dist/chatWrappers/createChatWrapperByBos.d.ts +2 -0
- package/dist/chatWrappers/createChatWrapperByBos.js +14 -0
- package/dist/chatWrappers/createChatWrapperByBos.js.map +1 -0
- package/dist/cli/commands/BuildCommand.d.ts +3 -1
- package/dist/cli/commands/BuildCommand.js +24 -2
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +7 -1
- package/dist/cli/commands/ChatCommand.js +87 -12
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.js +1 -1
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +4 -1
- package/dist/cli/commands/DownloadCommand.js +71 -70
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +4 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/config.d.ts +5 -0
- package/dist/config.js +11 -1
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +3 -3
- package/dist/llamaEvaluator/LlamaBins.js +2 -2
- package/dist/llamaEvaluator/LlamaBins.js.map +1 -1
- package/dist/llamaEvaluator/LlamaChatSession.d.ts +7 -2
- package/dist/llamaEvaluator/LlamaChatSession.js +51 -11
- package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
- package/dist/llamaEvaluator/LlamaContext.d.ts +31 -2
- package/dist/llamaEvaluator/LlamaContext.js +74 -7
- package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +14 -0
- package/dist/llamaEvaluator/LlamaGrammar.js +30 -0
- package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -0
- package/dist/llamaEvaluator/LlamaModel.d.ts +49 -1
- package/dist/llamaEvaluator/LlamaModel.js +25 -9
- package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/binariesGithubRelease.d.ts +6 -0
- package/dist/utils/binariesGithubRelease.js +15 -0
- package/dist/utils/binariesGithubRelease.js.map +1 -0
- package/dist/utils/compileLLamaCpp.d.ts +3 -1
- package/dist/utils/compileLLamaCpp.js +34 -4
- package/dist/utils/compileLLamaCpp.js.map +1 -1
- package/dist/utils/getBin.d.ts +18 -4
- package/dist/utils/getBin.js +4 -2
- package/dist/utils/getBin.js.map +1 -1
- package/dist/utils/getGrammarsFolder.d.ts +1 -0
- package/dist/utils/getGrammarsFolder.js +18 -0
- package/dist/utils/getGrammarsFolder.js.map +1 -0
- package/dist/utils/getTextCompletion.d.ts +3 -0
- package/dist/utils/getTextCompletion.js +12 -0
- package/dist/utils/getTextCompletion.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +1 -0
- package/dist/utils/removeNullFields.js +9 -0
- package/dist/utils/removeNullFields.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +2 -1
- package/dist/utils/spawnCommand.js +2 -2
- package/dist/utils/spawnCommand.js.map +1 -1
- package/llama/addon.cpp +180 -42
- package/llama/binariesGithubRelease.json +3 -0
- package/llama/binding.gyp +6 -3
- package/llama/grammars/README.md +91 -0
- package/llama/grammars/arithmetic.gbnf +6 -0
- package/llama/grammars/chess.gbnf +13 -0
- package/llama/grammars/japanese.gbnf +7 -0
- package/llama/grammars/json.gbnf +25 -0
- package/llama/grammars/list.gbnf +4 -0
- package/llamaBins/linux-arm64-16.node +0 -0
- package/llamaBins/linux-arm64-17.node +0 -0
- package/llamaBins/linux-arm64-18.node +0 -0
- package/llamaBins/linux-arm64-19.node +0 -0
- package/llamaBins/linux-arm64-20.node +0 -0
- package/llamaBins/linux-armv7l-16.node +0 -0
- package/llamaBins/linux-armv7l-17.node +0 -0
- package/llamaBins/linux-armv7l-18.node +0 -0
- package/llamaBins/linux-armv7l-19.node +0 -0
- package/llamaBins/linux-armv7l-20.node +0 -0
- package/llamaBins/linux-ppc64le-16.node +0 -0
- package/llamaBins/linux-ppc64le-17.node +0 -0
- package/llamaBins/linux-ppc64le-18.node +0 -0
- package/llamaBins/linux-ppc64le-19.node +0 -0
- package/llamaBins/linux-ppc64le-20.node +0 -0
- package/llamaBins/linux-x64-16.node +0 -0
- package/llamaBins/linux-x64-17.node +0 -0
- package/llamaBins/linux-x64-18.node +0 -0
- package/llamaBins/linux-x64-19.node +0 -0
- package/llamaBins/linux-x64-20.node +0 -0
- package/llamaBins/mac-arm64-16.node +0 -0
- package/llamaBins/mac-arm64-17.node +0 -0
- package/llamaBins/mac-arm64-18.node +0 -0
- package/llamaBins/mac-arm64-19.node +0 -0
- package/llamaBins/mac-arm64-20.node +0 -0
- package/llamaBins/mac-x64-16.node +0 -0
- package/llamaBins/mac-x64-17.node +0 -0
- package/llamaBins/mac-x64-18.node +0 -0
- package/llamaBins/mac-x64-19.node +0 -0
- package/llamaBins/mac-x64-20.node +0 -0
- package/llamaBins/win-x64-16.node +0 -0
- package/llamaBins/win-x64-17.node +0 -0
- package/llamaBins/win-x64-18.node +0 -0
- package/llamaBins/win-x64-19.node +0 -0
- package/llamaBins/win-x64-20.node +0 -0
- package/package.json +12 -6
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type LLAMAModel, type LLAMAContext } from "../utils/getBin.js";
|
|
1
|
+
import { type LLAMAModel, type LLAMAContext, type LLAMAGrammar } from "../utils/getBin.js";
|
|
2
2
|
export declare const llamaCppNode: import("../utils/getBin.js").LlamaCppNodeModule;
|
|
3
|
-
declare const LLAMAModel: LLAMAModel, LLAMAContext: LLAMAContext;
|
|
4
|
-
export { LLAMAModel, LLAMAContext };
|
|
3
|
+
declare const LLAMAModel: LLAMAModel, LLAMAContext: LLAMAContext, LLAMAGrammar: LLAMAGrammar;
|
|
4
|
+
export { LLAMAModel, LLAMAContext, LLAMAGrammar };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { loadBin } from "../utils/getBin.js";
|
|
2
2
|
export const llamaCppNode = await loadBin();
|
|
3
|
-
const { LLAMAModel, LLAMAContext } = llamaCppNode;
|
|
4
|
-
export { LLAMAModel, LLAMAContext };
|
|
3
|
+
const { LLAMAModel, LLAMAContext, LLAMAGrammar } = llamaCppNode;
|
|
4
|
+
export { LLAMAModel, LLAMAContext, LLAMAGrammar };
|
|
5
5
|
//# sourceMappingURL=LlamaBins.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaBins.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaBins.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,OAAO,
|
|
1
|
+
{"version":3,"file":"LlamaBins.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaBins.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,OAAO,EAAwD,MAAM,oBAAoB,CAAC;AAElG,MAAM,CAAC,MAAM,YAAY,GAAG,MAAM,OAAO,EAAE,CAAC;AAC5C,MAAM,EAAC,UAAU,EAAE,YAAY,EAAE,YAAY,EAAC,GAAG,YAAY,CAAC;AAE9D,OAAO,EAAC,UAAU,EAAE,YAAY,EAAE,YAAY,EAAC,CAAC"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
2
|
import { ChatPromptWrapper } from "../ChatPromptWrapper.js";
|
|
3
|
+
import { Token } from "../types.js";
|
|
3
4
|
import { LlamaContext } from "./LlamaContext.js";
|
|
4
5
|
export declare class LlamaChatSession {
|
|
5
6
|
private readonly _systemPrompt;
|
|
@@ -7,18 +8,22 @@ export declare class LlamaChatSession {
|
|
|
7
8
|
private readonly _promptWrapper;
|
|
8
9
|
private _promptIndex;
|
|
9
10
|
private _initialized;
|
|
11
|
+
private _lastStopString;
|
|
12
|
+
private _lastStopStringSuffix;
|
|
10
13
|
private readonly _ctx;
|
|
11
14
|
constructor({ context, printLLamaSystemInfo, promptWrapper, systemPrompt }: {
|
|
12
15
|
context: LlamaContext;
|
|
13
16
|
printLLamaSystemInfo?: boolean;
|
|
14
|
-
promptWrapper?: ChatPromptWrapper;
|
|
17
|
+
promptWrapper?: ChatPromptWrapper | "auto";
|
|
15
18
|
systemPrompt?: string;
|
|
16
19
|
});
|
|
17
20
|
get initialized(): boolean;
|
|
18
21
|
get context(): LlamaContext;
|
|
19
22
|
init(): Promise<void>;
|
|
20
|
-
prompt(prompt: string, onToken
|
|
23
|
+
prompt(prompt: string, { onToken, signal, maxTokens }?: {
|
|
24
|
+
onToken?(tokens: Token[]): void;
|
|
21
25
|
signal?: AbortSignal;
|
|
26
|
+
maxTokens?: number;
|
|
22
27
|
}): Promise<string>;
|
|
23
28
|
private _evalTokens;
|
|
24
29
|
private _checkStopString;
|
|
@@ -2,6 +2,7 @@ import { defaultChatSystemPrompt } from "../config.js";
|
|
|
2
2
|
import { withLock } from "../utils/withLock.js";
|
|
3
3
|
import { AbortError } from "../AbortError.js";
|
|
4
4
|
import { GeneralChatPromptWrapper } from "../chatWrappers/GeneralChatPromptWrapper.js";
|
|
5
|
+
import { getChatWrapperByBos } from "../chatWrappers/createChatWrapperByBos.js";
|
|
5
6
|
import { LlamaModel } from "./LlamaModel.js";
|
|
6
7
|
const UNKNOWN_UNICODE_CHAR = "\ufffd";
|
|
7
8
|
export class LlamaChatSession {
|
|
@@ -10,12 +11,22 @@ export class LlamaChatSession {
|
|
|
10
11
|
_promptWrapper;
|
|
11
12
|
_promptIndex = 0;
|
|
12
13
|
_initialized = false;
|
|
14
|
+
_lastStopString = null;
|
|
15
|
+
_lastStopStringSuffix = null;
|
|
13
16
|
_ctx;
|
|
14
17
|
constructor({ context, printLLamaSystemInfo = false, promptWrapper = new GeneralChatPromptWrapper(), systemPrompt = defaultChatSystemPrompt }) {
|
|
15
18
|
this._ctx = context;
|
|
16
19
|
this._printLLamaSystemInfo = printLLamaSystemInfo;
|
|
17
|
-
this._promptWrapper = promptWrapper;
|
|
18
20
|
this._systemPrompt = systemPrompt;
|
|
21
|
+
if (promptWrapper === "auto") {
|
|
22
|
+
const chatWrapper = getChatWrapperByBos(context.getBosString());
|
|
23
|
+
if (chatWrapper != null)
|
|
24
|
+
this._promptWrapper = new chatWrapper();
|
|
25
|
+
else
|
|
26
|
+
this._promptWrapper = new GeneralChatPromptWrapper();
|
|
27
|
+
}
|
|
28
|
+
else
|
|
29
|
+
this._promptWrapper = promptWrapper;
|
|
19
30
|
}
|
|
20
31
|
get initialized() {
|
|
21
32
|
return this._initialized;
|
|
@@ -32,17 +43,30 @@ export class LlamaChatSession {
|
|
|
32
43
|
this._initialized = true;
|
|
33
44
|
});
|
|
34
45
|
}
|
|
35
|
-
async prompt(prompt, onToken,
|
|
46
|
+
async prompt(prompt, { onToken, signal, maxTokens } = {}) {
|
|
36
47
|
if (!this.initialized)
|
|
37
48
|
await this.init();
|
|
38
49
|
return await withLock(this, "prompt", async () => {
|
|
39
|
-
const promptText = this._promptWrapper.wrapPrompt(prompt, {
|
|
50
|
+
const promptText = this._promptWrapper.wrapPrompt(prompt, {
|
|
51
|
+
systemPrompt: this._systemPrompt,
|
|
52
|
+
promptIndex: this._promptIndex,
|
|
53
|
+
lastStopString: this._lastStopString,
|
|
54
|
+
lastStopStringSuffix: this._promptIndex == 0
|
|
55
|
+
? (this._ctx.prependBos
|
|
56
|
+
? this._ctx.getBosString()
|
|
57
|
+
: null)
|
|
58
|
+
: this._lastStopStringSuffix
|
|
59
|
+
});
|
|
40
60
|
this._promptIndex++;
|
|
41
|
-
|
|
61
|
+
this._lastStopString = null;
|
|
62
|
+
this._lastStopStringSuffix = null;
|
|
63
|
+
const { text, stopString, stopStringSuffix } = await this._evalTokens(this._ctx.encode(promptText), { onToken, signal, maxTokens });
|
|
64
|
+
this._lastStopString = stopString;
|
|
65
|
+
this._lastStopStringSuffix = stopStringSuffix;
|
|
66
|
+
return text;
|
|
42
67
|
});
|
|
43
68
|
}
|
|
44
|
-
async _evalTokens(tokens, onToken,
|
|
45
|
-
const decodeTokens = (tokens) => this._ctx.decode(Uint32Array.from(tokens));
|
|
69
|
+
async _evalTokens(tokens, { onToken, signal, maxTokens } = {}) {
|
|
46
70
|
const stopStrings = this._promptWrapper.getStopStrings();
|
|
47
71
|
const stopStringIndexes = Array(stopStrings.length).fill(0);
|
|
48
72
|
const skippedChunksQueue = [];
|
|
@@ -50,10 +74,14 @@ export class LlamaChatSession {
|
|
|
50
74
|
for await (const chunk of this._ctx.evaluate(tokens)) {
|
|
51
75
|
if (signal?.aborted)
|
|
52
76
|
throw new AbortError();
|
|
53
|
-
const tokenStr =
|
|
54
|
-
const { shouldReturn, skipTokenEvent } = this._checkStopString(tokenStr, stopStringIndexes);
|
|
77
|
+
const tokenStr = this._ctx.decode(Uint32Array.from([chunk]));
|
|
78
|
+
const { shouldReturn, skipTokenEvent, stopString, stopStringSuffix } = this._checkStopString(tokenStr, stopStringIndexes);
|
|
55
79
|
if (shouldReturn)
|
|
56
|
-
return
|
|
80
|
+
return {
|
|
81
|
+
text: this._ctx.decode(Uint32Array.from(res)),
|
|
82
|
+
stopString,
|
|
83
|
+
stopStringSuffix
|
|
84
|
+
};
|
|
57
85
|
// if the token is unknown, it means it's not complete character
|
|
58
86
|
if (tokenStr === UNKNOWN_UNICODE_CHAR || skipTokenEvent) {
|
|
59
87
|
skippedChunksQueue.push(chunk);
|
|
@@ -66,8 +94,14 @@ export class LlamaChatSession {
|
|
|
66
94
|
}
|
|
67
95
|
res.push(chunk);
|
|
68
96
|
onToken?.([chunk]);
|
|
97
|
+
if (maxTokens != null && maxTokens > 0 && res.length >= maxTokens)
|
|
98
|
+
break;
|
|
69
99
|
}
|
|
70
|
-
return
|
|
100
|
+
return {
|
|
101
|
+
text: this._ctx.decode(Uint32Array.from(res)),
|
|
102
|
+
stopString: null,
|
|
103
|
+
stopStringSuffix: null
|
|
104
|
+
};
|
|
71
105
|
}
|
|
72
106
|
_checkStopString(tokenStr, stopStringIndexes) {
|
|
73
107
|
const stopStrings = this._promptWrapper.getStopStrings();
|
|
@@ -87,7 +121,13 @@ export class LlamaChatSession {
|
|
|
87
121
|
}
|
|
88
122
|
}
|
|
89
123
|
if (stopStringIndexes[stopStringIndex] === stopString.length) {
|
|
90
|
-
return {
|
|
124
|
+
return {
|
|
125
|
+
shouldReturn: true,
|
|
126
|
+
stopString,
|
|
127
|
+
stopStringSuffix: tokenStr.length === stopString.length
|
|
128
|
+
? null
|
|
129
|
+
: tokenStr.slice(stopString.length)
|
|
130
|
+
};
|
|
91
131
|
}
|
|
92
132
|
skipTokenEvent ||= localShouldSkipTokenEvent;
|
|
93
133
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaChatSession.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaChatSession.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,uBAAuB,EAAC,MAAM,cAAc,CAAC;AACrD,OAAO,EAAC,QAAQ,EAAC,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAC,wBAAwB,EAAC,MAAM,6CAA6C,CAAC;AACrF,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAG3C,MAAM,oBAAoB,GAAG,QAAQ,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACR,aAAa,CAAS;IACtB,qBAAqB,CAAU;IAC/B,cAAc,CAAoB;IAC3C,YAAY,GAAW,CAAC,CAAC;IACzB,YAAY,GAAY,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"LlamaChatSession.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaChatSession.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,uBAAuB,EAAC,MAAM,cAAc,CAAC;AACrD,OAAO,EAAC,QAAQ,EAAC,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAC,wBAAwB,EAAC,MAAM,6CAA6C,CAAC;AACrF,OAAO,EAAC,mBAAmB,EAAC,MAAM,2CAA2C,CAAC;AAE9E,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAG3C,MAAM,oBAAoB,GAAG,QAAQ,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACR,aAAa,CAAS;IACtB,qBAAqB,CAAU;IAC/B,cAAc,CAAoB;IAC3C,YAAY,GAAW,CAAC,CAAC;IACzB,YAAY,GAAY,KAAK,CAAC;IAC9B,eAAe,GAAkB,IAAI,CAAC;IACtC,qBAAqB,GAAkB,IAAI,CAAC;IACnC,IAAI,CAAe;IAEpC,YAAmB,EACf,OAAO,EACP,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,IAAI,wBAAwB,EAAE,EAC9C,YAAY,GAAG,uBAAuB,EAMzC;QACG,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC;QACpB,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAElC,IAAI,aAAa,KAAK,MAAM,EAAE;YAC1B,MAAM,WAAW,GAAG,mBAAmB,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;YAEhE,IAAI,WAAW,IAAI,IAAI;gBACnB,IAAI,CAAC,cAAc,GAAG,IAAI,WAAW,EAAE,CAAC;;gBAExC,IAAI,CAAC,cAAc,GAAG,IAAI,wBAAwB,EAAE,CAAC;SAC5D;;YACG,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;IAC5C,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,IAAI,CAAC;IACrB,CAAC;IAEM,KAAK,CAAC,IAAI;QACb,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,IAAI,EAAE;YACpC,IAAI,IAAI,CAAC,YAAY;gBACjB,OAAO;YAEX,IAAI,IAAI,CAAC,qBAAqB;gBAC1B,OAAO,CAAC,GAAG,CAAC,mBAAmB,EAAE,UAAU,CAAC,UAAU,CAAC,CAAC;YAE5D,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC7B,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,EAChC,OAAO,EAAE,MAAM,EAAE,SAAS,KACqD,EAAE;QACjF,IAAI,CAAC,IAAI,CAAC,WAAW;YACjB,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAEtB,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,IAAI,EAAE;YAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,MAAM,EAAE;gBACtD,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,WAAW,EAAE,IAAI,CAAC,YAAY;gBAC9B,cAAc,EAAE,IAAI,CAAC,eAAe;gBACpC,oBAAoB,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;oBACxC,CAAC,CAAC,CACE,IAAI,CAAC,IAAI,CAAC,UAAU;wBAChB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE;wBAC1B,CAAC,CAAC,IAAI,CACb;oBACD,CAAC,CAAC,IAAI,CAAC,qBAAqB;aACnC,CAAC,CAAC;YACH,IAAI,CAAC,YAAY,EAAE,CAAC;YACpB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;YAC5B,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC;YAElC,MAAM,EAAC,IAAI,EAAE,UAAU,EAAE,gBAAgB,EAAC,GACtC,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,EAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAC,CAAC,CAAC;YACvF,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC;YAClC,IAAI,CAAC,qBAAqB,GAAG,gBAAgB,CAAC;YAE9C,OAAO,IAAI,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,MAAmB,EAAE,EAC3C,OAAO,EAAE,MAAM,EAAE,SAAS,KACqD,EAAE;QACjF,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QACzD,MAAM,iBAAiB,GAAG,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,kBAAkB,GAAY,EAAE,CAAC;QACvC,MAAM,GAAG,GAAY,EAAE,CAAC;QAExB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;YAClD,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,IAAI,UAAU,EAAE,CAAC;YAE3B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,EAAC,YAAY,EAAE,cAAc,EAAE,UAAU,EAAE,gBAAgB,EAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;YAExH,IAAI,YAAY;gBACZ,OAAO;oBACH,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBAC7C,UAAU;oBACV,gBAAgB;iBACnB,CAAC;YAEN,gEAAgE;YAChE,IAAI,QAAQ,KAAK,oBAAoB,IAAI,cAAc,EAAE;gBACrD,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC/B,SAAS;aACZ;YAED,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC/B,GAAG,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;gBAChC,OAAO,EAAE,CAAC,kBAAkB,CAAC,CAAC;gBAC9B,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC;aACjC;YAED,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChB,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAEnB,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,IAAI,SAAS;gBAC7D,MAAM;SACb;QAED,OAAO;YACH,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC7C,UAAU,EAAE,IAAI;YAChB,gBAAgB,EAAE,IAAI;SACzB,CAAC;IACN,CAAC;IAEO,gBAAgB,CAAC,QAAgB,EAAE,iBAA2B;QAClE,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QACzD,IAAI,cAAc,GAAG,KAAK,CAAC;QAE3B,KAAK,IAAI,eAAe,GAAG,CAAC,EAAE,eAAe,GAAG,WAAW,CAAC,MAAM,EAAE,eAAe,EAAE,EAAE;YACnF,MAAM,UAAU,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;YAEhD,IAAI,yBAAyB,GAAG,KAAK,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClG,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC,EAAE;oBAChE,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC;oBACrC,yBAAyB,GAAG,IAAI,CAAC;iBACpC;qBAAM;oBACH,iBAAiB,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;oBACvC,yBAAyB,GAAG,KAAK,CAAC;oBAClC,MAAM;iBACT;aACJ;YAED,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE;gBAC1D,OAAO;oBACH,YAAY,EAAE,IAAI;oBAClB,UAAU;oBACV,gBAAgB,EAAE,QAAQ,CAAC,MAAM,KAAK,UAAU,CAAC,MAAM;wBACnD,CAAC,CAAC,IAAI;wBACN,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC;iBAC1C,CAAC;aACL;YAED,cAAc,KAAK,yBAAyB,CAAC;SAChD;QAED,OAAO,EAAC,cAAc,EAAC,CAAC;IAC5B,CAAC;CACJ"}
|
|
@@ -1,12 +1,41 @@
|
|
|
1
|
+
import { Token } from "../types.js";
|
|
1
2
|
import { LlamaModel } from "./LlamaModel.js";
|
|
3
|
+
import { LlamaGrammar } from "./LlamaGrammar.js";
|
|
2
4
|
export declare class LlamaContext {
|
|
3
5
|
private readonly _ctx;
|
|
4
6
|
private _prependBos;
|
|
5
|
-
constructor({ model, prependBos }: {
|
|
7
|
+
constructor({ model, grammar, prependBos }: {
|
|
6
8
|
model: LlamaModel;
|
|
9
|
+
grammar?: LlamaGrammar;
|
|
7
10
|
prependBos?: boolean;
|
|
8
11
|
});
|
|
9
12
|
encode(text: string): Uint32Array;
|
|
10
13
|
decode(tokens: Uint32Array): string;
|
|
11
|
-
|
|
14
|
+
get prependBos(): boolean;
|
|
15
|
+
/**
|
|
16
|
+
* @returns {Token | null} The BOS (Beginning Of Sequence) token.
|
|
17
|
+
*/
|
|
18
|
+
getBosToken(): Token | null;
|
|
19
|
+
/**
|
|
20
|
+
* @returns {Token | null} The EOS (End Of Sequence) token.
|
|
21
|
+
*/
|
|
22
|
+
getEosToken(): Token | null;
|
|
23
|
+
/**
|
|
24
|
+
* @returns {Token | null} The NL (New Line) token.
|
|
25
|
+
*/
|
|
26
|
+
getNlToken(): Token | null;
|
|
27
|
+
/**
|
|
28
|
+
* @returns {string | null} The BOS (Beginning Of Sequence) token as a string.
|
|
29
|
+
*/
|
|
30
|
+
getBosString(): string | null;
|
|
31
|
+
/**
|
|
32
|
+
* @returns {string | null} The EOS (End Of Sequence) token as a string.
|
|
33
|
+
*/
|
|
34
|
+
getEosString(): string | null;
|
|
35
|
+
/**
|
|
36
|
+
* @returns {string | null} The NL (New Line) token as a string.
|
|
37
|
+
*/
|
|
38
|
+
getNlString(): string | null;
|
|
39
|
+
getContextSize(): number;
|
|
40
|
+
evaluate(tokens: Uint32Array): AsyncGenerator<Token, void>;
|
|
12
41
|
}
|
|
@@ -1,31 +1,98 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { removeNullFields } from "../utils/removeNullFields.js";
|
|
2
|
+
import { LLAMAContext } from "./LlamaBins.js";
|
|
2
3
|
export class LlamaContext {
|
|
3
4
|
_ctx;
|
|
4
5
|
_prependBos;
|
|
5
|
-
constructor({ model, prependBos = true }) {
|
|
6
|
-
this._ctx = new LLAMAContext(model._model
|
|
6
|
+
constructor({ model, grammar, prependBos = true }) {
|
|
7
|
+
this._ctx = new LLAMAContext(model._model, removeNullFields({
|
|
8
|
+
grammar: grammar?._grammar
|
|
9
|
+
}));
|
|
7
10
|
this._prependBos = prependBos;
|
|
8
11
|
}
|
|
9
12
|
encode(text) {
|
|
13
|
+
if (text === "")
|
|
14
|
+
return new Uint32Array();
|
|
10
15
|
return this._ctx.encode(text);
|
|
11
16
|
}
|
|
12
17
|
decode(tokens) {
|
|
18
|
+
if (tokens.length === 0)
|
|
19
|
+
return "";
|
|
13
20
|
return this._ctx.decode(tokens);
|
|
14
21
|
}
|
|
15
|
-
|
|
22
|
+
get prependBos() {
|
|
23
|
+
return this._prependBos;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* @returns {Token | null} The BOS (Beginning Of Sequence) token.
|
|
27
|
+
*/
|
|
28
|
+
getBosToken() {
|
|
29
|
+
const bosToken = this._ctx.tokenBos();
|
|
30
|
+
if (bosToken === -1)
|
|
31
|
+
return null;
|
|
32
|
+
return bosToken;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* @returns {Token | null} The EOS (End Of Sequence) token.
|
|
36
|
+
*/
|
|
37
|
+
getEosToken() {
|
|
38
|
+
const eosToken = this._ctx.tokenEos();
|
|
39
|
+
if (eosToken === -1)
|
|
40
|
+
return null;
|
|
41
|
+
return eosToken;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* @returns {Token | null} The NL (New Line) token.
|
|
45
|
+
*/
|
|
46
|
+
getNlToken() {
|
|
47
|
+
const nlToken = this._ctx.tokenNl();
|
|
48
|
+
if (nlToken === -1)
|
|
49
|
+
return null;
|
|
50
|
+
return nlToken;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* @returns {string | null} The BOS (Beginning Of Sequence) token as a string.
|
|
54
|
+
*/
|
|
55
|
+
getBosString() {
|
|
56
|
+
const bosToken = this.getBosToken();
|
|
57
|
+
if (bosToken == null)
|
|
58
|
+
return null;
|
|
59
|
+
return this._ctx.getTokenString(bosToken);
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* @returns {string | null} The EOS (End Of Sequence) token as a string.
|
|
63
|
+
*/
|
|
64
|
+
getEosString() {
|
|
65
|
+
const eosToken = this.getEosToken();
|
|
66
|
+
if (eosToken == null)
|
|
67
|
+
return null;
|
|
68
|
+
return this._ctx.getTokenString(eosToken);
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* @returns {string | null} The NL (New Line) token as a string.
|
|
72
|
+
*/
|
|
73
|
+
getNlString() {
|
|
74
|
+
const nlToken = this.getNlToken();
|
|
75
|
+
if (nlToken == null)
|
|
76
|
+
return null;
|
|
77
|
+
return this._ctx.getTokenString(nlToken);
|
|
78
|
+
}
|
|
79
|
+
getContextSize() {
|
|
80
|
+
return this._ctx.getContextSize();
|
|
81
|
+
}
|
|
82
|
+
async *evaluate(tokens) {
|
|
16
83
|
let evalTokens = tokens;
|
|
17
84
|
if (this._prependBos) {
|
|
18
85
|
const tokenArray = Array.from(tokens);
|
|
19
|
-
tokenArray.unshift(
|
|
86
|
+
tokenArray.unshift(this._ctx.tokenBos());
|
|
20
87
|
evalTokens = Uint32Array.from(tokenArray);
|
|
21
88
|
this._prependBos = false;
|
|
22
89
|
}
|
|
23
90
|
// eslint-disable-next-line no-constant-condition
|
|
24
91
|
while (true) {
|
|
25
92
|
// Evaluate to get the next token.
|
|
26
|
-
const nextToken = await this._ctx.eval(evalTokens
|
|
93
|
+
const nextToken = await this._ctx.eval(evalTokens);
|
|
27
94
|
// the assistant finished answering
|
|
28
|
-
if (nextToken ===
|
|
95
|
+
if (nextToken === this._ctx.tokenEos())
|
|
29
96
|
break;
|
|
30
97
|
yield nextToken;
|
|
31
98
|
// Create tokens for the next eval.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,
|
|
1
|
+
{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAI5C,MAAM,OAAO,YAAY;IACJ,IAAI,CAAe;IAC5B,WAAW,CAAU;IAE7B,YAAmB,EAAC,KAAK,EAAE,OAAO,EAAE,UAAU,GAAG,IAAI,EAAoE;QACrH,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACxD,OAAO,EAAE,OAAO,EAAE,QAAQ;SAC7B,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,IAAI,IAAI,KAAK,EAAE;YACX,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAAmB;QAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YACnB,OAAO,EAAE,CAAC;QAEd,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,UAAU;QACb,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAEpC,IAAI,OAAO,KAAK,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QAEhB,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAElC,IAAI,OAAO,IAAI,IAAI;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEM,cAAc;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAEM,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB;QACtC,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,WAAW,EAAE;YAClB,MAAM,UAAU,GAAY,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC/C,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEzC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;SAC5B;QAED,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAU,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAE1D,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CAEJ"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export declare class LlamaGrammar {
|
|
2
|
+
/**
|
|
3
|
+
* GBNF files are supported.
|
|
4
|
+
* More info here: https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md
|
|
5
|
+
* @param {object} options
|
|
6
|
+
* @param {string} options.grammar - GBNF grammar
|
|
7
|
+
* @param {boolean} [options.printGrammar] - print the grammar to stdout
|
|
8
|
+
*/
|
|
9
|
+
constructor({ grammar, printGrammar }: {
|
|
10
|
+
grammar: string;
|
|
11
|
+
printGrammar?: boolean;
|
|
12
|
+
});
|
|
13
|
+
static getFor(type: "json" | "list" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
|
|
14
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs-extra";
|
|
3
|
+
import { getGrammarsFolder } from "../utils/getGrammarsFolder.js";
|
|
4
|
+
import { LLAMAGrammar } from "./LlamaBins.js";
|
|
5
|
+
export class LlamaGrammar {
|
|
6
|
+
/** @internal */
|
|
7
|
+
_grammar;
|
|
8
|
+
/**
|
|
9
|
+
* GBNF files are supported.
|
|
10
|
+
* More info here: https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md
|
|
11
|
+
* @param {object} options
|
|
12
|
+
* @param {string} options.grammar - GBNF grammar
|
|
13
|
+
* @param {boolean} [options.printGrammar] - print the grammar to stdout
|
|
14
|
+
*/
|
|
15
|
+
constructor({ grammar, printGrammar = false }) {
|
|
16
|
+
this._grammar = new LLAMAGrammar(grammar, {
|
|
17
|
+
printGrammar
|
|
18
|
+
});
|
|
19
|
+
}
|
|
20
|
+
static async getFor(type) {
|
|
21
|
+
const grammarsFolder = await getGrammarsFolder();
|
|
22
|
+
const grammarFile = path.join(grammarsFolder, type + ".gbnf");
|
|
23
|
+
if (await fs.exists(grammarFile)) {
|
|
24
|
+
const grammar = await fs.readFile(grammarFile, "utf8");
|
|
25
|
+
return new LlamaGrammar({ grammar });
|
|
26
|
+
}
|
|
27
|
+
throw new Error(`Grammar file for type "${type}" was not found in "${grammarsFolder}"`);
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
//# sourceMappingURL=LlamaGrammar.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAG5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IAEvC;;;;;;OAMG;IACH,YAAmB,EAAC,OAAO,EAAE,YAAY,GAAG,KAAK,EAA4C;QACzF,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;IACP,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;YAC9B,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC,EAAC,OAAO,EAAC,CAAC,CAAC;SACtC;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
|
|
@@ -9,6 +9,26 @@ export declare class LlamaModel {
|
|
|
9
9
|
* @param {number} [options.batchSize] - prompt processing batch size
|
|
10
10
|
* @param {number} [options.gpuLayers] - number of layers to store in VRAM
|
|
11
11
|
* @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
|
|
12
|
+
* @param {number} [options.temperature] - Temperature is a hyperparameter that controls the randomness of the generated text.
|
|
13
|
+
* It affects the probability distribution of the model's output tokens.
|
|
14
|
+
* A higher temperature (e.g., 1.5) makes the output more random and creative,
|
|
15
|
+
* while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
|
|
16
|
+
* The suggested temperature is 0.8, which provides a balance between randomness and determinism.
|
|
17
|
+
* At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
|
|
18
|
+
*
|
|
19
|
+
* Set to `0` to disable.
|
|
20
|
+
* @param {number} [options.topK] - Limits the model to consider only the K most likely next tokens for sampling at each step of
|
|
21
|
+
* sequence generation.
|
|
22
|
+
* An integer number between `1` and the size of the vocabulary.
|
|
23
|
+
* Set to `0` to disable (which uses the full vocabulary).
|
|
24
|
+
*
|
|
25
|
+
* Only relevant when `temperature` is set to a value greater than 0.
|
|
26
|
+
* @param {number} [options.topP] - Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
|
|
27
|
+
* and samples the next token only from this set.
|
|
28
|
+
* A float number between `0` and `1`.
|
|
29
|
+
* Set to `1` to disable.
|
|
30
|
+
*
|
|
31
|
+
* Only relevant when `temperature` is set to a value greater than `0`.
|
|
12
32
|
* @param {boolean} [options.f16Kv] - use fp16 for KV cache
|
|
13
33
|
* @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
|
|
14
34
|
* @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
|
|
@@ -16,7 +36,7 @@ export declare class LlamaModel {
|
|
|
16
36
|
* @param {boolean} [options.useMlock] - force system to keep model in RAM
|
|
17
37
|
* @param {boolean} [options.embedding] - embedding mode only
|
|
18
38
|
*/
|
|
19
|
-
constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
|
|
39
|
+
constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, temperature, topK, topP, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
|
|
20
40
|
/** path to the model on the filesystem */
|
|
21
41
|
modelPath: string;
|
|
22
42
|
/** If null, a random seed will be used */
|
|
@@ -29,6 +49,34 @@ export declare class LlamaModel {
|
|
|
29
49
|
gpuLayers?: number;
|
|
30
50
|
/** if true, reduce VRAM usage at the cost of performance */
|
|
31
51
|
lowVram?: boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Temperature is a hyperparameter that controls the randomness of the generated text.
|
|
54
|
+
* It affects the probability distribution of the model's output tokens.
|
|
55
|
+
* A higher temperature (e.g., 1.5) makes the output more random and creative,
|
|
56
|
+
* while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
|
|
57
|
+
* The suggested temperature is 0.8, which provides a balance between randomness and determinism.
|
|
58
|
+
* At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
|
|
59
|
+
*
|
|
60
|
+
* Set to `0` to disable.
|
|
61
|
+
*/
|
|
62
|
+
temperature?: number;
|
|
63
|
+
/**
|
|
64
|
+
* Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
|
|
65
|
+
* An integer number between `1` and the size of the vocabulary.
|
|
66
|
+
* Set to `0` to disable (which uses the full vocabulary).
|
|
67
|
+
*
|
|
68
|
+
* Only relevant when `temperature` is set to a value greater than 0.
|
|
69
|
+
* */
|
|
70
|
+
topK?: number;
|
|
71
|
+
/**
|
|
72
|
+
* Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
|
|
73
|
+
* and samples the next token only from this set.
|
|
74
|
+
* A float number between `0` and `1`.
|
|
75
|
+
* Set to `1` to disable.
|
|
76
|
+
*
|
|
77
|
+
* Only relevant when `temperature` is set to a value greater than `0`.
|
|
78
|
+
* */
|
|
79
|
+
topP?: number;
|
|
32
80
|
/** use fp16 for KV cache */
|
|
33
81
|
f16Kv?: boolean;
|
|
34
82
|
/** the llama_eval() call computes all logits, not just the last one */
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { removeNullFields } from "../utils/removeNullFields.js";
|
|
1
2
|
import { llamaCppNode, LLAMAModel } from "./LlamaBins.js";
|
|
2
3
|
export class LlamaModel {
|
|
3
4
|
/** @internal */
|
|
@@ -12,6 +13,26 @@ export class LlamaModel {
|
|
|
12
13
|
* @param {number} [options.batchSize] - prompt processing batch size
|
|
13
14
|
* @param {number} [options.gpuLayers] - number of layers to store in VRAM
|
|
14
15
|
* @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
|
|
16
|
+
* @param {number} [options.temperature] - Temperature is a hyperparameter that controls the randomness of the generated text.
|
|
17
|
+
* It affects the probability distribution of the model's output tokens.
|
|
18
|
+
* A higher temperature (e.g., 1.5) makes the output more random and creative,
|
|
19
|
+
* while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
|
|
20
|
+
* The suggested temperature is 0.8, which provides a balance between randomness and determinism.
|
|
21
|
+
* At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
|
|
22
|
+
*
|
|
23
|
+
* Set to `0` to disable.
|
|
24
|
+
* @param {number} [options.topK] - Limits the model to consider only the K most likely next tokens for sampling at each step of
|
|
25
|
+
* sequence generation.
|
|
26
|
+
* An integer number between `1` and the size of the vocabulary.
|
|
27
|
+
* Set to `0` to disable (which uses the full vocabulary).
|
|
28
|
+
*
|
|
29
|
+
* Only relevant when `temperature` is set to a value greater than 0.
|
|
30
|
+
* @param {number} [options.topP] - Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
|
|
31
|
+
* and samples the next token only from this set.
|
|
32
|
+
* A float number between `0` and `1`.
|
|
33
|
+
* Set to `1` to disable.
|
|
34
|
+
*
|
|
35
|
+
* Only relevant when `temperature` is set to a value greater than `0`.
|
|
15
36
|
* @param {boolean} [options.f16Kv] - use fp16 for KV cache
|
|
16
37
|
* @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
|
|
17
38
|
* @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
|
|
@@ -19,13 +40,16 @@ export class LlamaModel {
|
|
|
19
40
|
* @param {boolean} [options.useMlock] - force system to keep model in RAM
|
|
20
41
|
* @param {boolean} [options.embedding] - embedding mode only
|
|
21
42
|
*/
|
|
22
|
-
constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
|
|
43
|
+
constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, temperature = 0, topK = 40, topP = 0.95, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
|
|
23
44
|
this._model = new LLAMAModel(modelPath, removeNullFields({
|
|
24
45
|
seed: seed != null ? Math.max(-1, seed) : undefined,
|
|
25
46
|
contextSize,
|
|
26
47
|
batchSize,
|
|
27
48
|
gpuLayers,
|
|
28
49
|
lowVram,
|
|
50
|
+
temperature,
|
|
51
|
+
topK,
|
|
52
|
+
topP,
|
|
29
53
|
f16Kv,
|
|
30
54
|
logitsAll,
|
|
31
55
|
vocabOnly,
|
|
@@ -38,12 +62,4 @@ export class LlamaModel {
|
|
|
38
62
|
return llamaCppNode.systemInfo();
|
|
39
63
|
}
|
|
40
64
|
}
|
|
41
|
-
function removeNullFields(obj) {
|
|
42
|
-
const newObj = Object.assign({}, obj);
|
|
43
|
-
for (const key in obj) {
|
|
44
|
-
if (newObj[key] == null)
|
|
45
|
-
delete newObj[key];
|
|
46
|
-
}
|
|
47
|
-
return newObj;
|
|
48
|
-
}
|
|
49
65
|
//# sourceMappingURL=LlamaModel.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC
|
|
1
|
+
{"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAoCG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EACpE,OAAO,EAAE,WAAW,GAAG,CAAC,EAAE,IAAI,GAAG,EAAE,EAAE,IAAI,GAAG,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAoE9G;QACG,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,gBAAgB,CAAC;YACrD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,OAAO;YACP,WAAW;YACX,IAAI;YACJ,IAAI;YACJ,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;YACP,QAAQ;YACR,SAAS;SACZ,CAAC,CAAC,CAAC;IACR,CAAC;IAEM,MAAM,KAAK,UAAU;QACxB,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC;IACrC,CAAC;CACJ"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export type Token = number;
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import fs from "fs-extra";
|
|
2
|
+
import { binariesGithubReleasePath } from "../config.js";
|
|
3
|
+
export async function getBinariesGithubRelease() {
|
|
4
|
+
const binariesGithubRelease = await fs.readJson(binariesGithubReleasePath);
|
|
5
|
+
return binariesGithubRelease.release;
|
|
6
|
+
}
|
|
7
|
+
export async function setBinariesGithubRelease(release) {
|
|
8
|
+
const binariesGithubReleaseJson = {
|
|
9
|
+
release: release
|
|
10
|
+
};
|
|
11
|
+
await fs.writeJson(binariesGithubReleasePath, binariesGithubReleaseJson, {
|
|
12
|
+
spaces: 4
|
|
13
|
+
});
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=binariesGithubRelease.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"binariesGithubRelease.js","sourceRoot":"","sources":["../../src/utils/binariesGithubRelease.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,yBAAyB,EAAC,MAAM,cAAc,CAAC;AAMvD,MAAM,CAAC,KAAK,UAAU,wBAAwB;IAC1C,MAAM,qBAAqB,GAA8B,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC,CAAC;IAEtG,OAAO,qBAAqB,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,OAA6C;IACxF,MAAM,yBAAyB,GAA8B;QACzD,OAAO,EAAE,OAAO;KACnB,CAAC;IAEF,MAAM,EAAE,CAAC,SAAS,CAAC,yBAAyB,EAAE,yBAAyB,EAAE;QACrE,MAAM,EAAE,CAAC;KACZ,CAAC,CAAC;AACP,CAAC"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
export declare function compileLlamaCpp({ arch, nodeTarget, setUsedBingFlag }: {
|
|
1
|
+
export declare function compileLlamaCpp({ arch, nodeTarget, setUsedBingFlag, metal, cuda }: {
|
|
2
2
|
arch?: string;
|
|
3
3
|
nodeTarget?: string;
|
|
4
4
|
setUsedBingFlag?: boolean;
|
|
5
|
+
metal?: boolean;
|
|
6
|
+
cuda?: boolean;
|
|
5
7
|
}): Promise<void>;
|
|
6
8
|
export declare function getCompiledLlamaCppBinaryPath(): Promise<string | null>;
|