node-llama-cpp 2.5.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -301
- package/dist/chatWrappers/{ChatMLPromptWrapper.d.ts → ChatMLChatPromptWrapper.d.ts} +1 -1
- package/dist/chatWrappers/{ChatMLPromptWrapper.js → ChatMLChatPromptWrapper.js} +2 -2
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +1 -0
- package/dist/chatWrappers/createChatWrapperByBos.js +2 -2
- package/dist/chatWrappers/createChatWrapperByBos.js.map +1 -1
- package/dist/cli/commands/BuildCommand.js +3 -1
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +8 -1
- package/dist/cli/commands/ChatCommand.js +88 -21
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +2 -2
- package/dist/cli/commands/DownloadCommand.js +13 -38
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/config.d.ts +5 -0
- package/dist/config.js +7 -0
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +5 -4
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +3 -3
- package/dist/llamaEvaluator/LlamaBins.js +2 -2
- package/dist/llamaEvaluator/LlamaBins.js.map +1 -1
- package/dist/llamaEvaluator/LlamaChatSession.d.ts +79 -2
- package/dist/llamaEvaluator/LlamaChatSession.js +52 -8
- package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
- package/dist/llamaEvaluator/LlamaContext.d.ts +60 -3
- package/dist/llamaEvaluator/LlamaContext.js +36 -4
- package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +16 -3
- package/dist/llamaEvaluator/LlamaGrammar.js +23 -4
- package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +14 -0
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +16 -0
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/llamaEvaluator/LlamaModel.d.ts +46 -14
- package/dist/llamaEvaluator/LlamaModel.js +23 -16
- package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +8 -0
- package/dist/state.js.map +1 -0
- package/dist/utils/cloneLlamaCppRepo.d.ts +1 -0
- package/dist/utils/cloneLlamaCppRepo.js +62 -0
- package/dist/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/utils/compileLLamaCpp.js +24 -6
- package/dist/utils/compileLLamaCpp.js.map +1 -1
- package/dist/utils/getBin.d.ts +21 -13
- package/dist/utils/gitReleaseBundles.d.ts +2 -0
- package/dist/utils/gitReleaseBundles.js +25 -0
- package/dist/utils/gitReleaseBundles.js.map +1 -0
- package/llama/addon.cpp +184 -110
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/toolchains/darwin.host-x64.target-arm64.cmake +8 -0
- package/llama/toolchains/linux.host-arm64.target-x64.cmake +5 -0
- package/llama/toolchains/linux.host-x64.target-arm64.cmake +5 -0
- package/llama/toolchains/linux.host-x64.target-arm71.cmake +5 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/mac-arm64/ggml-metal.metal +246 -79
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/ggml-metal.metal +246 -79
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/package.json +10 -4
- package/dist/chatWrappers/ChatMLPromptWrapper.js.map +0 -1
- package/llamaBins/linux-ppc64le/llama-addon.node +0 -0
|
@@ -2,14 +2,47 @@
|
|
|
2
2
|
import { ChatPromptWrapper } from "../ChatPromptWrapper.js";
|
|
3
3
|
import { ConversationInteraction, Token } from "../types.js";
|
|
4
4
|
import { LlamaContext } from "./LlamaContext.js";
|
|
5
|
+
import { LlamaGrammar } from "./LlamaGrammar.js";
|
|
5
6
|
export type LlamaChatSessionOptions = {
|
|
6
7
|
context: LlamaContext;
|
|
7
8
|
printLLamaSystemInfo?: boolean;
|
|
9
|
+
/** GeneralChatPromptWrapper is used by default */
|
|
8
10
|
promptWrapper?: ChatPromptWrapper | "auto";
|
|
9
11
|
systemPrompt?: string;
|
|
10
12
|
/** Conversation history to load into the context to continue an existing conversation */
|
|
11
13
|
conversationHistory?: readonly ConversationInteraction[];
|
|
12
14
|
};
|
|
15
|
+
export type LlamaChatSessionRepeatPenalty = {
|
|
16
|
+
/**
|
|
17
|
+
* Number of recent tokens generated by the model to apply penalties to repetition of.
|
|
18
|
+
* Defaults to `64`.
|
|
19
|
+
*/
|
|
20
|
+
lastTokens?: number;
|
|
21
|
+
punishTokensFilter?: (tokens: Token[]) => Token[];
|
|
22
|
+
/**
|
|
23
|
+
* Penalize new line tokens.
|
|
24
|
+
* Enabled by default.
|
|
25
|
+
*/
|
|
26
|
+
penalizeNewLine?: boolean;
|
|
27
|
+
/**
|
|
28
|
+
* The relative amount to lower the probability of the tokens in `punishTokens` by
|
|
29
|
+
* Defaults to `1.1`.
|
|
30
|
+
* Set to `1` to disable.
|
|
31
|
+
*/
|
|
32
|
+
penalty?: number;
|
|
33
|
+
/**
|
|
34
|
+
* If a token appears n times in the `punishTokens` array, lower its probability by `n * frequencyPenalty`
|
|
35
|
+
* Disabled by default (`0`).
|
|
36
|
+
* Set to a value between `0` and `1` to enable.
|
|
37
|
+
*/
|
|
38
|
+
frequencyPenalty?: number;
|
|
39
|
+
/**
|
|
40
|
+
* Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`
|
|
41
|
+
* Disabled by default (`0`).
|
|
42
|
+
* Set to a value between `0` and `1` to enable.
|
|
43
|
+
*/
|
|
44
|
+
presencePenalty?: number;
|
|
45
|
+
};
|
|
13
46
|
export declare class LlamaChatSession {
|
|
14
47
|
private readonly _systemPrompt;
|
|
15
48
|
private readonly _printLLamaSystemInfo;
|
|
@@ -20,14 +53,58 @@ export declare class LlamaChatSession {
|
|
|
20
53
|
private _lastStopStringSuffix;
|
|
21
54
|
private _conversationHistoryToLoad;
|
|
22
55
|
private readonly _ctx;
|
|
56
|
+
/**
|
|
57
|
+
* @param {LlamaChatSessionOptions} options
|
|
58
|
+
*/
|
|
23
59
|
constructor({ context, printLLamaSystemInfo, promptWrapper, systemPrompt, conversationHistory }: LlamaChatSessionOptions);
|
|
24
60
|
get initialized(): boolean;
|
|
25
61
|
get context(): LlamaContext;
|
|
26
62
|
init(): Promise<void>;
|
|
27
|
-
|
|
28
|
-
|
|
63
|
+
/**
|
|
64
|
+
* @param {string} prompt
|
|
65
|
+
* @param {object} options
|
|
66
|
+
* @returns {Promise<string>}
|
|
67
|
+
*/
|
|
68
|
+
prompt(prompt: string, { onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix, repeatPenalty }?: {
|
|
69
|
+
onToken?: (tokens: Token[]) => void;
|
|
29
70
|
signal?: AbortSignal;
|
|
30
71
|
maxTokens?: number;
|
|
72
|
+
/**
|
|
73
|
+
* Temperature is a hyperparameter that controls the randomness of the generated text.
|
|
74
|
+
* It affects the probability distribution of the model's output tokens.
|
|
75
|
+
* A higher temperature (e.g., 1.5) makes the output more random and creative,
|
|
76
|
+
* while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
|
|
77
|
+
* The suggested temperature is 0.8, which provides a balance between randomness and determinism.
|
|
78
|
+
* At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
|
|
79
|
+
*
|
|
80
|
+
* Set to `0` to disable.
|
|
81
|
+
* Disabled by default (set to `0`).
|
|
82
|
+
*/
|
|
83
|
+
temperature?: number;
|
|
84
|
+
/**
|
|
85
|
+
* Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
|
|
86
|
+
* An integer number between `1` and the size of the vocabulary.
|
|
87
|
+
* Set to `0` to disable (which uses the full vocabulary).
|
|
88
|
+
*
|
|
89
|
+
* Only relevant when `temperature` is set to a value greater than 0.
|
|
90
|
+
*/
|
|
91
|
+
topK?: number;
|
|
92
|
+
/**
|
|
93
|
+
* Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
|
|
94
|
+
* and samples the next token only from this set.
|
|
95
|
+
* A float number between `0` and `1`.
|
|
96
|
+
* Set to `1` to disable.
|
|
97
|
+
*
|
|
98
|
+
* Only relevant when `temperature` is set to a value greater than `0`.
|
|
99
|
+
*/
|
|
100
|
+
topP?: number;
|
|
101
|
+
grammar?: LlamaGrammar;
|
|
102
|
+
/**
|
|
103
|
+
* Trim whitespace from the end of the generated text
|
|
104
|
+
* Disabled by default.
|
|
105
|
+
*/
|
|
106
|
+
trimWhitespaceSuffix?: boolean;
|
|
107
|
+
repeatPenalty?: false | LlamaChatSessionRepeatPenalty;
|
|
31
108
|
}): Promise<string>;
|
|
32
109
|
private _evalTokens;
|
|
33
110
|
private _checkStopString;
|
|
@@ -4,7 +4,9 @@ import { AbortError } from "../AbortError.js";
|
|
|
4
4
|
import { GeneralChatPromptWrapper } from "../chatWrappers/GeneralChatPromptWrapper.js";
|
|
5
5
|
import { getChatWrapperByBos } from "../chatWrappers/createChatWrapperByBos.js";
|
|
6
6
|
import { generateContextTextFromConversationHistory } from "../chatWrappers/generateContextTextFromConversationHistory.js";
|
|
7
|
+
import { removeNullFields } from "../utils/removeNullFields.js";
|
|
7
8
|
import { LlamaModel } from "./LlamaModel.js";
|
|
9
|
+
import { LlamaGrammarEvaluationState } from "./LlamaGrammarEvaluationState.js";
|
|
8
10
|
const UNKNOWN_UNICODE_CHAR = "\ufffd";
|
|
9
11
|
export class LlamaChatSession {
|
|
10
12
|
_systemPrompt;
|
|
@@ -16,6 +18,9 @@ export class LlamaChatSession {
|
|
|
16
18
|
_lastStopStringSuffix = null;
|
|
17
19
|
_conversationHistoryToLoad = null;
|
|
18
20
|
_ctx;
|
|
21
|
+
/**
|
|
22
|
+
* @param {LlamaChatSessionOptions} options
|
|
23
|
+
*/
|
|
19
24
|
constructor({ context, printLLamaSystemInfo = false, promptWrapper = new GeneralChatPromptWrapper(), systemPrompt = defaultChatSystemPrompt, conversationHistory }) {
|
|
20
25
|
this._ctx = context;
|
|
21
26
|
this._printLLamaSystemInfo = printLLamaSystemInfo;
|
|
@@ -48,7 +53,12 @@ export class LlamaChatSession {
|
|
|
48
53
|
this._initialized = true;
|
|
49
54
|
});
|
|
50
55
|
}
|
|
51
|
-
|
|
56
|
+
/**
|
|
57
|
+
* @param {string} prompt
|
|
58
|
+
* @param {object} options
|
|
59
|
+
* @returns {Promise<string>}
|
|
60
|
+
*/
|
|
61
|
+
async prompt(prompt, { onToken, signal, maxTokens, temperature, topK, topP, grammar = this.context._chatGrammar, trimWhitespaceSuffix = false, repeatPenalty } = {}) {
|
|
52
62
|
if (!this.initialized)
|
|
53
63
|
await this.init();
|
|
54
64
|
return await withLock(this, "prompt", async () => {
|
|
@@ -83,18 +93,47 @@ export class LlamaChatSession {
|
|
|
83
93
|
this._promptIndex++;
|
|
84
94
|
this._lastStopString = null;
|
|
85
95
|
this._lastStopStringSuffix = null;
|
|
86
|
-
const { text, stopString, stopStringSuffix } = await this._evalTokens(this._ctx.encode(promptText), {
|
|
96
|
+
const { text, stopString, stopStringSuffix } = await this._evalTokens(this._ctx.encode(promptText), {
|
|
97
|
+
onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix,
|
|
98
|
+
repeatPenalty: repeatPenalty == false ? { lastTokens: 0 } : repeatPenalty
|
|
99
|
+
});
|
|
87
100
|
this._lastStopString = stopString;
|
|
88
101
|
this._lastStopStringSuffix = stopStringSuffix;
|
|
89
102
|
return text;
|
|
90
103
|
});
|
|
91
104
|
}
|
|
92
|
-
async _evalTokens(tokens, { onToken, signal, maxTokens } = {}) {
|
|
93
|
-
|
|
105
|
+
async _evalTokens(tokens, { onToken, signal, maxTokens, temperature, topK, topP, grammar = this.context._chatGrammar, trimWhitespaceSuffix = false, repeatPenalty: { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = {} } = {}) {
|
|
106
|
+
let stopStrings = this._promptWrapper.getStopStrings();
|
|
107
|
+
if (grammar != null)
|
|
108
|
+
stopStrings = stopStrings.concat(grammar.stopStrings);
|
|
94
109
|
const stopStringIndexes = Array(stopStrings.length).fill(0);
|
|
95
110
|
const skippedChunksQueue = [];
|
|
96
111
|
const res = [];
|
|
97
|
-
|
|
112
|
+
const grammarEvaluationState = grammar != null
|
|
113
|
+
? new LlamaGrammarEvaluationState({ grammar })
|
|
114
|
+
: undefined;
|
|
115
|
+
const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
|
|
116
|
+
const getPenaltyTokens = () => {
|
|
117
|
+
let punishTokens = res.slice(-repeatPenaltyLastTokens);
|
|
118
|
+
if (punishTokensFilter != null)
|
|
119
|
+
punishTokens = punishTokensFilter(punishTokens);
|
|
120
|
+
if (!penalizeNewLine) {
|
|
121
|
+
const nlToken = this.context.getNlToken();
|
|
122
|
+
if (nlToken != null)
|
|
123
|
+
punishTokens = punishTokens.filter(token => token !== nlToken);
|
|
124
|
+
}
|
|
125
|
+
return Uint32Array.from(punishTokens);
|
|
126
|
+
};
|
|
127
|
+
const evaluationIterator = this._ctx.evaluate(tokens, removeNullFields({
|
|
128
|
+
temperature, topK, topP, grammarEvaluationState,
|
|
129
|
+
repeatPenalty: !repeatPenaltyEnabled ? undefined : {
|
|
130
|
+
punishTokens: getPenaltyTokens,
|
|
131
|
+
penalty,
|
|
132
|
+
frequencyPenalty,
|
|
133
|
+
presencePenalty
|
|
134
|
+
}
|
|
135
|
+
}));
|
|
136
|
+
for await (const chunk of evaluationIterator) {
|
|
98
137
|
if (signal?.aborted)
|
|
99
138
|
throw new AbortError();
|
|
100
139
|
const tokenStr = this._ctx.decode(Uint32Array.from([chunk]));
|
|
@@ -104,7 +143,9 @@ export class LlamaChatSession {
|
|
|
104
143
|
const skippedChunksText = skippedChunksQueue.length > 0
|
|
105
144
|
? this._ctx.decode(Uint32Array.from(skippedChunksQueue))
|
|
106
145
|
: "";
|
|
107
|
-
|
|
146
|
+
let [queuedTextBeforeStopString] = skippedChunksText.split(stopString);
|
|
147
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
|
|
148
|
+
queuedTextBeforeStopString = queuedTextBeforeStopString.trimEnd();
|
|
108
149
|
if (queuedTextBeforeStopString.length > 0) {
|
|
109
150
|
const beforeStopStringTokens = Array.from(this._ctx.encode(queuedTextBeforeStopString));
|
|
110
151
|
res.push(...beforeStopStringTokens);
|
|
@@ -118,7 +159,7 @@ export class LlamaChatSession {
|
|
|
118
159
|
};
|
|
119
160
|
}
|
|
120
161
|
// if the token is unknown, it means it's not complete character
|
|
121
|
-
if (tokenStr === UNKNOWN_UNICODE_CHAR || skipTokenEvent) {
|
|
162
|
+
if (tokenStr === UNKNOWN_UNICODE_CHAR || skipTokenEvent || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && tokenStr.trim() === "")) {
|
|
122
163
|
skippedChunksQueue.push(chunk);
|
|
123
164
|
continue;
|
|
124
165
|
}
|
|
@@ -132,8 +173,11 @@ export class LlamaChatSession {
|
|
|
132
173
|
if (maxTokens != null && maxTokens > 0 && res.length >= maxTokens)
|
|
133
174
|
break;
|
|
134
175
|
}
|
|
176
|
+
let resText = this._ctx.decode(Uint32Array.from(res));
|
|
177
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
|
|
178
|
+
resText = resText.trimEnd();
|
|
135
179
|
return {
|
|
136
|
-
text:
|
|
180
|
+
text: resText,
|
|
137
181
|
stopString: null,
|
|
138
182
|
stopStringSuffix: null
|
|
139
183
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaChatSession.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaChatSession.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,uBAAuB,EAAC,MAAM,cAAc,CAAC;AACrD,OAAO,EAAC,QAAQ,EAAC,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAC,wBAAwB,EAAC,MAAM,6CAA6C,CAAC;AACrF,OAAO,EAAC,mBAAmB,EAAC,MAAM,2CAA2C,CAAC;AAE9E,OAAO,EAAC,0CAA0C,EAAC,MAAM,+DAA+D,CAAC;AACzH,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAG3C,MAAM,oBAAoB,GAAG,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"LlamaChatSession.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaChatSession.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,uBAAuB,EAAC,MAAM,cAAc,CAAC;AACrD,OAAO,EAAC,QAAQ,EAAC,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAC,wBAAwB,EAAC,MAAM,6CAA6C,CAAC;AACrF,OAAO,EAAC,mBAAmB,EAAC,MAAM,2CAA2C,CAAC;AAE9E,OAAO,EAAC,0CAA0C,EAAC,MAAM,+DAA+D,CAAC;AACzH,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAC9D,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAG3C,OAAO,EAAC,2BAA2B,EAAC,MAAM,kCAAkC,CAAC;AAE7E,MAAM,oBAAoB,GAAG,QAAQ,CAAC;AAqDtC,MAAM,OAAO,gBAAgB;IACR,aAAa,CAAS;IACtB,qBAAqB,CAAU;IAC/B,cAAc,CAAoB;IAC3C,YAAY,GAAW,CAAC,CAAC;IACzB,YAAY,GAAY,KAAK,CAAC;IAC9B,eAAe,GAAkB,IAAI,CAAC;IACtC,qBAAqB,GAAkB,IAAI,CAAC;IAC5C,0BAA0B,GAA8C,IAAI,CAAC;IACpE,IAAI,CAAe;IAEpC;;OAEG;IACH,YAAmB,EACf,OAAO,EACP,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,IAAI,wBAAwB,EAAE,EAC9C,YAAY,GAAG,uBAAuB,EACtC,mBAAmB,EACG;QACtB,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC;QACpB,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,IAAI,CAAC,0BAA0B,GAAG,CAAC,mBAAmB,IAAI,IAAI,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,CAAC;YAC7F,CAAC,CAAC,mBAAmB;YACrB,CAAC,CAAC,IAAI,CAAC;QAEX,IAAI,aAAa,KAAK,MAAM,EAAE;YAC1B,MAAM,WAAW,GAAG,mBAAmB,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;YAEhE,IAAI,WAAW,IAAI,IAAI;gBACnB,IAAI,CAAC,cAAc,GAAG,IAAI,WAAW,EAAE,CAAC;;gBAExC,IAAI,CAAC,cAAc,GAAG,IAAI,wBAAwB,EAAE,CAAC;SAC5D;;YACG,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;IAC5C,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,IAAI,CAAC;IACrB,CAAC;IAEM,KAAK,CAAC,IAAI;QACb,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,IAAI,EAAE;YACpC,IAAI,IAAI,CAAC,YAAY;gBACjB,OAAO;YAEX,IAAI,IAAI,CAAC,qBAAqB;gBAC1B,OAAO,CAAC,GAAG,CAAC,mBAAmB,EAAE,UAAU,CAAC,UAAU,CAAC,CAAC;YAE5D,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC7B,CAAC,CAAC,CAAC;IACP,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,EAChC,OAAO,EACP,MAAM,EACN,SAAS,EACT,WAAW,EACX,IAAI,EACJ,IAAI,EACJ,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EACnC,oBAAoB,GAAG,KAAK,EA
C5B,aAAa,KA+Cb,EAAE;QACF,IAAI,CAAC,IAAI,CAAC,WAAW;YACjB,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAEtB,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,IAAI,EAAE;YAC7C,IAAI,UAAU,GAAG,EAAE,CAAC;YAEpB,IAAI,IAAI,CAAC,YAAY,IAAI,CAAC,IAAI,IAAI,CAAC,0BAA0B,IAAI,IAAI,EAAE;gBACnE,MAAM,EAAC,IAAI,EAAE,UAAU,EAAE,gBAAgB,EAAC,GACtC,0CAA0C,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,0BAA0B,EAAE;oBAC7F,YAAY,EAAE,IAAI,CAAC,aAAa;oBAChC,kBAAkB,EAAE,IAAI,CAAC,YAAY;oBACrC,cAAc,EAAE,IAAI,CAAC,eAAe;oBACpC,oBAAoB,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;wBACxC,CAAC,CAAC,CACE,IAAI,CAAC,IAAI,CAAC,UAAU;4BAChB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE;4BAC1B,CAAC,CAAC,IAAI,CACb;wBACD,CAAC,CAAC,IAAI,CAAC,qBAAqB;iBACnC,CAAC,CAAC;gBAEP,UAAU,IAAI,IAAI,CAAC;gBACnB,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC;gBAClC,IAAI,CAAC,qBAAqB,GAAG,gBAAgB,CAAC;gBAC9C,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,0BAA0B,CAAC,MAAM,CAAC;gBAE5D,IAAI,CAAC,0BAA0B,GAAG,IAAI,CAAC;aAC1C;YAED,UAAU,IAAI,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,MAAM,EAAE;gBACjD,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,WAAW,EAAE,IAAI,CAAC,YAAY;gBAC9B,cAAc,EAAE,IAAI,CAAC,eAAe;gBACpC,oBAAoB,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;oBACxC,CAAC,CAAC,CACE,IAAI,CAAC,IAAI,CAAC,UAAU;wBAChB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE;wBAC1B,CAAC,CAAC,IAAI,CACb;oBACD,CAAC,CAAC,IAAI,CAAC,qBAAqB;aACnC,CAAC,CAAC;YACH,IAAI,CAAC,YAAY,EAAE,CAAC;YACpB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;YAC5B,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC;YAElC,MAAM,EAAC,IAAI,EAAE,UAAU,EAAE,gBAAgB,EAAC,GACtC,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE;gBACjD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,oBAAoB;gBAClF,aAAa,EAAE,aAAa,IAAI,KAAK,CAAC,CAAC,CAAC,EAAC,UAAU,EAAE,CAAC,EAAC,CAAC,CAAC,CAAC,aAAa;aAC1E,CAAC,CAAC;YACP,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC;YAClC,IAAI,CAAC,qBAAqB,GAAG,gBAAgB,CAAC;YAE9C,OAAO,IAAI,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,MAAmB,EAAE,EAC3C,OAAO,EACP,MAAM,EACN,SAAS,EACT,WAAW,EACX,IAAI,EACJ,IAAI,EACJ,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EACnC,oBAAoB,GAAG,KAAK,EAC5B,aAAa,EAAE,EACX,UAAU,EAAE,uBAAuB,GAAG,EAAE
,EACxC,kBAAkB,EAClB,eAAe,EACf,OAAO,EACP,gBAAgB,EAChB,eAAe,EAClB,GAAG,EAAE,KAWN,EAAE;QACF,IAAI,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QAEvD,IAAI,OAAO,IAAI,IAAI;YACf,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QAE1D,MAAM,iBAAiB,GAAa,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtE,MAAM,kBAAkB,GAAY,EAAE,CAAC;QACvC,MAAM,GAAG,GAAY,EAAE,CAAC;QACxB,MAAM,sBAAsB,GAAG,OAAO,IAAI,IAAI;YAC1C,CAAC,CAAC,IAAI,2BAA2B,CAAC,EAAC,OAAO,EAAC,CAAC;YAC5C,CAAC,CAAC,SAAS,CAAC;QAChB,MAAM,oBAAoB,GAAG,uBAAuB,GAAG,CAAC,CAAC;QAEzD,MAAM,gBAAgB,GAAG,GAAG,EAAE;YAC1B,IAAI,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,uBAAuB,CAAC,CAAC;YAEvD,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,YAAY,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAC;YAEpD,IAAI,CAAC,eAAe,EAAE;gBAClB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;gBAE1C,IAAI,OAAO,IAAI,IAAI;oBACf,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,KAAK,OAAO,CAAC,CAAC;aACtE;YAED,OAAO,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC1C,CAAC,CAAC;QAEF,MAAM,kBAAkB,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACnE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,sBAAsB;YAC/C,aAAa,EAAE,CAAC,oBAAoB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC/C,YAAY,EAAE,gBAAgB;gBAC9B,OAAO;gBACP,gBAAgB;gBAChB,eAAe;aAClB;SACJ,CAAC,CAAC,CAAC;QAEJ,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,kBAAkB,EAAE;YAC1C,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,IAAI,UAAU,EAAE,CAAC;YAE3B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,EACF,YAAY,EAAE,cAAc,EAAE,UAAU,EAAE,gBAAgB,EAC7D,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,WAAW,EAAE,iBAAiB,CAAC,CAAC;YAEpE,IAAI,YAAY,EAAE;gBACd,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC/B,MAAM,iBAAiB,GAAG,kBAAkB,CAAC,MAAM,GAAG,CAAC;oBACnD,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;oBACxD,CAAC,CAAC,EAAE,CAAC;gBAET,IAAI,CAAC,0BAA0B,CAAC,GAAG,iBAAiB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;gBAEvE,IAAI,OAAO,EAAE,oBAAoB,IAAI,oBAAoB;oBACrD,0BAA0B,GAAG,0BAA0B,CAAC,OAAO,EAAE,CAAC;gBAEtE,IAAI,0BAA0B,CAAC,MAAM,GAAG,CAAC,EAAE;oBACvC,MAAM,sBAAsB
,GAAY,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,0BAA0B,CAAC,CAAC,CAAC;oBAEjG,GAAG,CAAC,IAAI,CAAC,GAAG,sBAAsB,CAAC,CAAC;oBACpC,OAAO,EAAE,CAAC,sBAAsB,CAAC,CAAC;oBAClC,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC;iBACjC;gBAED,OAAO;oBACH,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBAC7C,UAAU;oBACV,gBAAgB;iBACnB,CAAC;aACL;YAED,gEAAgE;YAChE,IAAI,QAAQ,KAAK,oBAAoB,IAAI,cAAc,IAAI,CACvD,CAAC,OAAO,EAAE,oBAAoB,IAAI,oBAAoB,CAAC,IAAI,QAAQ,CAAC,IAAI,EAAE,KAAK,EAAE,CACpF,EAAE;gBACC,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC/B,SAAS;aACZ;YAED,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC/B,GAAG,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;gBAChC,OAAO,EAAE,CAAC,kBAAkB,CAAC,CAAC;gBAC9B,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC;aACjC;YAED,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChB,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAEnB,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,IAAI,SAAS;gBAC7D,MAAM;SACb;QAED,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAEtD,IAAI,OAAO,EAAE,oBAAoB,IAAI,oBAAoB;YACrD,OAAO,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;QAEhC,OAAO;YACH,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,IAAI;YAChB,gBAAgB,EAAE,IAAI;SACzB,CAAC;IACN,CAAC;IAEO,gBAAgB,CAAC,QAAgB,EAAE,WAAqB,EAAE,iBAA2B;QACzF,IAAI,cAAc,GAAG,KAAK,CAAC;QAE3B,KAAK,IAAI,eAAe,GAAG,CAAC,EAAE,eAAe,GAAG,WAAW,CAAC,MAAM,EAAE,eAAe,EAAE,EAAE;YACnF,MAAM,UAAU,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;YAEhD,IAAI,yBAAyB,GAAG,KAAK,CAAC;YACtC,IAAI,CAAC,GAAG,CAAC,CAAC;YACV,OAAO,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACzF,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC,EAAE;oBAChE,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC;oBACrC,yBAAyB,GAAG,IAAI,CAAC;iBACpC;qBAAM;oBACH,iBAAiB,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;oBACvC,yBAAyB,GAAG,KAAK,CAAC;iBACrC;aACJ;YAED,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE;gBAC1D,OAAO;oBACH,YAAY,EAAE,IAAI;oBAClB,UAAU;oBACV,gBAAgB,EAAE,QAAQ,CAAC,MAAM,KAAK,CAAC;wBACnC,CAAC,CAAC,IAAI;wBACN,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;i
BAC1B,CAAC;aACL;YAED,cAAc,KAAK,yBAAyB,CAAC;SAChD;QAED,OAAO,EAAC,cAAc,EAAC,CAAC;IAC5B,CAAC;CACJ"}
|
|
@@ -1,16 +1,62 @@
|
|
|
1
1
|
import { Token } from "../types.js";
|
|
2
2
|
import { LlamaModel } from "./LlamaModel.js";
|
|
3
|
+
import { LlamaGrammarEvaluationState } from "./LlamaGrammarEvaluationState.js";
|
|
3
4
|
import { LlamaGrammar } from "./LlamaGrammar.js";
|
|
4
5
|
export type LlamaContextOptions = {
|
|
5
6
|
model: LlamaModel;
|
|
6
|
-
grammar?: LlamaGrammar;
|
|
7
7
|
prependBos?: boolean;
|
|
8
|
+
/**
|
|
9
|
+
* @deprecated use the `grammar` option on `LlamaChatSession`'s `prompt` function
|
|
10
|
+
* or the `grammarEvaluationState` option on `LlamaContext`'s `evaluate` function instead
|
|
11
|
+
* @hidden
|
|
12
|
+
*/
|
|
13
|
+
grammar?: LlamaGrammar;
|
|
14
|
+
/** If null, a random seed will be used */
|
|
15
|
+
seed?: number | null;
|
|
16
|
+
/** text context size */
|
|
17
|
+
contextSize?: number;
|
|
18
|
+
/** prompt processing batch size */
|
|
19
|
+
batchSize?: number;
|
|
20
|
+
/** use fp16 for KV cache */
|
|
21
|
+
f16Kv?: boolean;
|
|
22
|
+
/** the llama_eval() call computes all logits, not just the last one */
|
|
23
|
+
logitsAll?: boolean;
|
|
24
|
+
/** embedding mode only */
|
|
25
|
+
embedding?: boolean;
|
|
26
|
+
/** number of threads to use to evaluate tokens */
|
|
27
|
+
threads?: number;
|
|
28
|
+
};
|
|
29
|
+
export type LlamaContextRepeatPenalty = {
|
|
30
|
+
/** Tokens whose probability of being predicted as the next token should be lowered */
|
|
31
|
+
punishTokens: Uint32Array | (() => Uint32Array);
|
|
32
|
+
/**
|
|
33
|
+
* The relative amount to lower the probability of the tokens in `punishTokens` by
|
|
34
|
+
* Defaults to `1.1`.
|
|
35
|
+
* Set to `1` to disable.
|
|
36
|
+
*/
|
|
37
|
+
penalty?: number;
|
|
38
|
+
/**
|
|
39
|
+
* If a token appears n times in the `punishTokens` array, lower its probability by `n * frequencyPenalty`
|
|
40
|
+
* Disabled by default (`0`).
|
|
41
|
+
* Set to a value between `0` and `1` to enable.
|
|
42
|
+
*/
|
|
43
|
+
frequencyPenalty?: number;
|
|
44
|
+
/**
|
|
45
|
+
* Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`
|
|
46
|
+
* Disabled by default (`0`).
|
|
47
|
+
* Set to a value between `0` and `1` to enable.
|
|
48
|
+
*/
|
|
49
|
+
presencePenalty?: number;
|
|
8
50
|
};
|
|
9
51
|
export declare class LlamaContext {
|
|
52
|
+
private readonly _model;
|
|
10
53
|
private readonly _ctx;
|
|
11
54
|
private readonly _prependBos;
|
|
12
55
|
private _prependTokens;
|
|
13
|
-
|
|
56
|
+
/**
|
|
57
|
+
* @param {LlamaContextOptions} options
|
|
58
|
+
*/
|
|
59
|
+
constructor({ model, prependBos, grammar, seed, contextSize, batchSize, f16Kv, logitsAll, embedding, threads }: LlamaContextOptions);
|
|
14
60
|
encode(text: string): Uint32Array;
|
|
15
61
|
decode(tokens: Uint32Array | Token[]): string;
|
|
16
62
|
get prependBos(): boolean;
|
|
@@ -39,5 +85,16 @@ export declare class LlamaContext {
|
|
|
39
85
|
*/
|
|
40
86
|
getNlString(): string | null;
|
|
41
87
|
getContextSize(): number;
|
|
42
|
-
|
|
88
|
+
/**
|
|
89
|
+
* @param {Uint32Array} tokens
|
|
90
|
+
* @param {object} options
|
|
91
|
+
* @returns {AsyncGenerator<Token, void>}
|
|
92
|
+
*/
|
|
93
|
+
evaluate(tokens: Uint32Array, { temperature, topK, topP, grammarEvaluationState, repeatPenalty }?: {
|
|
94
|
+
temperature?: number;
|
|
95
|
+
topK?: number;
|
|
96
|
+
topP?: number;
|
|
97
|
+
grammarEvaluationState?: LlamaGrammarEvaluationState;
|
|
98
|
+
repeatPenalty?: LlamaContextRepeatPenalty;
|
|
99
|
+
}): AsyncGenerator<Token, void>;
|
|
43
100
|
}
|
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
import { removeNullFields } from "../utils/removeNullFields.js";
|
|
2
2
|
import { LLAMAContext } from "./LlamaBins.js";
|
|
3
3
|
export class LlamaContext {
|
|
4
|
+
_model;
|
|
4
5
|
_ctx;
|
|
5
6
|
_prependBos;
|
|
6
7
|
_prependTokens;
|
|
7
|
-
|
|
8
|
+
/** @internal */
|
|
9
|
+
_chatGrammar;
|
|
10
|
+
/**
|
|
11
|
+
* @param {LlamaContextOptions} options
|
|
12
|
+
*/
|
|
13
|
+
constructor({ model, prependBos = true, grammar, seed = model._contextOptions.seed, contextSize = model._contextOptions.contextSize, batchSize = model._contextOptions.batchSize, f16Kv = model._contextOptions.f16Kv, logitsAll = model._contextOptions.logitsAll, embedding = model._contextOptions.embedding, threads = model._contextOptions.threads }) {
|
|
14
|
+
this._model = model;
|
|
8
15
|
this._ctx = new LLAMAContext(model._model, removeNullFields({
|
|
9
|
-
|
|
16
|
+
seed: seed != null ? Math.max(-1, seed) : undefined,
|
|
17
|
+
contextSize,
|
|
18
|
+
batchSize,
|
|
19
|
+
f16Kv,
|
|
20
|
+
logitsAll,
|
|
21
|
+
embedding,
|
|
22
|
+
threads
|
|
10
23
|
}));
|
|
11
24
|
this._prependBos = prependBos;
|
|
12
25
|
this._prependTokens = [];
|
|
26
|
+
this._chatGrammar = grammar;
|
|
13
27
|
if (prependBos) {
|
|
14
28
|
this._prependTokens.unshift(this._ctx.tokenBos());
|
|
15
29
|
}
|
|
@@ -86,17 +100,35 @@ export class LlamaContext {
|
|
|
86
100
|
getContextSize() {
|
|
87
101
|
return this._ctx.getContextSize();
|
|
88
102
|
}
|
|
89
|
-
|
|
103
|
+
/**
|
|
104
|
+
* @param {Uint32Array} tokens
|
|
105
|
+
* @param {object} options
|
|
106
|
+
* @returns {AsyncGenerator<Token, void>}
|
|
107
|
+
*/
|
|
108
|
+
async *evaluate(tokens, { temperature = this._model._evaluationOptions.temperature, topK = this._model._evaluationOptions.topK, topP = this._model._evaluationOptions.topP, grammarEvaluationState, repeatPenalty } = {}) {
|
|
90
109
|
let evalTokens = tokens;
|
|
91
110
|
if (this._prependTokens.length > 0) {
|
|
92
111
|
const tokenArray = this._prependTokens.concat(Array.from(tokens));
|
|
93
112
|
evalTokens = Uint32Array.from(tokenArray);
|
|
94
113
|
this._prependTokens = [];
|
|
95
114
|
}
|
|
115
|
+
if (evalTokens.length === 0)
|
|
116
|
+
return;
|
|
96
117
|
// eslint-disable-next-line no-constant-condition
|
|
97
118
|
while (true) {
|
|
98
119
|
// Evaluate to get the next token.
|
|
99
|
-
const nextToken = await this._ctx.eval(evalTokens
|
|
120
|
+
const nextToken = await this._ctx.eval(evalTokens, removeNullFields({
|
|
121
|
+
temperature,
|
|
122
|
+
topK,
|
|
123
|
+
topP,
|
|
124
|
+
repeatPenalty: repeatPenalty?.penalty,
|
|
125
|
+
repeatPenaltyTokens: repeatPenalty?.punishTokens instanceof Function
|
|
126
|
+
? repeatPenalty.punishTokens()
|
|
127
|
+
: repeatPenalty?.punishTokens,
|
|
128
|
+
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
129
|
+
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
130
|
+
grammarEvaluationState: grammarEvaluationState?._state
|
|
131
|
+
}));
|
|
100
132
|
// the assistant finished answering
|
|
101
133
|
if (nextToken === this._ctx.tokenEos())
|
|
102
134
|
break;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiE5C,MAAM,OAAO,YAAY;IACJ,MAAM,CAAa;IACnB,IAAI,CAAe;IACnB,WAAW,CAAU;IAC9B,cAAc,CAAU;IAEhC,gBAAgB;IACA,YAAY,CAAgB;IAG5C;;OAEG;IACH,YAAmB,EACf,KAAK,EACL,UAAU,GAAG,IAAI,EACjB,OAAO,EACP,IAAI,GAAG,KAAK,CAAC,eAAe,CAAC,IAAI,EACjC,WAAW,GAAG,KAAK,CAAC,eAAe,CAAC,WAAW,EAC/C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,KAAK,GAAG,KAAK,CAAC,eAAe,CAAC,KAAK,EACnC,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,OAAO,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EACrB;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACxD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;SACV,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;QACzB,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;QAE5B,IAAI,UAAU,EAAE;YACZ,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;SACrD;IACL,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,IAAI,IAAI,KAAK,EAAE;YACX,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAA6B;QACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YACnB,OAAO,EAAE,CAAC;QAEd,IAAI,MAAM,YAAY,WAAW;YAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEpC,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,UAAU;Q
ACb,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAEpC,IAAI,OAAO,KAAK,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QAEhB,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAElC,IAAI,OAAO,IAAI,IAAI;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEM,cAAc;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB,EAAE,EACxC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,WAAW,EACxD,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,sBAAsB,EACtB,aAAa,KAIb,EAAE;QACF,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE;YAChC,MAAM,UAAU,GAAY,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YAE3E,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;SAC5B;QAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YACvB,OAAO;QAEX,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAU,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC;gBACvE,WAAW;gBACX,IAAI;gBACJ,IAAI;gBACJ,aAAa,EAAE,aAAa,EAAE,OAAO;gBACrC,mBAAmB,EAAE,aAAa,EAAE,YAAY,YAAY,QAAQ;oBAChE,CAAC,CAAC,aAAa,CAAC,YAAY,EAAE;oBAC9B,CAAC,CAAC,aAAa,EAAE,YAAY;gBACjC,4BAA4B,EAAE,aAAa,EAAE,eAAe;gBAC5D,6BAA6B,EAAE,aAAa,EAAE,gBAAgB;gBAC9D,sBAAsB,EAAE,sBAAsB,EAAE,MAAM;aACzD,CAAC,CAAC,CAAC;YAEJ,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CACJ"}
|
|
@@ -3,15 +3,28 @@ export type LlamaGrammarOptions = {
|
|
|
3
3
|
grammar: string;
|
|
4
4
|
/** print the grammar to stdout */
|
|
5
5
|
printGrammar?: boolean;
|
|
6
|
+
/** Consider any of these texts as EOS for the generated out. Only supported by `LlamaChatSession` */
|
|
7
|
+
stopStrings?: string[];
|
|
8
|
+
/** Trim whitespace from the end of the generated text. Only supported by `LlamaChatSession` */
|
|
9
|
+
trimWhitespaceSuffix?: boolean;
|
|
6
10
|
};
|
|
7
11
|
export declare class LlamaGrammar {
|
|
12
|
+
private readonly _stopStrings;
|
|
13
|
+
private readonly _trimWhitespaceSuffix;
|
|
8
14
|
/**
|
|
9
|
-
* GBNF files are supported.
|
|
10
|
-
* More info here:
|
|
15
|
+
* > GBNF files are supported.
|
|
16
|
+
* > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
|
|
17
|
+
* > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
|
|
11
18
|
* @param {object} options
|
|
12
19
|
* @param {string} options.grammar - GBNF grammar
|
|
20
|
+
* @param {string[]} [options.stopStrings] - Consider any of these texts as EOS for the generated out.
|
|
21
|
+
* Only supported by `LlamaChatSession`
|
|
22
|
+
* @param {boolean} [options.trimWhitespaceSuffix] - Trim whitespace from the end of the generated text.
|
|
23
|
+
* Only supported by `LlamaChatSession`
|
|
13
24
|
* @param {boolean} [options.printGrammar] - print the grammar to stdout
|
|
14
25
|
*/
|
|
15
|
-
constructor({ grammar, printGrammar }: LlamaGrammarOptions);
|
|
26
|
+
constructor({ grammar, stopStrings, trimWhitespaceSuffix, printGrammar }: LlamaGrammarOptions);
|
|
27
|
+
get stopStrings(): readonly string[];
|
|
28
|
+
get trimWhitespaceSuffix(): boolean;
|
|
16
29
|
static getFor(type: "json" | "list" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
|
|
17
30
|
}
|
|
@@ -5,24 +5,43 @@ import { LLAMAGrammar } from "./LlamaBins.js";
|
|
|
5
5
|
export class LlamaGrammar {
|
|
6
6
|
/** @internal */
|
|
7
7
|
_grammar;
|
|
8
|
+
_stopStrings;
|
|
9
|
+
_trimWhitespaceSuffix;
|
|
8
10
|
/**
|
|
9
|
-
* GBNF files are supported.
|
|
10
|
-
* More info here:
|
|
11
|
+
* > GBNF files are supported.
|
|
12
|
+
* > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
|
|
13
|
+
* > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
|
|
11
14
|
* @param {object} options
|
|
12
15
|
* @param {string} options.grammar - GBNF grammar
|
|
16
|
+
* @param {string[]} [options.stopStrings] - Consider any of these texts as EOS for the generated out.
|
|
17
|
+
* Only supported by `LlamaChatSession`
|
|
18
|
+
* @param {boolean} [options.trimWhitespaceSuffix] - Trim whitespace from the end of the generated text.
|
|
19
|
+
* Only supported by `LlamaChatSession`
|
|
13
20
|
* @param {boolean} [options.printGrammar] - print the grammar to stdout
|
|
14
21
|
*/
|
|
15
|
-
constructor({ grammar, printGrammar = false }) {
|
|
22
|
+
constructor({ grammar, stopStrings = [], trimWhitespaceSuffix = false, printGrammar = false }) {
|
|
16
23
|
this._grammar = new LLAMAGrammar(grammar, {
|
|
17
24
|
printGrammar
|
|
18
25
|
});
|
|
26
|
+
this._stopStrings = stopStrings ?? [];
|
|
27
|
+
this._trimWhitespaceSuffix = trimWhitespaceSuffix;
|
|
28
|
+
}
|
|
29
|
+
get stopStrings() {
|
|
30
|
+
return this._stopStrings;
|
|
31
|
+
}
|
|
32
|
+
get trimWhitespaceSuffix() {
|
|
33
|
+
return this._trimWhitespaceSuffix;
|
|
19
34
|
}
|
|
20
35
|
static async getFor(type) {
|
|
21
36
|
const grammarsFolder = await getGrammarsFolder();
|
|
22
37
|
const grammarFile = path.join(grammarsFolder, type + ".gbnf");
|
|
23
38
|
if (await fs.pathExists(grammarFile)) {
|
|
24
39
|
const grammar = await fs.readFile(grammarFile, "utf8");
|
|
25
|
-
return new LlamaGrammar({
|
|
40
|
+
return new LlamaGrammar({
|
|
41
|
+
grammar,
|
|
42
|
+
stopStrings: ["\n".repeat(10)],
|
|
43
|
+
trimWhitespaceSuffix: true
|
|
44
|
+
});
|
|
26
45
|
}
|
|
27
46
|
throw new Error(`Grammar file for type "${type}" was not found in "${grammarsFolder}"`);
|
|
28
47
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiB5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IACtB,YAAY,CAAoB;IAChC,qBAAqB,CAAU;IAEhD;;;;;;;;;;;OAWG;IACH,YAAmB,EACf,OAAO,EAAE,WAAW,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,KAAK,EAC3D;QAClB,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,GAAG,WAAW,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;IACtD,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC;gBACpB,OAAO;gBACP,WAAW,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC9B,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;SACN;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { LlamaGrammar } from "./LlamaGrammar.js";
|
|
2
|
+
export type LlamaGrammarEvaluationStateOptions = {
|
|
3
|
+
grammar: LlamaGrammar;
|
|
4
|
+
};
|
|
5
|
+
export declare class LlamaGrammarEvaluationState {
|
|
6
|
+
/**
|
|
7
|
+
* Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
|
|
8
|
+
* Create a new grammar evaluation state for every response you generate with the model.
|
|
9
|
+
* This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
|
|
10
|
+
* @param {object} options
|
|
11
|
+
* @param {LlamaGrammar} options.grammar
|
|
12
|
+
*/
|
|
13
|
+
constructor({ grammar }: LlamaGrammarEvaluationStateOptions);
|
|
14
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { LLAMAGrammarEvaluationState } from "./LlamaBins.js";
|
|
2
|
+
export class LlamaGrammarEvaluationState {
|
|
3
|
+
/** @internal */
|
|
4
|
+
_state;
|
|
5
|
+
/**
|
|
6
|
+
* Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
|
|
7
|
+
* Create a new grammar evaluation state for every response you generate with the model.
|
|
8
|
+
* This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
|
|
9
|
+
* @param {object} options
|
|
10
|
+
* @param {LlamaGrammar} options.grammar
|
|
11
|
+
*/
|
|
12
|
+
constructor({ grammar }) {
|
|
13
|
+
this._state = new LLAMAGrammarEvaluationState(grammar._grammar);
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=LlamaGrammarEvaluationState.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,2BAA2B,EAAC,MAAM,gBAAgB,CAAC;AAQ3D,MAAM,OAAO,2BAA2B;IACpC,gBAAgB;IACA,MAAM,CAA8B;IAEpD;;;;;;OAMG;IACH,YAAmB,EAAC,OAAO,EAAqC;QAC5D,IAAI,CAAC,MAAM,GAAG,IAAI,2BAA2B,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpE,CAAC;CACJ"}
|