node-llama-cpp 2.8.0 → 3.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -8
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -8
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +1 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +13 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +49 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -0
- package/dist/cli/cli.js +1 -1
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.js +20 -10
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/index.d.ts +6 -4
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +19 -4
- package/dist/llamaEvaluator/LlamaBins.js +3 -3
- package/dist/llamaEvaluator/LlamaChatSession.d.ts +24 -23
- package/dist/llamaEvaluator/LlamaChatSession.js +90 -36
- package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.d.ts +112 -0
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js +640 -0
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/llamaEvaluator/LlamaContext/types.d.ts +90 -0
- package/dist/llamaEvaluator/LlamaContext/types.js +2 -0
- package/dist/llamaEvaluator/LlamaContext/types.js.map +1 -0
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +5 -0
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +16 -0
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +5 -0
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +42 -0
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +2 -0
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +13 -0
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +5 -5
- package/dist/llamaEvaluator/LlamaGrammar.js +7 -7
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +6 -5
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +8 -7
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +93 -112
- package/dist/llamaEvaluator/LlamaModel.js +294 -59
- package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
- package/dist/types.d.ts +3 -1
- package/dist/utils/ReplHistory.js +1 -1
- package/dist/utils/ReplHistory.js.map +1 -1
- package/dist/utils/getBin.d.ts +71 -39
- package/dist/utils/getBin.js.map +1 -1
- package/dist/utils/getReleaseInfo.d.ts +1 -1
- package/dist/utils/getReleaseInfo.js.map +1 -1
- package/dist/utils/parseModelFileName.d.ts +9 -0
- package/dist/utils/parseModelFileName.js +68 -0
- package/dist/utils/parseModelFileName.js.map +1 -0
- package/dist/utils/parseModelTypeDescription.d.ts +6 -0
- package/dist/utils/parseModelTypeDescription.js +9 -0
- package/dist/utils/parseModelTypeDescription.js.map +1 -0
- package/llama/.clang-format +10 -9
- package/llama/addon.cpp +689 -356
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +2 -2
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/mac-arm64/ggml-metal.metal +107 -1
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/ggml-metal.metal +107 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/package.json +13 -7
- package/dist/chatWrappers/createChatWrapperByBos.d.ts +0 -2
- package/dist/chatWrappers/createChatWrapperByBos.js +0 -14
- package/dist/chatWrappers/createChatWrapperByBos.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext.d.ts +0 -100
- package/dist/llamaEvaluator/LlamaContext.js +0 -141
- package/dist/llamaEvaluator/LlamaContext.js.map +0 -1
- package/dist/utils/withLock.d.ts +0 -1
- package/dist/utils/withLock.js +0 -19
- package/dist/utils/withLock.js.map +0 -1
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
|
|
2
|
+
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
|
+
import { AddonContext } from "../LlamaBins.js";
|
|
4
|
+
import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
|
|
5
|
+
/**
 * Wraps a native `AddonContext` and multiplexes decode work from multiple
 * {@link LlamaContextSequence} instances into shared batches.
 *
 * Decode requests are queued (`_queuedDecodes`) and flushed by
 * `dispatchPendingBatch()`, which runs under a `withLock(this, "context")`
 * lock so batch construction and native decoding never overlap with
 * sequence-side context edits (which take the same lock).
 */
export class LlamaContext {
    /** @internal */ _ctx;
    /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
    /** @internal */ _model;
    /** @internal */ _contextSize;
    /** @internal */ _batchSize;
    /** @internal */ _totalSequences;
    /** @internal */ _unusedSequenceIds = [];
    /** @internal */ _batchingOptions;
    /** @internal */ _queuedDecodeSequenceIds = new Set();
    /** @internal */ _queuedDecodes = [];
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _nextGeneratedSequenceId = 0;
    /** @internal */ _dispatchDecodeScheduled = false;
    /** @internal */ _batchDispatchPending = false;
    // Identity token for the currently scheduled dispatch; replaced with a fresh
    // object to invalidate previously scheduled dispatch callbacks (see _scheduleDecode).
    /** @internal */ _currentDispatchBatchHandle = {};
    /** @internal */ _allocatedContextSize;
    /** @internal */ _disposed = false;
    // Fired exactly once, when dispose() runs.
    onDispose = new EventRelay();
    /**
     * @param options - `sequences` is the number of independent sequences that share
     * this context; `contextSize` defaults to the model's train context size and
     * `batchSize` defaults to `contextSize` (but is never smaller than the sequence
     * count). Throws `DisposedError` when the model is already disposed.
     */
    constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, f16Kv, logitsAll, embedding, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {} }) {
        if (model.disposed)
            throw new DisposedError();
        this._model = model;
        this._totalSequences = Math.max(1, Math.floor(sequences));
        this._contextSize = Math.max(2, contextSize);
        this._batchSize = Math.max(batchSize, this._totalSequences);
        // The native context is sized to hold every sequence's window at once.
        this._ctx = new AddonContext(this._model._model, removeNullFields({
            seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
            contextSize: contextSize * this._totalSequences,
            batchSize: this._batchSize,
            f16Kv,
            logitsAll,
            embedding,
            threads
        }));
        this._batchingOptions = {
            dispatchSchedule: batchingDispatchSchedule,
            itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
        };
        // Bound once so sequences can hand it to a FinalizationRegistry callback.
        this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
        this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
        this._disposeAggregator.add(() => {
            this._ctx.dispose();
        });
        // WeakRef-based listener: disposing the model disposes this context,
        // without the listener itself keeping the context alive.
        this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
    }
    /** Releases the native context. Safe to call more than once. */
    dispose() {
        if (this._disposed)
            return;
        this._disposed = true;
        this._disposeAggregator.dispose();
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    get disposed() {
        return this._disposed;
    }
    get model() {
        return this._model;
    }
    get contextSize() {
        return this._contextSize;
    }
    get batchSize() {
        return this._batchSize;
    }
    /**
     * Context size actually allocated by the native layer; queried lazily and cached.
     * Throws `DisposedError` after dispose().
     */
    getAllocatedContextSize() {
        this._ensureNotDisposed();
        if (this._allocatedContextSize == null)
            this._allocatedContextSize = this._ctx.getContextSize();
        return this._allocatedContextSize;
    }
    get totalSequences() {
        return this._totalSequences;
    }
    // Sequences that can still be handed out: never-generated ids plus reclaimed ones.
    get sequencesLeft() {
        return this._totalSequences - this._nextGeneratedSequenceId + this._unusedSequenceIds.length;
    }
    /**
     * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
     * When there are no sequences left, this method will throw an error.
     * @param [options]
     */
    getSequence({ prependBos = true, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
        this._ensureNotDisposed();
        const nextSequenceId = this._popSequenceId();
        if (nextSequenceId == null)
            throw new Error("No sequences left");
        return LlamaContextSequence._create({
            sequenceId: nextSequenceId,
            context: this,
            prependBos,
            contextShift: {
                size: contextShiftSize,
                strategy: contextShiftStrategy
            }
        });
    }
    /**
     * Flushes the queued decodes now (instead of waiting for the scheduled dispatch).
     * Runs under the "context" lock; keeps decoding batches until the queue drains.
     */
    dispatchPendingBatch() {
        // Invalidate any dispatch callback scheduled earlier (they compare handles).
        this._currentDispatchBatchHandle = {};
        this._dispatchDecodeScheduled = false;
        if (this._batchDispatchPending)
            return;
        this._batchDispatchPending = true;
        void withLock(this, "context", async () => {
            this._currentDispatchBatchHandle = {};
            this._dispatchDecodeScheduled = false;
            this._batchDispatchPending = false;
            let prioritizeStrategy;
            try {
                this._ensureNotDisposed();
                prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
            }
            catch (err) {
                // A broken strategy (or disposal) fails every queued decode at once.
                this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
                return;
            }
            let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
            while (shouldHaveAnotherBatch) {
                // Snapshot the queue into plain batch items the strategy can reorder.
                const batchItemToQueuedDecodeMap = new Map();
                const batchItemsList = [];
                for (const queuedDecode of this._queuedDecodes) {
                    const batchItem = {
                        tokens: queuedDecode.tokens,
                        evaluationPriority: queuedDecode.evaluationPriority
                    };
                    batchItemToQueuedDecodeMap.set(batchItem, queuedDecode);
                    batchItemsList.push(batchItem);
                }
                let prioritizedItems;
                try {
                    prioritizedItems = prioritizeStrategy({
                        items: batchItemsList,
                        size: this._batchSize
                    });
                }
                catch (err) {
                    this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
                    return;
                }
                let batchTokenSlotsLeft = this._batchSize;
                const afterDecodeActions = [];
                const queuedDecodesToDelete = new Set();
                const currentQueuedDecodeItems = new Set();
                const currentBatchItems = [];
                let currentBatchSize = 0;
                // Fill the batch in strategy order, clamping each item to the slots left.
                for (const prioritizedItem of prioritizedItems) {
                    const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
                    if (queuedDecode == null)
                        throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
                            "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
                    const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
                    if (processAmount <= 0)
                        continue;
                    batchTokenSlotsLeft -= processAmount;
                    currentBatchItems.push({
                        queuedDecode,
                        processAmount
                    });
                    currentBatchSize += processAmount;
                }
                if (currentBatchSize !== 0)
                    this._ctx.initBatch(currentBatchSize);
                for (const { queuedDecode, processAmount } of currentBatchItems) {
                    let batchLogitIndex;
                    try {
                        // A logit is only requested when this chunk is the decode's last chunk.
                        batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
                    }
                    catch (err) {
                        // Only this one decode fails; the rest of the batch continues.
                        this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
                        continue;
                    }
                    currentQueuedDecodeItems.add(queuedDecode);
                    if (queuedDecode.tokens.length === processAmount) {
                        // Fully consumed: settle its promise after the batch decodes.
                        queuedDecodesToDelete.add(queuedDecode);
                        afterDecodeActions.push({
                            batchLogitIndex,
                            response: queuedDecode.response,
                            onDone: queuedDecode.onDone
                        });
                    }
                    else {
                        // Partially consumed: keep the remainder queued for the next batch.
                        queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
                        queuedDecode.firstTokenSequenceIndex += processAmount;
                    }
                    if (batchTokenSlotsLeft === 0)
                        break;
                }
                // Remove fully-consumed decodes from the queue (in-place splice with index rewind).
                for (let i = 0; i < this._queuedDecodes.length; i++) {
                    const queuedDecode = this._queuedDecodes[i];
                    if (queuedDecodesToDelete.has(queuedDecode)) {
                        this._queuedDecodes.splice(i, 1);
                        this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
                        i--;
                    }
                }
                shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
                try {
                    if (currentBatchSize !== 0)
                        await this._ctx.decodeBatch();
                }
                catch (err) {
                    this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
                    return;
                }
                for (const action of afterDecodeActions) {
                    const [accept, reject] = action.response;
                    if (action.onDone != null && action.batchLogitIndex != null) {
                        try {
                            accept(action.onDone(action.batchLogitIndex ?? null));
                        }
                        catch (err) {
                            reject(err);
                        }
                    }
                    // NOTE(review): reached even when accept/reject ran above; harmless
                    // because a promise ignores settlement after the first one.
                    accept(undefined);
                }
            }
        });
    }
    /**
     * Queues a chunk of tokens for decoding and resolves once that chunk was decoded.
     * `onDone` (if given) receives the batch logit index and its return value becomes
     * the resolved value.
     * @internal
     */
    async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
        return await new Promise((accept, reject) => {
            this._queuedDecodes.push({
                sequenceId,
                tokens,
                firstTokenSequenceIndex,
                generateLogitAtTheEnd,
                evaluationPriority,
                response: [accept, reject],
                onDone
            });
            this._queuedDecodeSequenceIds.add(sequenceId);
            this._scheduleDecode();
        });
    }
    /**
     * Returns a sequence id to the free pool once its owner is disposed or GCed.
     * @internal
     */
    _reclaimUnusedSequenceId(sequenceId) {
        if (this._disposed)
            return;
        void withLock(this, "context", async () => {
            this._ctx.disposeSequence(sequenceId);
            this._unusedSequenceIds.push(sequenceId);
            this._onReclaimUnusedSequenceId.dispatchEvent();
        });
    }
    // Prefers reclaimed ids, then mints fresh ones up to _totalSequences; null when exhausted.
    /** @internal */
    _popSequenceId() {
        if (this._unusedSequenceIds.length > 0)
            return this._unusedSequenceIds.shift();
        if (this._nextGeneratedSequenceId < this._totalSequences) {
            const sequenceId = this._nextGeneratedSequenceId;
            this._nextGeneratedSequenceId++;
            return sequenceId;
        }
        return null;
    }
    /**
     * Schedules a batch dispatch per the configured dispatch schedule; dispatches
     * immediately when every sequence already has a queued decode.
     * @internal
     */
    _scheduleDecode() {
        if (this._dispatchDecodeScheduled || this._batchDispatchPending)
            return;
        this._dispatchDecodeScheduled = true;
        // Capture the current handle: if a dispatch runs before the scheduled
        // callback fires, the handle changes and the stale callback becomes a no-op.
        const currentPendingBatchHandle = this._currentDispatchBatchHandle;
        const dispatch = () => {
            if (this._currentDispatchBatchHandle !== currentPendingBatchHandle)
                return;
            this.dispatchPendingBatch();
        };
        const dispatchSchedule = this._batchingOptions.dispatchSchedule;
        if (this._queuedDecodeSequenceIds.size === this._totalSequences)
            dispatch();
        if (dispatchSchedule === "nextTick")
            setTimeout(dispatch, 0);
        else
            dispatchSchedule(dispatch);
    }
    /**
     * Rejects the given queued decodes with `err` and removes them from the queue.
     * @internal
     */
    _dispatchErrorForQueuedDecodesAndDequeue(queuedDecodes, err) {
        for (const pendingDecode of queuedDecodes) {
            const [, reject] = pendingDecode.response;
            reject(err);
        }
        for (let i = 0; i < this._queuedDecodes.length; i++) {
            const item = this._queuedDecodes[i];
            if (queuedDecodes.has(item)) {
                this._queuedDecodes.splice(i, 1);
                this._queuedDecodeSequenceIds.delete(item.sequenceId);
                i--;
            }
        }
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposed)
            throw new DisposedError();
    }
}
|
|
308
|
+
/**
 * A single independent token sequence inside a {@link LlamaContext}.
 * Tracks the tokens evaluated so far (with per-token priorities used by the
 * "eraseLowestTokenPriorityBeginning" context-shift strategy) and frees its
 * sequence id back to the context on dispose or garbage collection.
 */
export class LlamaContextSequence {
    /** @internal */ _sequenceId;
    /** @internal */ _gcRegistry;
    /** @internal */ _context;
    /** @internal */ _prependBos;
    /** @internal */ _contextShift;
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _contextTokens = [];
    // Parallel to _contextTokens: the priority each token was evaluated with.
    /** @internal */ _contextTokenPriorities = [];
    /** @internal */ _nextTokenIndex = 0;
    // Tokens to be prepended before the next evaluation (e.g. the BOS token).
    /** @internal */ _prependTokens = [];
    /** @internal */ _prependTokenPriorities = [];
    /** @internal */ _disposed = false;
    // Fired exactly once, when dispose() runs.
    onDispose = new EventRelay();
    // Use the static _create factory instead of calling this directly.
    constructor({ sequenceId, context, prependBos, contextShift }) {
        this._sequenceId = sequenceId;
        this._context = context;
        this._contextShift = contextShift;
        // If this sequence is GCed without dispose(), reclaim its id anyway.
        this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
        this._prependTokens = [];
        this._prependTokenPriorities = [];
        if (prependBos && this._context.model.tokens.bos != null) {
            this._prependTokens.unshift(this._context.model.tokens.bos);
            this._prependTokenPriorities.unshift(1);
            this._prependBos = true;
        }
        else
            this._prependBos = false;
        this._gcRegistry.register(this, sequenceId);
        this._disposeAggregator.add(() => this._gcRegistry.unregister(this));
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
        // Disposing the model disposes this sequence (WeakRef keeps GC possible).
        this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextSequenceIfReferenced.bind(null, new WeakRef(this))));
        this._disposeAggregator.add(() => {
            this._context._reclaimUnusedSequenceId(this._sequenceId);
        });
    }
    /** Releases this sequence and returns its id to the context. Idempotent. */
    dispose() {
        if (this._disposed)
            return;
        this._disposeAggregator.dispose();
        this._contextTokens.length = 0;
        this._contextTokenPriorities.length = 0;
        this._disposed = true;
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    get disposed() {
        return this._disposed;
    }
    get context() {
        return this._context;
    }
    get model() {
        return this._context.model;
    }
    get prependBos() {
        return this._prependBos;
    }
    get nextTokenIndex() {
        return this._nextTokenIndex;
    }
    // Defensive copies: callers must not mutate internal state through these.
    get contextTokens() {
        return this._contextTokens.slice();
    }
    get contextTokenPriorities() {
        return this._contextTokenPriorities.slice();
    }
    /**
     * Clear the history of the sequence.
     * If `prependBos` was enabled, the BOS token will be prepended to the sequence again.
     */
    async clearHistory() {
        this._ensureNotDisposed();
        await this.eraseContextTokenRanges([{ start: 0, end: this._nextTokenIndex }]);
        this._prependTokens.length = 0;
        this._prependTokenPriorities.length = 0;
        if (this._prependBos && this._context.model.tokens.bos != null) {
            this._prependTokens.push(this._context.model.tokens.bos);
            this._prependTokenPriorities.push(1);
        }
    }
    /**
     * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
     * Each range is half-open: `start` is inclusive and `end` is exclusive.
     * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
     */
    async eraseContextTokenRanges(ranges) {
        this._ensureNotDisposed();
        // Shares the context lock with batch dispatch so native cells are not
        // edited mid-decode.
        await withLock(this._context, "context", async () => {
            this._ensureNotDisposed();
            if (ranges.length === 0)
                return;
            // Normalize: drop empty/out-of-range ranges, fix inverted bounds, clamp
            // to the evaluated length, sort, and merge overlaps.
            const resolvedRanges = ranges
                .map(({ start, end }) => {
                    if (start === end)
                        return null;
                    if (start > end)
                        [start, end] = [end, start];
                    if (end > this._nextTokenIndex)
                        end = this._nextTokenIndex;
                    if (start >= this._nextTokenIndex)
                        return null;
                    return { start, end };
                })
                .filter((range) => range != null)
                .sort((a, b) => a.start - b.start)
                .reduce((ranges, range) => {
                    if (ranges.length === 0)
                        return [range];
                    const lastRange = ranges[ranges.length - 1];
                    if (lastRange.end >= range.start) {
                        lastRange.end = Math.max(lastRange.end, range.end);
                        return ranges;
                    }
                    ranges.push(range);
                    return ranges;
                }, []);
            let removedTokens = 0;
            let lastDeleteRangeEndPos = null;
            for (const range of resolvedRanges) {
                // JS-side mirrors are spliced at positions adjusted for prior removals;
                // native cells are removed at original positions and shifted left afterwards.
                this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
                this._contextTokenPriorities.splice(range.start - removedTokens, range.end - range.start);
                this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
                if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
                    this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
                removedTokens += range.end - range.start;
                lastDeleteRangeEndPos = range.end;
            }
            // Shift the tail after the last deleted range.
            if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
                this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
            this._nextTokenIndex -= removedTokens;
        });
    }
    /**
     * Evaluate tokens and generate new ones; returns an async generator of tokens.
     * @param tokens
     * @param [options]
     */
    evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, tokenPriority = 1 } = {}) {
        return this._evaluate(tokens, {
            temperature,
            topK,
            topP,
            grammarEvaluationState,
            repeatPenalty,
            evaluationPriority,
            tokenPriority
        });
    }
    /**
     * Evaluate the provided tokens into the context sequence without generating new tokens.
     * @param tokens
     * @param [options]
     */
    async evaluateWithoutGeneratingNewTokens(tokens, { evaluationPriority = 5, tokenPriority = 1 } = {}) {
        const iterator = this._evaluate(tokens, {
            generateNewTokens: false,
            evaluationPriority,
            tokenPriority
        });
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        for await (const token of iterator) {
            // Array.from doesn't work with async generators, so we have to iterate over the generator
        }
    }
    /**
     * Core evaluate loop: decode the input (plus any pending prepend tokens),
     * then repeatedly sample one token, yield it, and feed it back in — until
     * the model emits EOS or sampling yields no token.
     * @internal
     */
    async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, tokenPriority = 1, generateNewTokens = true } = {}) {
        this._ensureNotDisposed();
        // A scalar priority applies uniformly to every input token.
        if (!(tokenPriority instanceof Array))
            tokenPriority = Array(tokens.length).fill(tokenPriority);
        let evalTokens = tokens;
        if (generateNewTokens && tokens.length === 0)
            return;
        if (this._prependTokens.length > 0) {
            // Consume the pending prepend tokens (e.g. BOS) ahead of the input.
            evalTokens = this._prependTokens.concat(tokens);
            tokenPriority = this._prependTokenPriorities.concat(tokenPriority);
            this._prependTokens = [];
            this._prependTokenPriorities = [];
        }
        if (evalTokens.length === 0)
            return;
        // eslint-disable-next-line no-constant-condition
        while (true) {
            this._ensureNotDisposed();
            // Evaluate to get the next token.
            const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, tokenPriority, (batchLogitIndex) => {
                return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
                    temperature,
                    topK,
                    topP,
                    repeatPenalty: repeatPenalty?.penalty,
                    repeatPenaltyTokens: repeatPenalty?.punishTokens instanceof Function
                        ? repeatPenalty.punishTokens()
                        : repeatPenalty?.punishTokens,
                    repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
                    repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
                    grammarEvaluationState: grammarEvaluationState?._state
                }));
            });
            if (nextToken == null)
                return;
            // the model finished generating text
            if (nextToken === this._context.model.tokens.eos)
                break;
            yield nextToken;
            // Create tokens for the next eval.
            evalTokens = [nextToken];
        }
    }
    /**
     * Decodes `tokens` in chunks that fit the remaining context space, triggering
     * a context shift when the sequence is full. When `generateLogit` is set,
     * `onDecodeDone` runs after the final chunk and its result is returned.
     * @internal
     */
    async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenPriority, onDecodeDone) {
        this._ensureNotDisposed();
        const tokensLeftToDecode = tokens.slice();
        return await withLock(this, "evaluate", async () => {
            while (tokensLeftToDecode.length > 0) {
                this._ensureNotDisposed();
                let freeSpace = this._context.contextSize - this._nextTokenIndex;
                if (freeSpace === 0) {
                    await this._freeUpSpaceForTokens();
                    freeSpace = this._context.contextSize - this._nextTokenIndex;
                    if (freeSpace === 0)
                        throw new Error("Failed to free up space for new tokens");
                }
                const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
                const generateLogitAtTheEnd = generateLogit && tokensLeftToDecode.length === 0;
                const nextToken = await this._context._decodeTokens({
                    sequenceId: this._sequenceId,
                    tokens: tokensToDecode,
                    firstTokenSequenceIndex: this._nextTokenIndex,
                    generateLogitAtTheEnd,
                    evaluationPriority
                }, !generateLogitAtTheEnd
                    ? undefined
                    : onDecodeDone);
                this._nextTokenIndex += tokensToDecode.length;
                this._contextTokens = this._contextTokens.concat(tokensToDecode);
                for (let i = 0; i < tokensToDecode.length; i++) {
                    // NOTE(review): `i` restarts per chunk while `tokenPriority` indexes the
                    // full input; later chunks fall back to the last priority via `??` —
                    // presumably intentional, verify against callers.
                    this._contextTokenPriorities.push(tokenPriority[i] ?? tokenPriority[tokenPriority.length - 1] ?? 1);
                }
                if (generateLogitAtTheEnd && nextToken != null)
                    return nextToken;
            }
            return null;
        });
    }
    /**
     * Frees space by erasing tokens per the configured context-shift strategy:
     * lowest-priority-first, plain erase-from-beginning, or a custom callback.
     * @internal
     */
    async _freeUpSpaceForTokens() {
        this._ensureNotDisposed();
        const size = Math.min(this._nextTokenIndex, Math.max(1, this._contextShift.size instanceof Function
            ? await this._contextShift.size(this)
            : this._contextShift.size));
        // Re-check: the size callback may have awaited, allowing a dispose meanwhile.
        this._ensureNotDisposed();
        if (this._contextShift.strategy === "eraseLowestTokenPriorityBeginning") {
            let leftTokensToErase = Math.min(size, this._contextTokenPriorities.length);
            let previousLowestPriority = null;
            let indexesToErase = [];
            // Repeatedly collect the indexes holding the next-lowest priority level
            // until enough tokens are marked for erasure.
            while (leftTokensToErase > 0 && this._contextTokenPriorities.length > 0) {
                let currentLowestPriorityIndexes = [];
                let currentLowestPriorityFound = null;
                for (let i = 0; leftTokensToErase > currentLowestPriorityIndexes.length && i < this._contextTokenPriorities.length; i++) {
                    const tokenPriority = this._contextTokenPriorities[i];
                    if (currentLowestPriorityFound == null || (tokenPriority < currentLowestPriorityFound && (previousLowestPriority == null || tokenPriority > previousLowestPriority))) {
                        currentLowestPriorityFound = tokenPriority;
                        currentLowestPriorityIndexes = [i];
                    }
                    else if (tokenPriority === currentLowestPriorityFound) {
                        currentLowestPriorityIndexes.push(i);
                    }
                }
                previousLowestPriority = currentLowestPriorityFound;
                indexesToErase = indexesToErase.concat(currentLowestPriorityIndexes);
                leftTokensToErase -= currentLowestPriorityIndexes.length;
            }
            // Collapse the sorted indexes into contiguous half-open ranges.
            await this.eraseContextTokenRanges(indexesToErase.reduce((ranges, index) => {
                if (ranges.length === 0)
                    return [{ start: index, end: index + 1 }];
                const lastRange = ranges[ranges.length - 1];
                if (lastRange.end >= index) {
                    lastRange.end = Math.max(lastRange.end, index + 1);
                    return ranges;
                }
                ranges.push({ start: index, end: index + 1 });
                return ranges;
            }, []));
        }
        else if (this._contextShift.strategy === "eraseBeginning") {
            await this.eraseContextTokenRanges([{ start: 0, end: size }]);
        }
        else {
            // Custom strategy callback; fall back to erasing the beginning if it
            // did not free enough space.
            const ranges = await this._contextShift.strategy({
                sequence: this,
                size
            });
            if (ranges == null)
                throw new Error("Invalid delete ranges");
            await this.eraseContextTokenRanges(ranges);
            if (this.nextTokenIndex >= this._context.contextSize)
                await this.eraseContextTokenRanges([{ start: 0, end: size }]);
        }
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposed)
            throw new DisposedError();
    }
    /**
     * We need this to make it impossible to manually create instances of this class outside the code of this library
     * @internal
     */
    static _create({ sequenceId, context, prependBos = true, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
        return new LlamaContextSequence({
            sequenceId,
            context,
            prependBos,
            contextShift: {
                size: contextShiftSize,
                strategy: contextShiftStrategy
            }
        });
    }
}
|
|
630
|
+
// Disposes the context behind the given WeakRef, if it is still alive.
// Used as a model-dispose listener that must not keep the context reachable.
function disposeContextIfReferenced(contextRef) {
    contextRef.deref()?.dispose();
}
|
|
635
|
+
// Disposes the context sequence behind the given WeakRef, if it is still alive.
// Mirrors disposeContextIfReferenced, for LlamaContextSequence instances.
function disposeContextSequenceIfReferenced(contextRef) {
    contextRef.deref()?.dispose();
}
|
|
640
|
+
//# sourceMappingURL=LlamaContext.js.map
|