node-llama-cpp 2.7.5 → 3.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +1 -1
  2. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -8
  3. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -8
  4. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +1 -1
  5. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +13 -0
  6. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +49 -0
  7. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -0
  8. package/dist/cli/cli.js +1 -1
  9. package/dist/cli/cli.js.map +1 -1
  10. package/dist/cli/commands/BuildCommand.d.ts +2 -2
  11. package/dist/cli/commands/BuildCommand.js +1 -1
  12. package/dist/cli/commands/BuildCommand.js.map +1 -1
  13. package/dist/cli/commands/ChatCommand.js +20 -10
  14. package/dist/cli/commands/ChatCommand.js.map +1 -1
  15. package/dist/cli/commands/ClearCommand.js +2 -1
  16. package/dist/cli/commands/ClearCommand.js.map +1 -1
  17. package/dist/cli/commands/DownloadCommand.d.ts +4 -5
  18. package/dist/cli/commands/DownloadCommand.js +3 -2
  19. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  20. package/dist/commands.d.ts +2 -1
  21. package/dist/commands.js +2 -1
  22. package/dist/commands.js.map +1 -1
  23. package/dist/config.d.ts +1 -0
  24. package/dist/config.js +1 -0
  25. package/dist/config.js.map +1 -1
  26. package/dist/index.d.ts +7 -4
  27. package/dist/index.js +6 -4
  28. package/dist/index.js.map +1 -1
  29. package/dist/llamaEvaluator/LlamaBins.d.ts +19 -4
  30. package/dist/llamaEvaluator/LlamaBins.js +3 -3
  31. package/dist/llamaEvaluator/LlamaChatSession.d.ts +24 -23
  32. package/dist/llamaEvaluator/LlamaChatSession.js +90 -36
  33. package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
  34. package/dist/llamaEvaluator/LlamaContext/LlamaContext.d.ts +112 -0
  35. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js +640 -0
  36. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +1 -0
  37. package/dist/llamaEvaluator/LlamaContext/types.d.ts +90 -0
  38. package/dist/llamaEvaluator/LlamaContext/types.js +2 -0
  39. package/dist/llamaEvaluator/LlamaContext/types.js.map +1 -0
  40. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +5 -0
  41. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +16 -0
  42. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  43. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +5 -0
  44. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +42 -0
  45. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  46. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +2 -0
  47. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +13 -0
  48. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  49. package/dist/llamaEvaluator/LlamaGrammar.d.ts +5 -5
  50. package/dist/llamaEvaluator/LlamaGrammar.js +7 -7
  51. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +6 -5
  52. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +8 -7
  53. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +1 -1
  54. package/dist/llamaEvaluator/LlamaModel.d.ts +93 -112
  55. package/dist/llamaEvaluator/LlamaModel.js +294 -59
  56. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  57. package/dist/types.d.ts +3 -1
  58. package/dist/utils/ReplHistory.js +1 -1
  59. package/dist/utils/ReplHistory.js.map +1 -1
  60. package/dist/utils/cloneLlamaCppRepo.d.ts +1 -0
  61. package/dist/utils/cloneLlamaCppRepo.js +26 -1
  62. package/dist/utils/cloneLlamaCppRepo.js.map +1 -1
  63. package/dist/utils/getBin.d.ts +71 -39
  64. package/dist/utils/getBin.js.map +1 -1
  65. package/dist/utils/getBuildDefaults.d.ts +6 -0
  66. package/dist/utils/getBuildDefaults.js +10 -0
  67. package/dist/utils/getBuildDefaults.js.map +1 -0
  68. package/dist/utils/getReleaseInfo.d.ts +7 -0
  69. package/dist/utils/getReleaseInfo.js +30 -0
  70. package/dist/utils/getReleaseInfo.js.map +1 -0
  71. package/dist/utils/parseModelFileName.d.ts +9 -0
  72. package/dist/utils/parseModelFileName.js +68 -0
  73. package/dist/utils/parseModelFileName.js.map +1 -0
  74. package/dist/utils/parseModelTypeDescription.d.ts +6 -0
  75. package/dist/utils/parseModelTypeDescription.js +9 -0
  76. package/dist/utils/parseModelTypeDescription.js.map +1 -0
  77. package/llama/.clang-format +10 -9
  78. package/llama/addon.cpp +689 -356
  79. package/llama/binariesGithubRelease.json +1 -1
  80. package/llama/gitRelease.bundle +0 -0
  81. package/llama/grammars/README.md +2 -2
  82. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  83. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  84. package/llamaBins/linux-x64/llama-addon.node +0 -0
  85. package/llamaBins/mac-arm64/ggml-metal.metal +107 -1
  86. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  87. package/llamaBins/mac-x64/ggml-metal.metal +107 -1
  88. package/llamaBins/mac-x64/llama-addon.node +0 -0
  89. package/llamaBins/win-x64/llama-addon.exp +0 -0
  90. package/llamaBins/win-x64/llama-addon.lib +0 -0
  91. package/llamaBins/win-x64/llama-addon.node +0 -0
  92. package/package.json +13 -7
  93. package/dist/chatWrappers/createChatWrapperByBos.d.ts +0 -2
  94. package/dist/chatWrappers/createChatWrapperByBos.js +0 -14
  95. package/dist/chatWrappers/createChatWrapperByBos.js.map +0 -1
  96. package/dist/llamaEvaluator/LlamaContext.d.ts +0 -100
  97. package/dist/llamaEvaluator/LlamaContext.js +0 -141
  98. package/dist/llamaEvaluator/LlamaContext.js.map +0 -1
  99. package/dist/utils/withLock.d.ts +0 -1
  100. package/dist/utils/withLock.js +0 -19
  101. package/dist/utils/withLock.js.map +0 -1
@@ -0,0 +1,640 @@
1
+ import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
2
+ import { removeNullFields } from "../../utils/removeNullFields.js";
3
+ import { AddonContext } from "../LlamaBins.js";
4
+ import { resolveBatchItemsPrioritizingStrategy } from "./utils/resolveBatchItemsPrioritizingStrategy.js";
5
/**
 * A llama.cpp evaluation context that can host up to `totalSequences`
 * independent token sequences at once.
 *
 * Decode requests from all sequences are pushed onto `_queuedDecodes` and are
 * merged into shared native batches by `dispatchPendingBatch`, so multiple
 * sequences can be evaluated in parallel inside a single native context.
 */
export class LlamaContext {
    /** @internal */ _ctx; // the native AddonContext this class wraps
    /** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
    /** @internal */ _model;
    /** @internal */ _contextSize; // context size available to each sequence
    /** @internal */ _batchSize;
    /** @internal */ _totalSequences;
    /** @internal */ _unusedSequenceIds = []; // ids reclaimed from disposed sequences; reused before generating new ids
    /** @internal */ _batchingOptions;
    /** @internal */ _queuedDecodeSequenceIds = new Set(); // sequence ids that currently have queued decode requests
    /** @internal */ _queuedDecodes = []; // pending decode requests, in insertion order
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _nextGeneratedSequenceId = 0;
    /** @internal */ _dispatchDecodeScheduled = false;
    /** @internal */ _batchDispatchPending = false;
    // Identity token: replaced whenever a dispatch starts, so dispatch callbacks
    // scheduled earlier can detect they are stale and bail out.
    /** @internal */ _currentDispatchBatchHandle = {};
    /** @internal */ _allocatedContextSize; // lazily read from the native context in getAllocatedContextSize()
    /** @internal */ _disposed = false;
    onDispose = new EventRelay();
    /**
     * Create a context over the given model.
     *
     * Note that the native context is allocated with `contextSize * sequences`
     * total cells, while `this._contextSize` holds the per-sequence size.
     * `batchSize` defaults to `contextSize` and is raised to at least the number
     * of sequences.
     * @param options
     * @throws {DisposedError} if the model was already disposed
     */
    constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, f16Kv, logitsAll, embedding, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {} }) {
        if (model.disposed)
            throw new DisposedError();
        this._model = model;
        this._totalSequences = Math.max(1, Math.floor(sequences));
        this._contextSize = Math.max(2, contextSize);
        this._batchSize = Math.max(batchSize, this._totalSequences);
        this._ctx = new AddonContext(this._model._model, removeNullFields({
            seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
            contextSize: contextSize * this._totalSequences,
            batchSize: this._batchSize,
            f16Kv,
            logitsAll,
            embedding,
            threads
        }));
        this._batchingOptions = {
            dispatchSchedule: batchingDispatchSchedule,
            itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy
        };
        // Bound so it can be passed directly as a FinalizationRegistry callback
        // and a dispose action without losing `this`.
        this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
        this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
        this._disposeAggregator.add(() => {
            this._ctx.dispose();
        });
        // Dispose this context when its model is disposed; a WeakRef is used so
        // the listener does not keep the context alive.
        this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
    }
    /** Dispose the native context and fire `onDispose`. Safe to call more than once. */
    dispose() {
        if (this._disposed)
            return;
        this._disposed = true;
        this._disposeAggregator.dispose();
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    get disposed() {
        return this._disposed;
    }
    get model() {
        return this._model;
    }
    /** Context size available to each individual sequence. */
    get contextSize() {
        return this._contextSize;
    }
    get batchSize() {
        return this._batchSize;
    }
    /** Total context size actually allocated by the native context (read once and cached). */
    getAllocatedContextSize() {
        this._ensureNotDisposed();
        if (this._allocatedContextSize == null)
            this._allocatedContextSize = this._ctx.getContextSize();
        return this._allocatedContextSize;
    }
    get totalSequences() {
        return this._totalSequences;
    }
    /** Number of sequences that can still be obtained via `getSequence`. */
    get sequencesLeft() {
        return this._totalSequences - this._nextGeneratedSequenceId + this._unusedSequenceIds.length;
    }
    /**
     * Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
     * When there are no sequences left, this method will throw an error.
     * @param [options]
     */
    getSequence({ prependBos = true, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
        this._ensureNotDisposed();
        const nextSequenceId = this._popSequenceId();
        if (nextSequenceId == null)
            throw new Error("No sequences left");
        return LlamaContextSequence._create({
            sequenceId: nextSequenceId,
            context: this,
            prependBos,
            contextShift: {
                size: contextShiftSize,
                strategy: contextShiftStrategy
            }
        });
    }
    /**
     * Merge the queued decode requests into one or more native batches and run them.
     *
     * Runs under the shared "context" lock so it cannot overlap with sequence
     * reclamation or token erasure. Each loop iteration snapshots the queue,
     * lets the configured prioritization strategy split the batch-size token
     * budget among the queued items, feeds the selected token spans into the
     * native batch, decodes, and settles the promises of fully-consumed
     * requests. Partially-consumed requests keep their remaining tokens and
     * stay queued for the next iteration.
     */
    dispatchPendingBatch() {
        // Invalidate any dispatch callback scheduled earlier via _scheduleDecode.
        this._currentDispatchBatchHandle = {};
        this._dispatchDecodeScheduled = false;
        if (this._batchDispatchPending)
            return;
        this._batchDispatchPending = true;
        void withLock(this, "context", async () => {
            // Reset the scheduling state again now that the lock is actually held.
            this._currentDispatchBatchHandle = {};
            this._dispatchDecodeScheduled = false;
            this._batchDispatchPending = false;
            let prioritizeStrategy;
            try {
                this._ensureNotDisposed();
                prioritizeStrategy = resolveBatchItemsPrioritizingStrategy(this._batchingOptions.itemsPrioritizingStrategy);
            }
            catch (err) {
                // Strategy resolution failed - fail every queued request.
                this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
                return;
            }
            let shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
            while (shouldHaveAnotherBatch) {
                // Snapshot the queue as plain batch items for the strategy,
                // keeping a map back to the originating queued decode.
                const batchItemToQueuedDecodeMap = new Map();
                const batchItemsList = [];
                for (const queuedDecode of this._queuedDecodes) {
                    const batchItem = {
                        tokens: queuedDecode.tokens,
                        evaluationPriority: queuedDecode.evaluationPriority
                    };
                    batchItemToQueuedDecodeMap.set(batchItem, queuedDecode);
                    batchItemsList.push(batchItem);
                }
                let prioritizedItems;
                try {
                    prioritizedItems = prioritizeStrategy({
                        items: batchItemsList,
                        size: this._batchSize
                    });
                }
                catch (err) {
                    this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
                    return;
                }
                let batchTokenSlotsLeft = this._batchSize;
                const afterDecodeActions = []; // promise settlements to perform after the decode
                const queuedDecodesToDelete = new Set(); // requests fully consumed by this batch
                const currentQueuedDecodeItems = new Set(); // every request that contributed tokens to this batch
                const currentBatchItems = [];
                let currentBatchSize = 0;
                // Decide how many tokens of each prioritized item fit into the
                // remaining token budget of this batch.
                for (const prioritizedItem of prioritizedItems) {
                    const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
                    if (queuedDecode == null)
                        throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
                            "of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
                    const processAmount = Math.min(queuedDecode.tokens.length, prioritizedItem.processAmount, batchTokenSlotsLeft);
                    if (processAmount <= 0)
                        continue;
                    batchTokenSlotsLeft -= processAmount;
                    currentBatchItems.push({
                        queuedDecode,
                        processAmount
                    });
                    currentBatchSize += processAmount;
                }
                if (currentBatchSize !== 0)
                    this._ctx.initBatch(currentBatchSize);
                for (const { queuedDecode, processAmount } of currentBatchItems) {
                    let batchLogitIndex;
                    try {
                        // A logit is only requested when this slice reaches the
                        // end of the request's tokens.
                        batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(queuedDecode.tokens.slice(0, processAmount)), queuedDecode.generateLogitAtTheEnd && processAmount === queuedDecode.tokens.length);
                    }
                    catch (err) {
                        // Fail only the offending request; the rest of the batch proceeds.
                        this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
                        continue;
                    }
                    currentQueuedDecodeItems.add(queuedDecode);
                    if (queuedDecode.tokens.length === processAmount) {
                        queuedDecodesToDelete.add(queuedDecode);
                        afterDecodeActions.push({
                            batchLogitIndex,
                            response: queuedDecode.response,
                            onDone: queuedDecode.onDone
                        });
                    }
                    else {
                        // Partially consumed: keep the remaining tokens queued,
                        // advancing the position they will be decoded at.
                        queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
                        queuedDecode.firstTokenSequenceIndex += processAmount;
                    }
                    if (batchTokenSlotsLeft === 0)
                        break;
                }
                // Remove the fully-consumed requests from the queue.
                for (let i = 0; i < this._queuedDecodes.length; i++) {
                    const queuedDecode = this._queuedDecodes[i];
                    if (queuedDecodesToDelete.has(queuedDecode)) {
                        this._queuedDecodes.splice(i, 1);
                        this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
                        i--;
                    }
                }
                shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
                try {
                    if (currentBatchSize !== 0)
                        await this._ctx.decodeBatch();
                }
                catch (err) {
                    // The whole batch failed - reject every request that contributed to it.
                    this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
                    return;
                }
                for (const action of afterDecodeActions) {
                    const [accept, reject] = action.response;
                    if (action.onDone != null && action.batchLogitIndex != null) {
                        try {
                            accept(action.onDone(action.batchLogitIndex ?? null));
                        }
                        catch (err) {
                            reject(err);
                        }
                    }
                    // Resolves logit-less requests with undefined; a no-op when
                    // the promise was already settled above, since a promise can
                    // only settle once.
                    accept(undefined);
                }
            }
        });
    }
    /**
     * Queue a decode request and resolve once all of its tokens were decoded.
     * When `generateLogitAtTheEnd` is set, resolves with the value returned by
     * `onDone` (called with the logit index of the last token); otherwise
     * resolves with `undefined`.
     * @internal
     */
    async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
        return await new Promise((accept, reject) => {
            this._queuedDecodes.push({
                sequenceId,
                tokens,
                firstTokenSequenceIndex,
                generateLogitAtTheEnd,
                evaluationPriority,
                response: [accept, reject],
                onDone
            });
            this._queuedDecodeSequenceIds.add(sequenceId);
            this._scheduleDecode();
        });
    }
    /**
     * Dispose the native per-sequence state and return the id to the unused
     * pool. Runs under the "context" lock so it cannot race an in-flight batch.
     * @internal
     */
    _reclaimUnusedSequenceId(sequenceId) {
        if (this._disposed)
            return;
        void withLock(this, "context", async () => {
            this._ctx.disposeSequence(sequenceId);
            this._unusedSequenceIds.push(sequenceId);
            this._onReclaimUnusedSequenceId.dispatchEvent();
        });
    }
    /**
     * Get the next available sequence id: reclaimed ids are reused first,
     * then fresh ids are generated up to `_totalSequences`.
     * Returns `null` when no ids are available.
     * @internal
     */
    _popSequenceId() {
        if (this._unusedSequenceIds.length > 0)
            return this._unusedSequenceIds.shift();
        if (this._nextGeneratedSequenceId < this._totalSequences) {
            const sequenceId = this._nextGeneratedSequenceId;
            this._nextGeneratedSequenceId++;
            return sequenceId;
        }
        return null;
    }
    /**
     * Schedule a batch dispatch according to the configured dispatch schedule.
     * @internal
     */
    _scheduleDecode() {
        if (this._dispatchDecodeScheduled || this._batchDispatchPending)
            return;
        this._dispatchDecodeScheduled = true;
        const currentPendingBatchHandle = this._currentDispatchBatchHandle;
        const dispatch = () => {
            // A newer dispatch already started (it replaced the handle) - skip.
            if (this._currentDispatchBatchHandle !== currentPendingBatchHandle)
                return;
            this.dispatchPendingBatch();
        };
        const dispatchSchedule = this._batchingOptions.dispatchSchedule;
        // When every sequence already has something queued, dispatch immediately;
        // the callback scheduled below then becomes a stale no-op via the handle check.
        if (this._queuedDecodeSequenceIds.size === this._totalSequences)
            dispatch();
        if (dispatchSchedule === "nextTick")
            setTimeout(dispatch, 0);
        else
            dispatchSchedule(dispatch);
    }
    /**
     * Reject the given queued decode requests with `err` and remove them from
     * the queue.
     * @internal
     */
    _dispatchErrorForQueuedDecodesAndDequeue(queuedDecodes, err) {
        for (const pendingDecode of queuedDecodes) {
            const [, reject] = pendingDecode.response;
            reject(err);
        }
        for (let i = 0; i < this._queuedDecodes.length; i++) {
            const item = this._queuedDecodes[i];
            if (queuedDecodes.has(item)) {
                this._queuedDecodes.splice(i, 1);
                this._queuedDecodeSequenceIds.delete(item.sequenceId);
                i--;
            }
        }
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposed)
            throw new DisposedError();
    }
}
308
/**
 * A single token sequence within a `LlamaContext`.
 *
 * Tracks the tokens currently evaluated into the sequence (`_contextTokens`)
 * together with one priority per token (`_contextTokenPriorities`), which the
 * "eraseLowestTokenPriorityBeginning" context-shift strategy uses to pick
 * which tokens to evict when the context runs out of space.
 */
export class LlamaContextSequence {
    /** @internal */ _sequenceId;
    /** @internal */ _gcRegistry; // reclaims the sequence id if this object is GCed without being disposed
    /** @internal */ _context;
    /** @internal */ _prependBos;
    /** @internal */ _contextShift; // {size, strategy} applied when the context runs out of space
    /** @internal */ _disposeAggregator = new DisposeAggregator();
    /** @internal */ _contextTokens = []; // tokens currently evaluated into this sequence
    /** @internal */ _contextTokenPriorities = []; // one priority per entry of _contextTokens
    /** @internal */ _nextTokenIndex = 0; // position where the next token will be placed
    /** @internal */ _prependTokens = []; // tokens to fold into the next evaluation (e.g. BOS)
    /** @internal */ _prependTokenPriorities = [];
    /** @internal */ _disposed = false;
    onDispose = new EventRelay();
    constructor({ sequenceId, context, prependBos, contextShift }) {
        this._sequenceId = sequenceId;
        this._context = context;
        this._contextShift = contextShift;
        this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
        this._prependTokens = [];
        this._prependTokenPriorities = [];
        // Only prepend BOS when requested and the model actually has a BOS token.
        if (prependBos && this._context.model.tokens.bos != null) {
            this._prependTokens.unshift(this._context.model.tokens.bos);
            this._prependTokenPriorities.unshift(1);
            this._prependBos = true;
        }
        else
            this._prependBos = false;
        this._gcRegistry.register(this, sequenceId);
        this._disposeAggregator.add(() => this._gcRegistry.unregister(this));
        this._disposeAggregator.add(this.onDispose.dispatchEvent);
        // Dispose this sequence when the model is disposed; a WeakRef is used so
        // the listener does not keep the sequence alive.
        this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextSequenceIfReferenced.bind(null, new WeakRef(this))));
        this._disposeAggregator.add(() => {
            this._context._reclaimUnusedSequenceId(this._sequenceId);
        });
    }
    /** Release the sequence id back to the owning context. Safe to call more than once. */
    dispose() {
        if (this._disposed)
            return;
        this._disposeAggregator.dispose();
        this._contextTokens.length = 0;
        this._contextTokenPriorities.length = 0;
        this._disposed = true;
    }
    /** @hidden */
    [Symbol.dispose]() {
        return this.dispose();
    }
    get disposed() {
        return this._disposed;
    }
    get context() {
        return this._context;
    }
    get model() {
        return this._context.model;
    }
    get prependBos() {
        return this._prependBos;
    }
    get nextTokenIndex() {
        return this._nextTokenIndex;
    }
    /** A copy of the tokens currently in this sequence. */
    get contextTokens() {
        return this._contextTokens.slice();
    }
    /** A copy of the priorities of the tokens currently in this sequence. */
    get contextTokenPriorities() {
        return this._contextTokenPriorities.slice();
    }
    /**
     * Clear the history of the sequence.
     * If `prependBos` was enabled, the BOS token will be prepended to the sequence again.
     */
    async clearHistory() {
        this._ensureNotDisposed();
        await this.eraseContextTokenRanges([{ start: 0, end: this._nextTokenIndex }]);
        this._prependTokens.length = 0;
        this._prependTokenPriorities.length = 0;
        if (this._prependBos && this._context.model.tokens.bos != null) {
            this._prependTokens.push(this._context.model.tokens.bos);
            this._prependTokenPriorities.push(1);
        }
    }
    /**
     * Erase context tokens in the provided ranges to free up space for new tokens to be generated.
     * The `start` of each range is inclusive and the `end` is exclusive.
     * For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
     */
    async eraseContextTokenRanges(ranges) {
        this._ensureNotDisposed();
        await withLock(this._context, "context", async () => {
            this._ensureNotDisposed();
            if (ranges.length === 0)
                return;
            // Normalize the requested ranges: drop empty/out-of-bounds ones,
            // flip reversed ones, clamp to the current sequence length, then
            // sort and merge overlapping ranges.
            const resolvedRanges = ranges
                .map(({ start, end }) => {
                if (start === end)
                    return null;
                if (start > end)
                    [start, end] = [end, start];
                if (end > this._nextTokenIndex)
                    end = this._nextTokenIndex;
                if (start >= this._nextTokenIndex)
                    return null;
                return { start, end };
            })
                .filter((range) => range != null)
                .sort((a, b) => a.start - b.start)
                .reduce((ranges, range) => {
                if (ranges.length === 0)
                    return [range];
                const lastRange = ranges[ranges.length - 1];
                if (lastRange.end >= range.start) {
                    lastRange.end = Math.max(lastRange.end, range.end);
                    return ranges;
                }
                ranges.push(range);
                return ranges;
            }, []);
            let removedTokens = 0;
            let lastDeleteRangeEndPos = null;
            for (const range of resolvedRanges) {
                // Remove the tokens from the local bookkeeping (offset by tokens
                // already removed in earlier iterations), then from the native cells.
                this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
                this._contextTokenPriorities.splice(range.start - removedTokens, range.end - range.start);
                this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
                // Shift the cells between the previous deleted range and this one
                // left, closing the gap left by earlier deletions.
                if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
                    this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
                removedTokens += range.end - range.start;
                lastDeleteRangeEndPos = range.end;
            }
            // Close the final gap after the last deleted range.
            if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
                this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
            this._nextTokenIndex -= removedTokens;
        });
    }
    /**
     * Evaluate the given tokens into the sequence and keep generating new
     * tokens, yielding each generated token (see `_evaluate`).
     * @param tokens
     * @param [options]
     */
    evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, tokenPriority = 1 } = {}) {
        return this._evaluate(tokens, {
            temperature,
            topK,
            topP,
            grammarEvaluationState,
            repeatPenalty,
            evaluationPriority,
            tokenPriority
        });
    }
    /**
     * Evaluate the provided tokens into the context sequence without generating new tokens.
     * @param tokens
     * @param [options]
     */
    async evaluateWithoutGeneratingNewTokens(tokens, { evaluationPriority = 5, tokenPriority = 1 } = {}) {
        const iterator = this._evaluate(tokens, {
            generateNewTokens: false,
            evaluationPriority,
            tokenPriority
        });
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        for await (const token of iterator) {
            // Array.from doesn't work with async generators, so we have to iterate over the generator
        }
    }
    /**
     * Evaluate `tokens` and, unless `generateNewTokens` is disabled, keep
     * sampling and yielding new tokens until EOS is produced.
     * `tokenPriority` may be a single number (applied to every token) or an array.
     * @internal
     */
    async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, tokenPriority = 1, generateNewTokens = true } = {}) {
        this._ensureNotDisposed();
        if (!(tokenPriority instanceof Array))
            tokenPriority = Array(tokens.length).fill(tokenPriority);
        let evalTokens = tokens;
        if (generateNewTokens && tokens.length === 0)
            return;
        // Fold pending prepend tokens (e.g. BOS) into the first evaluation.
        if (this._prependTokens.length > 0) {
            evalTokens = this._prependTokens.concat(tokens);
            tokenPriority = this._prependTokenPriorities.concat(tokenPriority);
            this._prependTokens = [];
            this._prependTokenPriorities = [];
        }
        if (evalTokens.length === 0)
            return;
        // eslint-disable-next-line no-constant-condition
        while (true) {
            this._ensureNotDisposed();
            // Evaluate to get the next token.
            const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, tokenPriority, (batchLogitIndex) => {
                return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
                    temperature,
                    topK,
                    topP,
                    repeatPenalty: repeatPenalty?.penalty,
                    repeatPenaltyTokens: repeatPenalty?.punishTokens instanceof Function
                        ? repeatPenalty.punishTokens()
                        : repeatPenalty?.punishTokens,
                    repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
                    repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
                    grammarEvaluationState: grammarEvaluationState?._state
                }));
            });
            // null means no logit was requested (generateNewTokens disabled),
            // so there is nothing to yield.
            if (nextToken == null)
                return;
            // the model finished generating text
            if (nextToken === this._context.model.tokens.eos)
                break;
            yield nextToken;
            // Create tokens for the next eval.
            evalTokens = [nextToken];
        }
    }
    /**
     * Decode `tokens` into this sequence, chunked to the free space left in the
     * context and triggering a context shift (`_freeUpSpaceForTokens`) when full.
     * Returns the token sampled by `onDecodeDone` when `generateLogit` is set,
     * otherwise `null`.
     * @internal
     */
    async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenPriority, onDecodeDone) {
        this._ensureNotDisposed();
        const tokensLeftToDecode = tokens.slice();
        return await withLock(this, "evaluate", async () => {
            while (tokensLeftToDecode.length > 0) {
                this._ensureNotDisposed();
                let freeSpace = this._context.contextSize - this._nextTokenIndex;
                if (freeSpace === 0) {
                    await this._freeUpSpaceForTokens();
                    freeSpace = this._context.contextSize - this._nextTokenIndex;
                    if (freeSpace === 0)
                        throw new Error("Failed to free up space for new tokens");
                }
                const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
                // Only request a logit when this chunk contains the final token.
                const generateLogitAtTheEnd = generateLogit && tokensLeftToDecode.length === 0;
                const nextToken = await this._context._decodeTokens({
                    sequenceId: this._sequenceId,
                    tokens: tokensToDecode,
                    firstTokenSequenceIndex: this._nextTokenIndex,
                    generateLogitAtTheEnd,
                    evaluationPriority
                }, !generateLogitAtTheEnd
                    ? undefined
                    : onDecodeDone);
                this._nextTokenIndex += tokensToDecode.length;
                this._contextTokens = this._contextTokens.concat(tokensToDecode);
                for (let i = 0; i < tokensToDecode.length; i++) {
                    // NOTE(review): `i` restarts at 0 for every chunk, so for inputs
                    // longer than the free space the priorities read from
                    // `tokenPriority` may not line up with their tokens; confirm
                    // whether a running offset across chunks is intended here.
                    this._contextTokenPriorities.push(tokenPriority[i] ?? tokenPriority[tokenPriority.length - 1] ?? 1);
                }
                if (generateLogitAtTheEnd && nextToken != null)
                    return nextToken;
            }
            return null;
        });
    }
    /**
     * Make room in the context by erasing tokens according to the configured
     * context-shift strategy.
     * @internal
     */
    async _freeUpSpaceForTokens() {
        this._ensureNotDisposed();
        // `size` may be a plain number or an async function of this sequence.
        const size = Math.min(this._nextTokenIndex, Math.max(1, this._contextShift.size instanceof Function
            ? await this._contextShift.size(this)
            : this._contextShift.size));
        this._ensureNotDisposed();
        if (this._contextShift.strategy === "eraseLowestTokenPriorityBeginning") {
            // Repeatedly collect the indexes of the lowest priority not yet
            // taken, until enough tokens were selected for eviction.
            let leftTokensToErase = Math.min(size, this._contextTokenPriorities.length);
            let previousLowestPriority = null;
            let indexesToErase = [];
            while (leftTokensToErase > 0 && this._contextTokenPriorities.length > 0) {
                let currentLowestPriorityIndexes = [];
                let currentLowestPriorityFound = null;
                for (let i = 0; leftTokensToErase > currentLowestPriorityIndexes.length && i < this._contextTokenPriorities.length; i++) {
                    const tokenPriority = this._contextTokenPriorities[i];
                    if (currentLowestPriorityFound == null || (tokenPriority < currentLowestPriorityFound && (previousLowestPriority == null || tokenPriority > previousLowestPriority))) {
                        currentLowestPriorityFound = tokenPriority;
                        currentLowestPriorityIndexes = [i];
                    }
                    else if (tokenPriority === currentLowestPriorityFound) {
                        currentLowestPriorityIndexes.push(i);
                    }
                }
                previousLowestPriority = currentLowestPriorityFound;
                indexesToErase = indexesToErase.concat(currentLowestPriorityIndexes);
                leftTokensToErase -= currentLowestPriorityIndexes.length;
            }
            // Collapse the selected indexes into contiguous ranges for erasure.
            await this.eraseContextTokenRanges(indexesToErase.reduce((ranges, index) => {
                if (ranges.length === 0)
                    return [{ start: index, end: index + 1 }];
                const lastRange = ranges[ranges.length - 1];
                if (lastRange.end >= index) {
                    lastRange.end = Math.max(lastRange.end, index + 1);
                    return ranges;
                }
                ranges.push({ start: index, end: index + 1 });
                return ranges;
            }, []));
        }
        else if (this._contextShift.strategy === "eraseBeginning") {
            await this.eraseContextTokenRanges([{ start: 0, end: size }]);
        }
        else {
            // Custom strategy: a function returning the ranges to erase.
            const ranges = await this._contextShift.strategy({
                sequence: this,
                size
            });
            if (ranges == null)
                throw new Error("Invalid delete ranges");
            await this.eraseContextTokenRanges(ranges);
            // Fallback: if the custom strategy did not free enough space,
            // erase from the beginning.
            if (this.nextTokenIndex >= this._context.contextSize)
                await this.eraseContextTokenRanges([{ start: 0, end: size }]);
        }
    }
    /** @internal */
    _ensureNotDisposed() {
        if (this._disposed)
            throw new DisposedError();
    }
    /**
     * We need this to make it impossible to manually create instances of this class outside the code of this library
     * @internal
     */
    static _create({ sequenceId, context, prependBos = true, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
        return new LlamaContextSequence({
            sequenceId,
            context,
            prependBos,
            contextShift: {
                size: contextShiftSize,
                strategy: contextShiftStrategy
            }
        });
    }
}
630
/**
 * Dispose the `LlamaContext` held by the given weak reference, if it is still
 * alive. Used as a model-dispose listener so the context can cascade-dispose
 * without the listener keeping it strongly referenced.
 */
function disposeContextIfReferenced(contextRef) {
    contextRef.deref()?.dispose();
}
635
/**
 * Dispose the `LlamaContextSequence` held by the given weak reference, if it
 * is still alive. Used as a model-dispose listener so the sequence can
 * cascade-dispose without the listener keeping it strongly referenced.
 *
 * Renamed locals from the copy-pasted `contextRef`/`context` to reflect that
 * this handles a sequence, not a context; behavior is unchanged.
 */
function disposeContextSequenceIfReferenced(sequenceRef) {
    const sequence = sequenceRef.deref();
    if (sequence != null)
        sequence.dispose();
}
640
+ //# sourceMappingURL=LlamaContext.js.map