node-llama-cpp 3.17.1 → 3.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bindings/AddonTypes.d.ts +11 -0
- package/dist/bindings/Llama.js +20 -2
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +1 -1
- package/dist/bindings/getLlama.js +19 -8
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +2 -1
- package/dist/bindings/utils/compileLLamaCpp.js +8 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getLlamaGpuTypes.js +2 -0
- package/dist/bindings/utils/getLlamaGpuTypes.js.map +1 -1
- package/dist/chatWrappers/QwenChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/QwenChatWrapper.js +176 -56
- package/dist/chatWrappers/QwenChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +127 -88
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.d.ts +16 -10
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +115 -5
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -1
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +1 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -1
- package/dist/cli/commands/ChatCommand.js +1 -1
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +51 -4
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/utils/resolveNpmrcConfig.d.ts +18 -0
- package/dist/cli/utils/resolveNpmrcConfig.js +129 -0
- package/dist/cli/utils/resolveNpmrcConfig.js.map +1 -0
- package/dist/config.d.ts +3 -0
- package/dist/config.js +4 -1
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +8 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +99 -6
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +8 -2
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +8 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +88 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +181 -17
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.d.ts +27 -0
- package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.js +130 -0
- package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +3 -0
- package/dist/gguf/insights/GgufInsights.js +221 -43
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +15 -1
- package/dist/gguf/types/GgufMetadataTypes.js +4 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/utils/getFirstWritableDir.d.ts +8 -0
- package/dist/utils/getFirstWritableDir.js +60 -0
- package/dist/utils/getFirstWritableDir.js.map +1 -0
- package/dist/utils/getTempDir.d.ts +10 -0
- package/dist/utils/getTempDir.js +121 -0
- package/dist/utils/getTempDir.js.map +1 -0
- package/dist/utils/resolveModelFile.js +19 -8
- package/dist/utils/resolveModelFile.js.map +1 -1
- package/llama/addon/AddonContext.cpp +168 -0
- package/llama/addon/AddonContext.h +27 -0
- package/llama/addon/addon.cpp +1 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +24 -24
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
|
|
2
|
+
import { internalCheckpoints } from "../LlamaContext/LlamaContext.js";
|
|
2
3
|
import { isChatModelResponseFunctionCall, isChatModelResponseSegment, allSegmentTypes } from "../../types.js";
|
|
3
4
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
4
5
|
import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
|
|
@@ -15,6 +16,7 @@ import { LlamaSampler } from "../LlamaContext/LlamaSampler.js";
|
|
|
15
16
|
import { getChatWrapperSegmentDefinition } from "../../utils/getChatWrapperSegmentDefinition.js";
|
|
16
17
|
import { jsonDumps } from "../../chatWrappers/utils/jsonDumps.js";
|
|
17
18
|
import { defaultMaxPreloadTokens } from "../LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js";
|
|
19
|
+
import { LlamaLogLevel } from "../../bindings/types.js";
|
|
18
20
|
import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
|
|
19
21
|
import { FunctionCallNameGrammar } from "./utils/FunctionCallNameGrammar.js";
|
|
20
22
|
import { FunctionCallParamsGrammar } from "./utils/FunctionCallParamsGrammar.js";
|
|
@@ -26,6 +28,9 @@ const defaultContextShiftOptions = {
|
|
|
26
28
|
const defaultRepeatPenaltyLastTokens = 64;
|
|
27
29
|
const defaultTrimWhitespaceSuffix = false;
|
|
28
30
|
const defaultEvaluationPriority = 5;
|
|
31
|
+
const defaultSegmentBudgetSize = (contextSize) => (contextSize < 8192
|
|
32
|
+
? contextSize * 0.5
|
|
33
|
+
: contextSize * 0.75);
|
|
29
34
|
export class LlamaChat {
|
|
30
35
|
/** @internal */ _chatWrapper;
|
|
31
36
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
@@ -118,7 +123,9 @@ export class LlamaChat {
|
|
|
118
123
|
if (generateResponseState.grammar != null && generateResponseState.functionsEnabled && !abortOnNonText)
|
|
119
124
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
120
125
|
return await withLock([this._chatLock, "evaluate"], signal, async () => {
|
|
126
|
+
let hadError = false;
|
|
121
127
|
try {
|
|
128
|
+
let tookInitialCheckpoint = false;
|
|
122
129
|
generateResponseState.ensureLastHistoryItemIsModel();
|
|
123
130
|
generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
|
|
124
131
|
const loadContextWindow = async (avoidReloadingHistory = false) => {
|
|
@@ -156,6 +163,10 @@ export class LlamaChat {
|
|
|
156
163
|
await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
|
|
157
164
|
await generateResponseState.createNewEvaluationIterator();
|
|
158
165
|
while (await generateResponseState.iterateEvaluation()) {
|
|
166
|
+
if (!tookInitialCheckpoint && this.sequence.needsCheckpoints) {
|
|
167
|
+
await this.sequence.takeCheckpoint();
|
|
168
|
+
tookInitialCheckpoint = true;
|
|
169
|
+
}
|
|
159
170
|
if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
|
|
160
171
|
generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
|
|
161
172
|
generateResponseState.detectAndHandleFunctionStartSyntax();
|
|
@@ -166,7 +177,11 @@ export class LlamaChat {
|
|
|
166
177
|
if (functionsCallsRes != null)
|
|
167
178
|
return functionsCallsRes;
|
|
168
179
|
}
|
|
169
|
-
generateResponseState.recordStopGenerationEvaluation();
|
|
180
|
+
{
|
|
181
|
+
const resPromise = generateResponseState.recordStopGenerationEvaluation();
|
|
182
|
+
if (resPromise instanceof Promise)
|
|
183
|
+
await resPromise;
|
|
184
|
+
}
|
|
170
185
|
generateResponseState.popStreamRegulatorFreeTokens();
|
|
171
186
|
generateResponseState.removeFoundStartIgnoreTextsFromPendingTokens();
|
|
172
187
|
const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("model");
|
|
@@ -198,8 +213,14 @@ export class LlamaChat {
|
|
|
198
213
|
}
|
|
199
214
|
throw new Error("The context size is too small to generate a response");
|
|
200
215
|
}
|
|
216
|
+
catch (err) {
|
|
217
|
+
hadError = true;
|
|
218
|
+
throw err;
|
|
219
|
+
}
|
|
201
220
|
finally {
|
|
202
221
|
await generateResponseState.dispose();
|
|
222
|
+
if (!hadError && this.sequence.needsCheckpoints)
|
|
223
|
+
void this.sequence.takeCheckpoint();
|
|
203
224
|
}
|
|
204
225
|
});
|
|
205
226
|
}
|
|
@@ -247,6 +268,7 @@ export class LlamaChat {
|
|
|
247
268
|
});
|
|
248
269
|
return await withLock([this._chatLock, "evaluate"], signal, async () => {
|
|
249
270
|
try {
|
|
271
|
+
let tookInitialCheckpoint = false;
|
|
250
272
|
generateResponseState.ensureLastHistoryItemIsUser();
|
|
251
273
|
while (true) {
|
|
252
274
|
generateResponseState.startTokenLoop();
|
|
@@ -279,9 +301,17 @@ export class LlamaChat {
|
|
|
279
301
|
}
|
|
280
302
|
await generateResponseState.createNewEvaluationIterator();
|
|
281
303
|
while (await generateResponseState.iterateEvaluation()) {
|
|
304
|
+
if (!tookInitialCheckpoint && this.sequence.needsCheckpoints) {
|
|
305
|
+
await this.sequence.takeCheckpoint();
|
|
306
|
+
tookInitialCheckpoint = true;
|
|
307
|
+
}
|
|
282
308
|
if (!generateResponseState.holdPartialTokensForNextEvaluation()) {
|
|
283
309
|
generateResponseState.waitOnPartialCharactersOrWhiteSpaceTokens();
|
|
284
|
-
generateResponseState.recordStopGenerationEvaluation();
|
|
310
|
+
{
|
|
311
|
+
const resPromise = generateResponseState.recordStopGenerationEvaluation();
|
|
312
|
+
if (resPromise instanceof Promise)
|
|
313
|
+
await resPromise;
|
|
314
|
+
}
|
|
285
315
|
generateResponseState.popStreamRegulatorFreeTokens();
|
|
286
316
|
const someOfCurrentTokensAreSpecial = generateResponseState.currentTokens.some((token) => (this.model.isSpecialToken(token)));
|
|
287
317
|
const stopGenerationTriggerRes = generateResponseState.handleStopGenerationTrigger("user", someOfCurrentTokensAreSpecial
|
|
@@ -792,6 +822,7 @@ class GenerateResponseState {
|
|
|
792
822
|
userTextSuffix = undefined;
|
|
793
823
|
prefixTriggerDetectors = new Map();
|
|
794
824
|
noPrefixTrigger = undefined;
|
|
825
|
+
responsePrefix = undefined;
|
|
795
826
|
rerenderTriggers = [];
|
|
796
827
|
rerenderTriggerDetector = new StopGenerationDetector();
|
|
797
828
|
rerenderActions = undefined;
|
|
@@ -1148,6 +1179,9 @@ class GenerateResponseState {
|
|
|
1148
1179
|
!this.segmentHandler.isSegmentTypeOpen(trigger.segmentType) &&
|
|
1149
1180
|
this.segmentHandler.getSegmentTokensCount(trigger.segmentType) >= segmentBudget)
|
|
1150
1181
|
continue;
|
|
1182
|
+
if (this.responsePrefix == null && trigger.type === "response" && trigger.triggers.length > 0 &&
|
|
1183
|
+
(trigger.triggers[0]?.values?.length ?? 0) > 0)
|
|
1184
|
+
this.responsePrefix = LlamaText([trigger.triggers[0] ?? "", trigger.inject ?? ""]);
|
|
1151
1185
|
const prefixDetector = new StopGenerationDetector();
|
|
1152
1186
|
StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
|
|
1153
1187
|
.forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
|
|
@@ -1172,6 +1206,8 @@ class GenerateResponseState {
|
|
|
1172
1206
|
!this.segmentHandler.isSegmentTypeOpen(noPrefixTrigger.segmentType) &&
|
|
1173
1207
|
this.segmentHandler.getSegmentTokensCount(noPrefixTrigger.segmentType) >= noPrefixTriggerSegmentBudget)
|
|
1174
1208
|
this.noPrefixTrigger = undefined;
|
|
1209
|
+
else if (noPrefixTrigger?.type === "response")
|
|
1210
|
+
this.responsePrefix = noPrefixTrigger.inject;
|
|
1175
1211
|
this.rerenderTriggers = rerender?.triggers ?? [];
|
|
1176
1212
|
this.rerenderTriggerDetector.clearInProgressStops();
|
|
1177
1213
|
this.rerenderTriggerDetector.clearTriggeredStops();
|
|
@@ -1252,6 +1288,11 @@ class GenerateResponseState {
|
|
|
1252
1288
|
if (alignStateTokens)
|
|
1253
1289
|
await reloadTokens();
|
|
1254
1290
|
};
|
|
1291
|
+
if (this.grammar != null) {
|
|
1292
|
+
if (this.responsePrefix != null)
|
|
1293
|
+
await injectTokens(this.responsePrefix, true);
|
|
1294
|
+
return undefined;
|
|
1295
|
+
}
|
|
1255
1296
|
if (this.prefixTriggerDetectors.size === 0) {
|
|
1256
1297
|
if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
|
|
1257
1298
|
this.shouldAbortBecauseOfNonText = true;
|
|
@@ -1275,7 +1316,12 @@ class GenerateResponseState {
|
|
|
1275
1316
|
const generatedTokens = [];
|
|
1276
1317
|
let isFirstToken = true;
|
|
1277
1318
|
let continueGeneration = true;
|
|
1319
|
+
let tookInitialCheckpoint = false;
|
|
1278
1320
|
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1321
|
+
if (!tookInitialCheckpoint && this.llamaChat.sequence.needsCheckpoints) {
|
|
1322
|
+
await this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatSequenceStart.name, internalCheckpoints.chatSequenceStart.maxCheckpoints);
|
|
1323
|
+
tookInitialCheckpoint = true;
|
|
1324
|
+
}
|
|
1279
1325
|
pushAll(generatedTokens, tokens);
|
|
1280
1326
|
for (const [triggerDetector, { trigger, inject }] of [...this.prefixTriggerDetectors.entries()]) {
|
|
1281
1327
|
triggerDetector.recordGeneration({
|
|
@@ -1420,7 +1466,12 @@ class GenerateResponseState {
|
|
|
1420
1466
|
pushAll(prefixDetectorRecordedTokens, tokens);
|
|
1421
1467
|
}
|
|
1422
1468
|
}
|
|
1469
|
+
let tookInitialCheckpoint = false;
|
|
1423
1470
|
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1471
|
+
if (!tookInitialCheckpoint && this.llamaChat.sequence.needsCheckpoints) {
|
|
1472
|
+
await this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatSequenceStart.name, internalCheckpoints.chatSequenceStart.maxCheckpoints);
|
|
1473
|
+
tookInitialCheckpoint = true;
|
|
1474
|
+
}
|
|
1424
1475
|
const stopGenerationTriggerRes = this.handleStopGenerationTrigger("model");
|
|
1425
1476
|
if (stopGenerationTriggerRes != null)
|
|
1426
1477
|
return stopGenerationTriggerRes;
|
|
@@ -1463,7 +1514,11 @@ class GenerateResponseState {
|
|
|
1463
1514
|
tokens: this.currentTokens,
|
|
1464
1515
|
text: this.currentText
|
|
1465
1516
|
});
|
|
1466
|
-
this.recordStopGenerationEvaluation();
|
|
1517
|
+
{
|
|
1518
|
+
const resPromise = this.recordStopGenerationEvaluation();
|
|
1519
|
+
if (resPromise instanceof Promise)
|
|
1520
|
+
await resPromise;
|
|
1521
|
+
}
|
|
1467
1522
|
}
|
|
1468
1523
|
this.currentFunctionCallCurrentPartTokens.length = 0;
|
|
1469
1524
|
this.functionEvaluationMode = false;
|
|
@@ -1515,7 +1570,12 @@ class GenerateResponseState {
|
|
|
1515
1570
|
}
|
|
1516
1571
|
}
|
|
1517
1572
|
}
|
|
1573
|
+
let tookInitialCheckpoint = false;
|
|
1518
1574
|
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1575
|
+
if (!tookInitialCheckpoint && this.llamaChat.sequence.needsCheckpoints) {
|
|
1576
|
+
await this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatSequenceStart.name, internalCheckpoints.chatSequenceStart.maxCheckpoints);
|
|
1577
|
+
tookInitialCheckpoint = true;
|
|
1578
|
+
}
|
|
1519
1579
|
pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
|
|
1520
1580
|
functionNameGenerationDoneDetector.recordGeneration({
|
|
1521
1581
|
text: this.currentText,
|
|
@@ -1578,11 +1638,20 @@ class GenerateResponseState {
|
|
|
1578
1638
|
paramsChunk: this.llamaChat.model.detokenize(this.currentFunctionCallCurrentPartTokens, false, lastPartTokens),
|
|
1579
1639
|
done: false
|
|
1580
1640
|
});
|
|
1641
|
+
let tookInitialCheckpoint = false;
|
|
1581
1642
|
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1643
|
+
if (!tookInitialCheckpoint && this.llamaChat.sequence.needsCheckpoints) {
|
|
1644
|
+
await this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatSequenceStart.name, internalCheckpoints.chatSequenceStart.maxCheckpoints);
|
|
1645
|
+
tookInitialCheckpoint = true;
|
|
1646
|
+
}
|
|
1647
|
+
const hadInProgressTriggers = functionParamsGenerationDoneDetector.hasInProgressStops;
|
|
1582
1648
|
functionParamsGenerationDoneDetector.recordGeneration({
|
|
1583
1649
|
text: this.currentText,
|
|
1584
1650
|
tokens: this.currentTokens
|
|
1585
1651
|
});
|
|
1652
|
+
if (!hadInProgressTriggers && functionParamsGenerationDoneDetector.hasInProgressStops &&
|
|
1653
|
+
this.llamaChat.sequence.needsCheckpoints)
|
|
1654
|
+
await this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatGrammarEnd.name, internalCheckpoints.chatGrammarEnd.maxCheckpoints);
|
|
1586
1655
|
this.onFunctionCallParamsChunk?.({
|
|
1587
1656
|
callIndex: this.resFunctionCalls.length,
|
|
1588
1657
|
functionName: this.functionEvaluationFunctionName,
|
|
@@ -1646,7 +1715,12 @@ class GenerateResponseState {
|
|
|
1646
1715
|
LlamaText(new SpecialToken("EOT"))
|
|
1647
1716
|
], this.llamaChat.model.tokenizer)
|
|
1648
1717
|
.map((stopTrigger) => sectionSuffixDetector.addStopTrigger(stopTrigger));
|
|
1718
|
+
let tookInitialCheckpoint = false;
|
|
1649
1719
|
for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
|
|
1720
|
+
if (!tookInitialCheckpoint && this.llamaChat.sequence.needsCheckpoints) {
|
|
1721
|
+
await this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatSequenceStart.name, internalCheckpoints.chatSequenceStart.maxCheckpoints);
|
|
1722
|
+
tookInitialCheckpoint = true;
|
|
1723
|
+
}
|
|
1650
1724
|
pushAll(this.currentFunctionCallCurrentPartTokens, tokens);
|
|
1651
1725
|
sectionSuffixDetector.recordGeneration({
|
|
1652
1726
|
text: this.currentText,
|
|
@@ -1772,6 +1846,19 @@ class GenerateResponseState {
|
|
|
1772
1846
|
}));
|
|
1773
1847
|
}
|
|
1774
1848
|
async createNewEvaluationIterator() {
|
|
1849
|
+
if (this.tokens.length === 0) {
|
|
1850
|
+
if (this.evaluationIterator != null)
|
|
1851
|
+
return;
|
|
1852
|
+
const token = this.llamaChat.sequence.contextTokens.at(-1);
|
|
1853
|
+
if (token == null)
|
|
1854
|
+
throw new Error("No tokens to evaluate");
|
|
1855
|
+
this.llamaChat.sequence.model._llama._log(LlamaLogLevel.warn, "Attempted to evaluate with no input, reevaluating the last context sequence token");
|
|
1856
|
+
await this.llamaChat.sequence.eraseContextTokenRanges([{
|
|
1857
|
+
start: this.llamaChat.sequence.contextTokens.length - 1,
|
|
1858
|
+
end: this.llamaChat.sequence.contextTokens.length
|
|
1859
|
+
}]);
|
|
1860
|
+
this.tokens = [token];
|
|
1861
|
+
}
|
|
1775
1862
|
if (this.evaluationIterator != null)
|
|
1776
1863
|
await this.evaluationIterator.return();
|
|
1777
1864
|
this.currentIterationReplacementToken = undefined;
|
|
@@ -1881,6 +1968,7 @@ class GenerateResponseState {
|
|
|
1881
1968
|
}
|
|
1882
1969
|
}
|
|
1883
1970
|
recordStopGenerationEvaluation() {
|
|
1971
|
+
const hadInProgressStopTrigger = this.stopGenerationDetector.hasInProgressStops;
|
|
1884
1972
|
this.rerenderTriggerDetector.recordGeneration({
|
|
1885
1973
|
text: this.currentText,
|
|
1886
1974
|
tokens: this.currentTokens,
|
|
@@ -1898,6 +1986,9 @@ class GenerateResponseState {
|
|
|
1898
1986
|
});
|
|
1899
1987
|
if (this.llamaChat.model.isEogToken(this.currentToken))
|
|
1900
1988
|
this.currentQueuedTokenRelease?.createTokenIndexLock(0);
|
|
1989
|
+
if (this.grammar != null && !hadInProgressStopTrigger && this.stopGenerationDetector.hasInProgressStops &&
|
|
1990
|
+
this.llamaChat.sequence.needsCheckpoints)
|
|
1991
|
+
return this.llamaChat.sequence._takeNamedCheckpoint(internalCheckpoints.chatGrammarEnd.name, internalCheckpoints.chatGrammarEnd.maxCheckpoints);
|
|
1901
1992
|
}
|
|
1902
1993
|
popStreamRegulatorFreeTokens() {
|
|
1903
1994
|
pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
|
|
@@ -2020,9 +2111,11 @@ class GenerateResponseState {
|
|
|
2020
2111
|
return shouldReloadEvaluationState;
|
|
2021
2112
|
}
|
|
2022
2113
|
getSegmentBudget(segmentType) {
|
|
2023
|
-
const getBudget = (budget) => ((budget == null || budget === Infinity)
|
|
2024
|
-
? null
|
|
2025
|
-
: budget);
|
|
2114
|
+
const getBudget = (budget) => (budget == null
|
|
2115
|
+
? Math.ceil(defaultSegmentBudgetSize(this.llamaChat.sequence.contextSize))
|
|
2116
|
+
: budget === Infinity
|
|
2117
|
+
? null
|
|
2118
|
+
: budget);
|
|
2026
2119
|
if (this.budgets == null)
|
|
2027
2120
|
return null;
|
|
2028
2121
|
if (segmentType === "thought")
|