node-llama-cpp 3.12.0 → 3.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/bindings/Llama.js +16 -6
  2. package/dist/bindings/Llama.js.map +1 -1
  3. package/dist/bindings/getLlama.js +6 -2
  4. package/dist/bindings/getLlama.js.map +1 -1
  5. package/dist/bindings/types.d.ts +11 -2
  6. package/dist/bindings/types.js +11 -2
  7. package/dist/bindings/types.js.map +1 -1
  8. package/dist/bindings/utils/clearAllLocalBuilds.js +1 -1
  9. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
  10. package/dist/bindings/utils/compileLLamaCpp.d.ts +1 -0
  11. package/dist/bindings/utils/compileLLamaCpp.js +49 -12
  12. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
  13. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +4 -1
  14. package/dist/bindings/utils/detectAvailableComputeLayers.js +12 -6
  15. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
  16. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +1 -0
  17. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +19 -7
  18. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
  19. package/dist/bindings/utils/testBindingBinary.d.ts +1 -1
  20. package/dist/bindings/utils/testBindingBinary.js +13 -5
  21. package/dist/bindings/utils/testBindingBinary.js.map +1 -1
  22. package/dist/chatWrappers/HarmonyChatWrapper.js +27 -15
  23. package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -1
  24. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +8 -2
  25. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
  26. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +8 -4
  27. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
  28. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +14 -0
  29. package/dist/evaluator/LlamaChat/LlamaChat.js +126 -20
  30. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  31. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +53 -2
  32. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +162 -47
  33. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  34. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
  35. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  36. package/dist/evaluator/LlamaContext/LlamaContext.js +1 -1
  37. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  38. package/dist/tsconfig.tsbuildinfo +1 -1
  39. package/dist/types.d.ts +6 -1
  40. package/dist/types.js +2 -1
  41. package/dist/types.js.map +1 -1
  42. package/llama/CMakeLists.txt +20 -0
  43. package/llama/addon/globals/getGpuInfo.cpp +8 -4
  44. package/llama/binariesGithubRelease.json +1 -1
  45. package/llama/cmake/addVariantSuffix.cmake +21 -0
  46. package/llama/gitRelease.bundle +0 -0
  47. package/llama/llama.cpp.info.json +1 -1
  48. package/package.json +24 -23
  49. package/templates/packed/electron-typescript-react.json +1 -1
@@ -252,7 +252,21 @@ export type LLamaChatGenerateResponseOptions<Functions extends ChatModelFunction
252
252
  * Defaults to `Infinity`.
253
253
  */
254
254
  thoughtTokens?: number;
255
+ /**
256
+ * Budget for comment tokens.
257
+ *
258
+ * Defaults to `Infinity`.
259
+ */
260
+ commentTokens?: number;
255
261
  };
262
+ /**
263
+ * Stop the generation when the model tries to generate a non-textual segment or call a function.
264
+ *
265
+ * Useful for generating completions in a form of a model response.
266
+ *
267
+ * Defaults to `false`.
268
+ */
269
+ abortOnNonText?: boolean;
256
270
  } & ({
257
271
  grammar?: LlamaGrammar;
258
272
  functions?: never;
@@ -80,7 +80,7 @@ export class LlamaChat {
80
80
  return this.sequence.model;
81
81
  }
82
82
  async generateResponse(history, options = {}) {
83
- const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
83
+ const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
84
84
  this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
85
85
  const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
86
86
  onTextChunk,
@@ -107,12 +107,13 @@ export class LlamaChat {
107
107
  maxParallelFunctionCalls,
108
108
  contextShift,
109
109
  customStopTriggers,
110
+ abortOnNonText,
110
111
  lastEvaluationContextWindow: {
111
112
  history: lastEvaluationContextWindowHistory,
112
113
  minimumOverlapPercentageToPreventContextShift
113
114
  }
114
115
  });
115
- if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
116
+ if (generateResponseState.grammar != null && generateResponseState.functionsEnabled && !abortOnNonText)
116
117
  throw new Error("Using both grammar and functions is not supported yet");
117
118
  return await withLock([this._chatLock, "evaluate"], signal, async () => {
118
119
  try {
@@ -122,7 +123,6 @@ export class LlamaChat {
122
123
  await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
123
124
  };
124
125
  const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
125
- const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
126
126
  while (true) {
127
127
  generateResponseState.startTokenLoop();
128
128
  generateResponseState.handleRerender();
@@ -137,12 +137,15 @@ export class LlamaChat {
137
137
  generateResponseState.initFunctions();
138
138
  }
139
139
  }
140
+ const abortRes = generateResponseState.handleAbortTrigger("model");
141
+ if (abortRes != null)
142
+ return abortRes;
140
143
  if (shouldHandlePrefixTriggers) {
141
144
  const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
142
145
  if (handlePrefixTriggersRes != null)
143
146
  return handlePrefixTriggersRes;
144
147
  }
145
- if (generateResponseState.functionEvaluationMode !== false) {
148
+ if (generateResponseState.functionEvaluationMode !== false && !generateResponseState.abortOnNonText) {
146
149
  const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
147
150
  if (functionsCallsRes != null)
148
151
  return functionsCallsRes;
@@ -176,9 +179,9 @@ export class LlamaChat {
176
179
  if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
177
180
  break;
178
181
  if (await generateResponseState.handleBudgetTriggers()) {
179
- await loadContextWindowForBudgetTriggers();
180
- await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
181
- await generateResponseState.createNewEvaluationIterator();
182
+ generateResponseState.shouldRerender = true;
183
+ generateResponseState.skipClosingResponseItemOnRerender = true;
184
+ break;
182
185
  }
183
186
  if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
184
187
  break;
@@ -728,6 +731,7 @@ class GenerateResponseState {
728
731
  maxParallelFunctionCalls;
729
732
  contextShift;
730
733
  customStopTriggers;
734
+ abortOnNonText;
731
735
  minimumOverlapPercentageToPreventContextShift;
732
736
  functionsEnabled;
733
737
  repeatPenaltyEnabled;
@@ -771,6 +775,8 @@ class GenerateResponseState {
771
775
  // context shift loop
772
776
  shouldContextShift = false;
773
777
  shouldRerender = false;
778
+ skipClosingResponseItemOnRerender = false;
779
+ shouldAbortBecauseOfNonText = false;
774
780
  canAvoidReloadingHistory = false;
775
781
  contextWindowTokens = [];
776
782
  stopGenerationTriggers = [];
@@ -792,7 +798,7 @@ class GenerateResponseState {
792
798
  currentTokens = [];
793
799
  currentText = "";
794
800
  currentQueuedTokenRelease;
795
- constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
801
+ constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
796
802
  this.llamaChat = llamaChat;
797
803
  this.chatWrapper = chatWrapper;
798
804
  this.history = history;
@@ -819,6 +825,7 @@ class GenerateResponseState {
819
825
  this.maxParallelFunctionCalls = maxParallelFunctionCalls;
820
826
  this.contextShift = contextShift;
821
827
  this.customStopTriggers = customStopTriggers;
828
+ this.abortOnNonText = abortOnNonText ?? false;
822
829
  this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
823
830
  this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
824
831
  if (this.signal?.aborted)
@@ -856,7 +863,7 @@ class GenerateResponseState {
856
863
  if (this.grammar != null)
857
864
  StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
858
865
  .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
859
- if (this.functions != null && Object.keys(this.functions).length > 0)
866
+ if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
860
867
  this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
861
868
  this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
862
869
  this.chatWrapper.settings.functions.call.prefix
@@ -881,6 +888,17 @@ class GenerateResponseState {
881
888
  ? new Map()
882
889
  : SegmentHandler.getSegmentTokenCounts(lastModelMessageFullResponse, this.llamaChat.model.tokenizer)
883
890
  });
891
+ if (this.abortOnNonText) {
892
+ this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
893
+ this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
894
+ this.chatWrapper.settings.functions.call.prefix
895
+ ]), this.llamaChat.model.tokenizer));
896
+ for (const segmentType of allSegmentTypes) {
897
+ const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
898
+ if (segmentDefinition != null)
899
+ this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(segmentDefinition.prefix), this.llamaChat.model.tokenizer));
900
+ }
901
+ }
884
902
  this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
885
903
  }
886
904
  async dispose() {
@@ -929,7 +947,10 @@ class GenerateResponseState {
929
947
  });
930
948
  if (!hadThoughtSegments)
931
949
  return;
932
- this.segmentHandler.openSegment("thought");
950
+ if (this.abortOnNonText)
951
+ this.shouldAbortBecauseOfNonText = true;
952
+ else
953
+ this.segmentHandler.openSegment("thought");
933
954
  }
934
955
  ensureNotAborted() {
935
956
  if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
@@ -1033,10 +1054,12 @@ class GenerateResponseState {
1033
1054
  if (this.shouldRerender) {
1034
1055
  this.isRerender = true;
1035
1056
  this.streamRegulator.reset();
1036
- if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null) {
1057
+ if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null &&
1058
+ !this.skipClosingResponseItemOnRerender) {
1037
1059
  this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
1038
1060
  this.shouldRerender = false;
1039
1061
  }
1062
+ this.skipClosingResponseItemOnRerender = false;
1040
1063
  }
1041
1064
  }
1042
1065
  getContextWindowFunctionCallsTokens() {
@@ -1098,8 +1121,16 @@ class GenerateResponseState {
1098
1121
  }
1099
1122
  this.prefixTriggerDetectors.clear();
1100
1123
  for (const trigger of prefixTriggers ?? []) {
1124
+ const segmentBudget = trigger.type === "segment"
1125
+ ? this.getSegmentBudget(trigger.segmentType)
1126
+ : null;
1101
1127
  if (trigger.type === "functionCall" && !this.functionsEnabled)
1102
1128
  continue;
1129
+ else if (trigger.type === "segment" &&
1130
+ segmentBudget != null &&
1131
+ !this.segmentHandler.isSegmentTypeOpen(trigger.segmentType) &&
1132
+ this.segmentHandler.getSegmentTokensCount(trigger.segmentType) >= segmentBudget)
1133
+ continue;
1103
1134
  const prefixDetector = new StopGenerationDetector();
1104
1135
  StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
1105
1136
  .forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
@@ -1114,8 +1145,16 @@ class GenerateResponseState {
1114
1145
  }
1115
1146
  }
1116
1147
  this.noPrefixTrigger = noPrefixTrigger;
1148
+ const noPrefixTriggerSegmentBudget = noPrefixTrigger?.type === "segment"
1149
+ ? this.getSegmentBudget(noPrefixTrigger.segmentType)
1150
+ : null;
1117
1151
  if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
1118
1152
  this.noPrefixTrigger = undefined;
1153
+ else if (noPrefixTrigger?.type === "segment" &&
1154
+ noPrefixTriggerSegmentBudget != null &&
1155
+ !this.segmentHandler.isSegmentTypeOpen(noPrefixTrigger.segmentType) &&
1156
+ this.segmentHandler.getSegmentTokensCount(noPrefixTrigger.segmentType) >= noPrefixTriggerSegmentBudget)
1157
+ this.noPrefixTrigger = undefined;
1119
1158
  this.rerenderTriggers = rerender?.triggers ?? [];
1120
1159
  this.rerenderTriggerDetector.clearInProgressStops();
1121
1160
  this.rerenderTriggerDetector.clearTriggeredStops();
@@ -1163,6 +1202,10 @@ class GenerateResponseState {
1163
1202
  }
1164
1203
  initFunctions() {
1165
1204
  this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
1205
+ if (this.initiallyEngagedFunctionMode && this.abortOnNonText) {
1206
+ this.shouldAbortBecauseOfNonText = true;
1207
+ return;
1208
+ }
1166
1209
  if (this.initiallyEngagedFunctionMode) {
1167
1210
  StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
1168
1211
  .map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
@@ -1193,6 +1236,13 @@ class GenerateResponseState {
1193
1236
  await reloadTokens();
1194
1237
  };
1195
1238
  if (this.prefixTriggerDetectors.size === 0) {
1239
+ if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
1240
+ this.shouldAbortBecauseOfNonText = true;
1241
+ const stopRes = this.handleAbortTrigger("model");
1242
+ if (stopRes != null)
1243
+ return stopRes;
1244
+ return undefined;
1245
+ }
1196
1246
  if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
1197
1247
  await injectTokens(this.noPrefixTrigger.inject, true);
1198
1248
  this.functionEvaluationMode = "functionName";
@@ -1231,6 +1281,13 @@ class GenerateResponseState {
1231
1281
  return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
1232
1282
  return [item];
1233
1283
  });
1284
+ if (this.abortOnNonText && trigger.type !== "response") {
1285
+ this.shouldAbortBecauseOfNonText = true;
1286
+ const stopRes = this.handleAbortTrigger("model");
1287
+ if (stopRes != null)
1288
+ return stopRes;
1289
+ return undefined;
1290
+ }
1234
1291
  this.streamRegulator.reset();
1235
1292
  if (trigger.type === "segment") {
1236
1293
  pushAll(this.prefixTriggerTokens, triggerTokens);
@@ -1263,6 +1320,13 @@ class GenerateResponseState {
1263
1320
  this.prefixTriggerDetectors.delete(triggerDetector);
1264
1321
  }
1265
1322
  if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
1323
+ if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
1324
+ this.shouldAbortBecauseOfNonText = true;
1325
+ const stopRes = this.handleAbortTrigger("model");
1326
+ if (stopRes != null)
1327
+ return stopRes;
1328
+ return undefined;
1329
+ }
1266
1330
  this.streamRegulator.reset();
1267
1331
  continueGeneration = false;
1268
1332
  if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
@@ -1777,6 +1841,10 @@ class GenerateResponseState {
1777
1841
  });
1778
1842
  if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
1779
1843
  this.functionSyntaxStartDetector.hasTriggeredStops) {
1844
+ if (this.abortOnNonText) {
1845
+ this.shouldAbortBecauseOfNonText = true;
1846
+ return;
1847
+ }
1780
1848
  this.functionEvaluationMode = "functionName";
1781
1849
  this.currentQueuedTokenRelease.createTextIndexLock(0);
1782
1850
  this.stopGenerationDetector.clearTriggeredStops();
@@ -1918,21 +1986,37 @@ class GenerateResponseState {
1918
1986
  }
1919
1987
  async handleBudgetTriggers() {
1920
1988
  let shouldReloadEvaluationState = false;
1921
- const hasBudget = (budget) => budget != null && budget !== Infinity;
1922
- const hasBudgetTriggers = this.budgets != null && hasBudget(this.budgets.thoughtTokens);
1923
- if (!hasBudgetTriggers)
1989
+ if (this.budgets == null)
1924
1990
  return shouldReloadEvaluationState;
1925
- if (hasBudget(this.budgets.thoughtTokens) && this.segmentHandler.isSegmentTypeOpen("thought")) {
1926
- const usedThoughtTokens = this.segmentHandler.getSegmentTokensCount("thought");
1927
- if (usedThoughtTokens >= this.budgets.thoughtTokens) {
1928
- this.segmentHandler.closeSegment("thought");
1991
+ for (const segmentType of this.segmentHandler.getOpenSegmentStack().reverse()) {
1992
+ const budget = this.getSegmentBudget(segmentType);
1993
+ if (budget == null)
1994
+ continue;
1995
+ const usedSegmentTokens = this.segmentHandler.getSegmentTokensCount(segmentType);
1996
+ if (usedSegmentTokens >= budget) {
1997
+ this.segmentHandler.closeSegment(segmentType);
1929
1998
  shouldReloadEvaluationState = true;
1930
1999
  }
1931
2000
  }
1932
2001
  return shouldReloadEvaluationState;
1933
2002
  }
2003
+ getSegmentBudget(segmentType) {
2004
+ const getBudget = (budget) => ((budget == null || budget === Infinity)
2005
+ ? null
2006
+ : budget);
2007
+ if (this.budgets == null)
2008
+ return null;
2009
+ if (segmentType === "thought")
2010
+ return getBudget(this.budgets.thoughtTokens);
2011
+ else if (segmentType === "comment")
2012
+ return getBudget(this.budgets.commentTokens);
2013
+ void segmentType;
2014
+ return null;
2015
+ }
1934
2016
  handleShouldRerender() {
1935
2017
  this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops;
2018
+ if (this.abortOnNonText && this.shouldRerender)
2019
+ this.shouldAbortBecauseOfNonText = true;
1936
2020
  return this.shouldRerender;
1937
2021
  }
1938
2022
  updateShouldContextShift() {
@@ -1940,7 +2024,7 @@ class GenerateResponseState {
1940
2024
  return this.shouldContextShift;
1941
2025
  }
1942
2026
  get shouldAbort() {
1943
- return !!(this.signal?.aborted && this.stopOnAbortSignal);
2027
+ return !!(this.signal?.aborted && this.stopOnAbortSignal) || this.shouldAbortBecauseOfNonText;
1944
2028
  }
1945
2029
  handleAbortTrigger(lastHistoryItemType) {
1946
2030
  if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
@@ -1960,7 +2044,9 @@ class GenerateResponseState {
1960
2044
  contextShiftMetadata: this.lastHistoryCompressionMetadata
1961
2045
  },
1962
2046
  metadata: {
1963
- stopReason: "abort"
2047
+ stopReason: this.shouldAbortBecauseOfNonText
2048
+ ? "eogToken"
2049
+ : "abort"
1964
2050
  }
1965
2051
  };
1966
2052
  }
@@ -2083,6 +2169,26 @@ class SegmentHandler {
2083
2169
  get topOpenSegmentType() {
2084
2170
  return this._segmentsStack.at(-1);
2085
2171
  }
2172
+ /**
2173
+ * First segment in the stack is the top most that'll close last.
2174
+ * ```
2175
+ * <segment1>
2176
+ * some text here
2177
+ * <segment2>
2178
+ * some text here
2179
+ * <segment3>
2180
+ * some text here
2181
+ * </segment3>
2182
+ * ```
2183
+ * In that example, the top most segment is `segment1`, and the last open segment is `segment2` (which is the next one to close).
2184
+ * So in that example, this function will return:
2185
+ * ```
2186
+ * ["segment1", "segment2"]
2187
+ * ```
2188
+ */
2189
+ getOpenSegmentStack() {
2190
+ return this._segmentsStack.slice(this._ownedSegmentsStackLength);
2191
+ }
2086
2192
  _processTokens(tokens, text) {
2087
2193
  const queuedTokenRelease = this._streamRegulator.addChunk({
2088
2194
  tokens,