node-llama-cpp 3.11.0 → 3.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +1 -1
  2. package/dist/bindings/Llama.d.ts +5 -1
  3. package/dist/bindings/Llama.js +11 -1
  4. package/dist/bindings/Llama.js.map +1 -1
  5. package/dist/bindings/types.d.ts +5 -2
  6. package/dist/bindings/types.js +16 -1
  7. package/dist/bindings/types.js.map +1 -1
  8. package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
  9. package/dist/chatWrappers/HarmonyChatWrapper.js +539 -0
  10. package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
  11. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +8 -2
  12. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
  13. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +4 -2
  14. package/dist/chatWrappers/utils/resolveChatWrapper.js +21 -6
  15. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  16. package/dist/cli/commands/ChatCommand.d.ts +2 -1
  17. package/dist/cli/commands/ChatCommand.js +21 -7
  18. package/dist/cli/commands/ChatCommand.js.map +1 -1
  19. package/dist/cli/commands/CompleteCommand.d.ts +2 -1
  20. package/dist/cli/commands/CompleteCommand.js +21 -7
  21. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  22. package/dist/cli/commands/InfillCommand.d.ts +2 -1
  23. package/dist/cli/commands/InfillCommand.js +21 -7
  24. package/dist/cli/commands/InfillCommand.js.map +1 -1
  25. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
  26. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +16 -5
  27. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  28. package/dist/cli/recommendedModels.js +22 -0
  29. package/dist/cli/recommendedModels.js.map +1 -1
  30. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +14 -0
  31. package/dist/evaluator/LlamaChat/LlamaChat.js +369 -48
  32. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  33. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +52 -2
  34. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +162 -47
  35. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  36. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
  37. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  38. package/dist/gguf/insights/GgufInsights.js +22 -3
  39. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  40. package/dist/gguf/types/GgufMetadataTypes.d.ts +19 -2
  41. package/dist/gguf/types/GgufMetadataTypes.js +17 -0
  42. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  43. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +2 -1
  44. package/dist/gguf/types/GgufTensorInfoTypes.js +1 -0
  45. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
  46. package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
  47. package/dist/gguf/utils/ggufQuantNames.js +1 -0
  48. package/dist/gguf/utils/ggufQuantNames.js.map +1 -1
  49. package/dist/index.d.ts +3 -2
  50. package/dist/index.js +2 -1
  51. package/dist/index.js.map +1 -1
  52. package/dist/tsconfig.tsbuildinfo +1 -1
  53. package/dist/types.d.ts +150 -3
  54. package/dist/types.js +2 -1
  55. package/dist/types.js.map +1 -1
  56. package/dist/utils/gbnfJson/types.d.ts +1 -1
  57. package/dist/utils/gbnfJson/types.js.map +1 -1
  58. package/dist/utils/getChatWrapperSegmentDefinition.js +2 -0
  59. package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -1
  60. package/llama/binariesGithubRelease.json +1 -1
  61. package/llama/gitRelease.bundle +0 -0
  62. package/llama/llama.cpp.info.json +1 -1
  63. package/package.json +18 -16
  64. package/templates/packed/electron-typescript-react.json +1 -1
@@ -80,7 +80,7 @@ export class LlamaChat {
80
80
  return this.sequence.model;
81
81
  }
82
82
  async generateResponse(history, options = {}) {
83
- const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
83
+ const { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText = false, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = options;
84
84
  this.sequence.tokenPredictor?.updateInputTokens?.(this.model.tokenize(findLastUserMessageInChatHistory(history)?.text ?? ""));
85
85
  const generateResponseState = new GenerateResponseState(this, this._chatWrapper, history, {
86
86
  onTextChunk,
@@ -107,12 +107,13 @@ export class LlamaChat {
107
107
  maxParallelFunctionCalls,
108
108
  contextShift,
109
109
  customStopTriggers,
110
+ abortOnNonText,
110
111
  lastEvaluationContextWindow: {
111
112
  history: lastEvaluationContextWindowHistory,
112
113
  minimumOverlapPercentageToPreventContextShift
113
114
  }
114
115
  });
115
- if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
116
+ if (generateResponseState.grammar != null && generateResponseState.functionsEnabled && !abortOnNonText)
116
117
  throw new Error("Using both grammar and functions is not supported yet");
117
118
  return await withLock([this._chatLock, "evaluate"], signal, async () => {
118
119
  try {
@@ -122,11 +123,13 @@ export class LlamaChat {
122
123
  await generateResponseState.loadContextWindow(generateResponseState.getResolvedHistoryWithCurrentModelResponse(), generateResponseState.getContextWindowsHistoryWithCurrentModelResponse(), false, avoidReloadingHistory);
123
124
  };
124
125
  const loadContextWindowForFunctionCallingLoop = async () => loadContextWindow(true);
125
- const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
126
126
  while (true) {
127
127
  generateResponseState.startTokenLoop();
128
+ generateResponseState.handleRerender();
129
+ const shouldHandlePrefixTriggers = generateResponseState.isRerender;
128
130
  generateResponseState.canAvoidReloadingHistory = false;
129
131
  await loadContextWindow();
132
+ generateResponseState.isRerender = false;
130
133
  generateResponseState.addStopGenerationTriggersFromChatWrapper();
131
134
  if (generateResponseState.generatedTokens === 0) {
132
135
  generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
@@ -134,7 +137,15 @@ export class LlamaChat {
134
137
  generateResponseState.initFunctions();
135
138
  }
136
139
  }
137
- if (generateResponseState.functionEvaluationMode !== false) {
140
+ const abortRes = generateResponseState.handleAbortTrigger("model");
141
+ if (abortRes != null)
142
+ return abortRes;
143
+ if (shouldHandlePrefixTriggers) {
144
+ const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
145
+ if (handlePrefixTriggersRes != null)
146
+ return handlePrefixTriggersRes;
147
+ }
148
+ if (generateResponseState.functionEvaluationMode !== false && !generateResponseState.abortOnNonText) {
138
149
  const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
139
150
  if (functionsCallsRes != null)
140
151
  return functionsCallsRes;
@@ -165,21 +176,21 @@ export class LlamaChat {
165
176
  const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
166
177
  if (maxTokensTriggerRes != null)
167
178
  return maxTokensTriggerRes;
168
- if (generateResponseState.updateShouldContextShift())
179
+ if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
169
180
  break;
170
181
  if (await generateResponseState.handleBudgetTriggers()) {
171
- await loadContextWindowForBudgetTriggers();
172
- await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
173
- await generateResponseState.createNewEvaluationIterator();
182
+ generateResponseState.shouldRerender = true;
183
+ generateResponseState.skipClosingResponseItemOnRerender = true;
184
+ break;
174
185
  }
175
- if (generateResponseState.updateShouldContextShift())
186
+ if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
176
187
  break;
177
188
  const abortRes = generateResponseState.handleAbortTrigger("model");
178
189
  if (abortRes != null)
179
190
  return abortRes;
180
191
  }
181
192
  generateResponseState.isFirstEvaluation = false;
182
- if (generateResponseState.shouldContextShift)
193
+ if (generateResponseState.shouldRerender || generateResponseState.shouldContextShift)
183
194
  continue;
184
195
  break;
185
196
  }
@@ -236,10 +247,12 @@ export class LlamaChat {
236
247
  while (true) {
237
248
  generateResponseState.startTokenLoop();
238
249
  const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
250
+ generateResponseState.isRerender = false;
239
251
  generateResponseState.functionEvaluationMode = false;
240
252
  generateResponseState.addStopGenerationTriggersFromChatWrapper();
241
253
  if (userTextSuffix != null && userTextSuffix.values.length > 0)
242
254
  generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
255
+ generateResponseState.rerenderTriggers.forEach((trigger) => (generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(trigger, this.model.tokenizer))));
243
256
  allSegmentTypes
244
257
  .map((segmentType) => getChatWrapperSegmentDefinition(this._chatWrapper.settings, segmentType))
245
258
  .filter((segmentDefinition) => segmentDefinition != null)
@@ -545,13 +558,13 @@ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
545
558
  `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
546
559
  "where not all user messages are properly added to the the result LlamaText");
547
560
  }
548
- async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
561
+ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, isRerender, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
549
562
  if (sequence == null)
550
563
  throw new DisposedError();
551
564
  const model = sequence.model;
552
565
  const context = sequence.context;
553
566
  let removeRawFromHistory = false;
554
- if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
567
+ if ((isFirstEvaluation || isRerender) && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
555
568
  const newContextWindow = lastEvaluationContextWindowHistory.slice();
556
569
  if (endWithUserText) {
557
570
  if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
@@ -565,7 +578,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
565
578
  type: "model",
566
579
  response: []
567
580
  });
568
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
581
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
569
582
  chatHistory: newContextWindow,
570
583
  availableFunctions: functions,
571
584
  documentFunctionParams
@@ -574,7 +587,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
574
587
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
575
588
  const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
576
589
  const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
577
- if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
590
+ if (isRerender || existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
578
591
  return {
579
592
  history: newContextWindow,
580
593
  stopGenerationTriggers,
@@ -584,7 +597,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
584
597
  ignoreStartText: ignoreStartText ?? [],
585
598
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
586
599
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
587
- userTextSuffix
600
+ userTextSuffix,
601
+ prefixTriggers,
602
+ noPrefixTrigger,
603
+ rerender,
604
+ detectFunctionCalls
588
605
  };
589
606
  }
590
607
  }
@@ -607,7 +624,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
607
624
  functions,
608
625
  documentFunctionParams
609
626
  });
610
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
627
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
611
628
  chatHistory: compressedHistory,
612
629
  availableFunctions: functions,
613
630
  documentFunctionParams
@@ -621,11 +638,15 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
621
638
  ignoreStartText: ignoreStartText ?? [],
622
639
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
623
640
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
624
- userTextSuffix
641
+ userTextSuffix,
642
+ prefixTriggers,
643
+ noPrefixTrigger,
644
+ rerender,
645
+ detectFunctionCalls
625
646
  };
626
647
  }
627
648
  {
628
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
649
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
629
650
  chatHistory: resolvedHistory,
630
651
  availableFunctions: functions,
631
652
  documentFunctionParams
@@ -641,7 +662,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
641
662
  ignoreStartText: ignoreStartText ?? [],
642
663
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
643
664
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
644
- userTextSuffix
665
+ userTextSuffix,
666
+ prefixTriggers,
667
+ noPrefixTrigger,
668
+ rerender,
669
+ detectFunctionCalls
645
670
  };
646
671
  }
647
672
  const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
@@ -658,7 +683,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
658
683
  functions,
659
684
  documentFunctionParams
660
685
  });
661
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
686
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
662
687
  chatHistory: compressedHistory,
663
688
  availableFunctions: functions,
664
689
  documentFunctionParams
@@ -672,7 +697,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
672
697
  ignoreStartText: ignoreStartText ?? [],
673
698
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
674
699
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
675
- userTextSuffix
700
+ userTextSuffix,
701
+ prefixTriggers,
702
+ noPrefixTrigger,
703
+ rerender,
704
+ detectFunctionCalls
676
705
  };
677
706
  }
678
707
  class GenerateResponseState {
@@ -702,6 +731,7 @@ class GenerateResponseState {
702
731
  maxParallelFunctionCalls;
703
732
  contextShift;
704
733
  customStopTriggers;
734
+ abortOnNonText;
705
735
  minimumOverlapPercentageToPreventContextShift;
706
736
  functionsEnabled;
707
737
  repeatPenaltyEnabled;
@@ -711,6 +741,7 @@ class GenerateResponseState {
711
741
  functionNameGrammar;
712
742
  functionsGrammar;
713
743
  functionsEvaluationState;
744
+ functionSyntaxStartDetectorEnabled = true;
714
745
  streamRegulator = new TokenStreamRegulator();
715
746
  stopGenerationDetector = new StopGenerationDetector();
716
747
  customStopGenerationTriggersDetector = new StopGenerationDetector();
@@ -723,6 +754,7 @@ class GenerateResponseState {
723
754
  res = [];
724
755
  pendingTokens = [];
725
756
  ignoredStartTextTokens = [];
757
+ prefixTriggerTokens = [];
726
758
  resFunctionCalls = [];
727
759
  segmentHandler;
728
760
  pendingPartialTokens = [];
@@ -735,12 +767,16 @@ class GenerateResponseState {
735
767
  releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
736
768
  generatedTokens = 0;
737
769
  isFirstEvaluation = true;
770
+ isRerender = true; // first render is a rerender
738
771
  initiallyEngagedFunctionMode = false;
739
772
  lastContextWindowHistory;
740
773
  lastHistoryCompressionMetadata;
741
774
  restartEvaluationIterator = false;
742
775
  // context shift loop
743
776
  shouldContextShift = false;
777
+ shouldRerender = false;
778
+ skipClosingResponseItemOnRerender = false;
779
+ shouldAbortBecauseOfNonText = false;
744
780
  canAvoidReloadingHistory = false;
745
781
  contextWindowTokens = [];
746
782
  stopGenerationTriggers = [];
@@ -748,6 +784,11 @@ class GenerateResponseState {
748
784
  functionCallInitiallyEngaged = false;
749
785
  disengageInitiallyEngagedFunctionCall = [];
750
786
  userTextSuffix = undefined;
787
+ prefixTriggerDetectors = new Map();
788
+ noPrefixTrigger = undefined;
789
+ rerenderTriggers = [];
790
+ rerenderTriggerDetector = new StopGenerationDetector();
791
+ rerenderActions = undefined;
751
792
  tokens = [];
752
793
  // token evaluation loop
753
794
  evaluationIterator;
@@ -757,7 +798,7 @@ class GenerateResponseState {
757
798
  currentTokens = [];
758
799
  currentText = "";
759
800
  currentQueuedTokenRelease;
760
- constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
801
+ constructor(llamaChat, chatWrapper, history, { onTextChunk, onToken, onResponseChunk, onFunctionCallParamsChunk, budgets, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, seed, grammar, trimWhitespaceSuffix = defaultTrimWhitespaceSuffix, repeatPenalty = {}, tokenBias, evaluationPriority = defaultEvaluationPriority, functions, onFunctionCall, documentFunctionParams, maxParallelFunctionCalls, contextShift = defaultContextShiftOptions, customStopTriggers, abortOnNonText, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
761
802
  this.llamaChat = llamaChat;
762
803
  this.chatWrapper = chatWrapper;
763
804
  this.history = history;
@@ -784,6 +825,7 @@ class GenerateResponseState {
784
825
  this.maxParallelFunctionCalls = maxParallelFunctionCalls;
785
826
  this.contextShift = contextShift;
786
827
  this.customStopTriggers = customStopTriggers;
828
+ this.abortOnNonText = abortOnNonText ?? false;
787
829
  this.minimumOverlapPercentageToPreventContextShift = minimumOverlapPercentageToPreventContextShift;
788
830
  this.functionsEnabled = (this.functions != null && Object.keys(this.functions).length > 0);
789
831
  if (this.signal?.aborted)
@@ -821,7 +863,7 @@ class GenerateResponseState {
821
863
  if (this.grammar != null)
822
864
  StopGenerationDetector.resolveStopTriggers(this.grammar.stopGenerationTriggers, this.llamaChat.model.tokenizer)
823
865
  .map((stopTrigger) => this.stopGenerationDetector.addStopTrigger(stopTrigger));
824
- if (this.functions != null && Object.keys(this.functions).length > 0)
866
+ if (this.functions != null && Object.keys(this.functions).length > 0 && !this.abortOnNonText)
825
867
  this.functionSyntaxStartDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
826
868
  this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
827
869
  this.chatWrapper.settings.functions.call.prefix
@@ -846,6 +888,17 @@ class GenerateResponseState {
846
888
  ? new Map()
847
889
  : SegmentHandler.getSegmentTokenCounts(lastModelMessageFullResponse, this.llamaChat.model.tokenizer)
848
890
  });
891
+ if (this.abortOnNonText) {
892
+ this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText([
893
+ this.chatWrapper.settings.functions?.parallelism?.call?.sectionPrefix ?? "",
894
+ this.chatWrapper.settings.functions.call.prefix
895
+ ]), this.llamaChat.model.tokenizer));
896
+ for (const segmentType of allSegmentTypes) {
897
+ const segmentDefinition = getChatWrapperSegmentDefinition(this.chatWrapper.settings, segmentType);
898
+ if (segmentDefinition != null)
899
+ this.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(LlamaText(segmentDefinition.prefix), this.llamaChat.model.tokenizer));
900
+ }
901
+ }
849
902
  this.getPenaltyTokens = this.getPenaltyTokens.bind(this);
850
903
  }
851
904
  async dispose() {
@@ -894,7 +947,10 @@ class GenerateResponseState {
894
947
  });
895
948
  if (!hadThoughtSegments)
896
949
  return;
897
- this.segmentHandler.openSegment("thought");
950
+ if (this.abortOnNonText)
951
+ this.shouldAbortBecauseOfNonText = true;
952
+ else
953
+ this.segmentHandler.openSegment("thought");
898
954
  }
899
955
  ensureNotAborted() {
900
956
  if (this.signal?.aborted && (!this.stopOnAbortSignal || this.res.length === 0))
@@ -930,7 +986,8 @@ class GenerateResponseState {
930
986
  let mostExhaustiveTriggeredStopsLeftoverTokens = [];
931
987
  const lastTokensForDetokenizer = resolveLastTokens([
932
988
  this.contextWindowTokens,
933
- this.ignoredStartTextTokens
989
+ this.ignoredStartTextTokens,
990
+ this.prefixTriggerTokens
934
991
  ]);
935
992
  const pendingPartialTokens = [];
936
993
  for (let i = 0; i < this.pendingTokens.length; i++) {
@@ -993,6 +1050,18 @@ class GenerateResponseState {
993
1050
  this.ensureNotAborted();
994
1051
  this.shouldContextShift = false;
995
1052
  }
1053
+ handleRerender() {
1054
+ if (this.shouldRerender) {
1055
+ this.isRerender = true;
1056
+ this.streamRegulator.reset();
1057
+ if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null &&
1058
+ !this.skipClosingResponseItemOnRerender) {
1059
+ this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
1060
+ this.shouldRerender = false;
1061
+ }
1062
+ this.skipClosingResponseItemOnRerender = false;
1063
+ }
1064
+ }
996
1065
  getContextWindowFunctionCallsTokens() {
997
1066
  if (this.functionEvaluationMode === false)
998
1067
  return [];
@@ -1019,14 +1088,15 @@ class GenerateResponseState {
1019
1088
  async loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText = false, avoidReloadingHistory = false) {
1020
1089
  const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
1021
1090
  const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
1022
- if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
1023
- const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
1091
+ if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || this.isRerender || !this.llamaChat.sequence.isLoadedToMemory) {
1092
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = await getContextWindow({
1024
1093
  resolvedHistory: resolvedHistory,
1025
1094
  resolvedContextShift: this.resolvedContextShift,
1026
1095
  lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
1027
- pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
1028
- this.pendingPartialTokens.length,
1096
+ pendingTokensCount: this.prefixTriggerTokens.length + this.pendingTokens.length + queuedChunkTokens.length +
1097
+ functionCallsTokens.length + this.pendingPartialTokens.length,
1029
1098
  isFirstEvaluation: this.isFirstEvaluation,
1099
+ isRerender: this.isRerender,
1030
1100
  chatWrapper: this.chatWrapper,
1031
1101
  lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
1032
1102
  minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
@@ -1043,6 +1113,61 @@ class GenerateResponseState {
1043
1113
  this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
1044
1114
  this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
1045
1115
  this.userTextSuffix = userTextSuffix;
1116
+ if (this.isRerender) {
1117
+ this.prefixTriggerTokens.length = 0;
1118
+ for (const prefixDetector of this.prefixTriggerDetectors.keys()) {
1119
+ prefixDetector.clearInProgressStops();
1120
+ prefixDetector.clearTriggeredStops();
1121
+ }
1122
+ this.prefixTriggerDetectors.clear();
1123
+ for (const trigger of prefixTriggers ?? []) {
1124
+ const segmentBudget = trigger.type === "segment"
1125
+ ? this.getSegmentBudget(trigger.segmentType)
1126
+ : null;
1127
+ if (trigger.type === "functionCall" && !this.functionsEnabled)
1128
+ continue;
1129
+ else if (trigger.type === "segment" &&
1130
+ segmentBudget != null &&
1131
+ !this.segmentHandler.isSegmentTypeOpen(trigger.segmentType) &&
1132
+ this.segmentHandler.getSegmentTokensCount(trigger.segmentType) >= segmentBudget)
1133
+ continue;
1134
+ const prefixDetector = new StopGenerationDetector();
1135
+ StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
1136
+ .forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
1137
+ this.prefixTriggerDetectors.set(prefixDetector, { inject: trigger.inject, trigger });
1138
+ const inject = trigger.inject;
1139
+ if (inject != null && inject.values.length > 0) {
1140
+ const fullPrefixDetector = new StopGenerationDetector();
1141
+ StopGenerationDetector
1142
+ .resolveStopTriggers(trigger.triggers.map((trigger) => LlamaText([trigger, inject])), this.llamaChat.model.tokenizer)
1143
+ .forEach((stopTrigger) => fullPrefixDetector.addStopTrigger(stopTrigger));
1144
+ this.prefixTriggerDetectors.set(fullPrefixDetector, { trigger });
1145
+ }
1146
+ }
1147
+ this.noPrefixTrigger = noPrefixTrigger;
1148
+ const noPrefixTriggerSegmentBudget = noPrefixTrigger?.type === "segment"
1149
+ ? this.getSegmentBudget(noPrefixTrigger.segmentType)
1150
+ : null;
1151
+ if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
1152
+ this.noPrefixTrigger = undefined;
1153
+ else if (noPrefixTrigger?.type === "segment" &&
1154
+ noPrefixTriggerSegmentBudget != null &&
1155
+ !this.segmentHandler.isSegmentTypeOpen(noPrefixTrigger.segmentType) &&
1156
+ this.segmentHandler.getSegmentTokensCount(noPrefixTrigger.segmentType) >= noPrefixTriggerSegmentBudget)
1157
+ this.noPrefixTrigger = undefined;
1158
+ this.rerenderTriggers = rerender?.triggers ?? [];
1159
+ this.rerenderTriggerDetector.clearInProgressStops();
1160
+ this.rerenderTriggerDetector.clearTriggeredStops();
1161
+ this.rerenderTriggerDetector = new StopGenerationDetector();
1162
+ this.rerenderActions = rerender?.action;
1163
+ this.functionSyntaxStartDetectorEnabled = detectFunctionCalls ?? true;
1164
+ if (!this.functionSyntaxStartDetectorEnabled)
1165
+ this.functionSyntaxStartDetector.clearInProgressStops();
1166
+ if (rerender?.triggers != null) {
1167
+ StopGenerationDetector.resolveStopTriggers(rerender.triggers, this.llamaChat.model.tokenizer)
1168
+ .map((stopTrigger) => this.rerenderTriggerDetector.addStopTrigger(stopTrigger));
1169
+ }
1170
+ }
1046
1171
  this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
1047
1172
  this.lastContextWindowHistory = contextWindowHistory;
1048
1173
  this.segmentHandler.resetContextWindow();
@@ -1055,6 +1180,7 @@ class GenerateResponseState {
1055
1180
  this.tokens = [
1056
1181
  ...this.contextWindowTokens,
1057
1182
  ...this.ignoredStartTextTokens,
1183
+ ...this.prefixTriggerTokens,
1058
1184
  ...this.pendingTokens,
1059
1185
  ...queuedChunkTokens,
1060
1186
  ...functionCallsTokens,
@@ -1076,6 +1202,10 @@ class GenerateResponseState {
1076
1202
  }
1077
1203
  initFunctions() {
1078
1204
  this.initiallyEngagedFunctionMode = this.functionCallInitiallyEngaged;
1205
+ if (this.initiallyEngagedFunctionMode && this.abortOnNonText) {
1206
+ this.shouldAbortBecauseOfNonText = true;
1207
+ return;
1208
+ }
1079
1209
  if (this.initiallyEngagedFunctionMode) {
1080
1210
  StopGenerationDetector.resolveStopTriggers(this.disengageInitiallyEngagedFunctionCall, this.llamaChat.model.tokenizer)
1081
1211
  .map((stopTrigger) => this.disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
@@ -1090,6 +1220,140 @@ class GenerateResponseState {
1090
1220
  this.restartEvaluationIterator = true;
1091
1221
  }
1092
1222
  }
1223
+ async handlePrefixTriggers(loadContextWindow) {
1224
+ const reloadTokens = async () => {
1225
+ this.startTokenLoop();
1226
+ await loadContextWindow();
1227
+ };
1228
+ const injectTokens = async (text, alignStateTokens = false) => {
1229
+ if (text == null)
1230
+ return;
1231
+ const tokens = text.tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
1232
+ if (tokens.length === 0)
1233
+ return;
1234
+ pushAll(this.prefixTriggerTokens, tokens);
1235
+ if (alignStateTokens)
1236
+ await reloadTokens();
1237
+ };
1238
+ if (this.prefixTriggerDetectors.size === 0) {
1239
+ if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
1240
+ this.shouldAbortBecauseOfNonText = true;
1241
+ const stopRes = this.handleAbortTrigger("model");
1242
+ if (stopRes != null)
1243
+ return stopRes;
1244
+ return undefined;
1245
+ }
1246
+ if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
1247
+ await injectTokens(this.noPrefixTrigger.inject, true);
1248
+ this.functionEvaluationMode = "functionName";
1249
+ }
1250
+ else if (this.noPrefixTrigger?.type === "segment") {
1251
+ await injectTokens(this.noPrefixTrigger.inject, true);
1252
+ this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
1253
+ }
1254
+ else if (this.noPrefixTrigger?.type === "response")
1255
+ await injectTokens(this.noPrefixTrigger.inject, true);
1256
+ return undefined;
1257
+ }
1258
+ const generatedTokens = [];
1259
+ let isFirstToken = true;
1260
+ let continueGeneration = true;
1261
+ for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
1262
+ pushAll(generatedTokens, tokens);
1263
+ for (const [triggerDetector, { trigger, inject }] of [...this.prefixTriggerDetectors.entries()]) {
1264
+ triggerDetector.recordGeneration({
1265
+ text: this.currentText,
1266
+ tokens: this.currentTokens,
1267
+ startNewChecks: isFirstToken,
1268
+ triggerMustStartWithGeneration: true
1269
+ });
1270
+ if (triggerDetector.hasTriggeredStops) {
1271
+ const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggerDetector.getTriggeredStops());
1272
+ const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
1273
+ ? firstRemainingGenerationAfterStop === ""
1274
+ ? []
1275
+ : this.llamaChat.model.tokenize(firstRemainingGenerationAfterStop, false, "trimLeadingSpace")
1276
+ : (firstRemainingGenerationAfterStop ?? []);
1277
+ const triggerTokens = (stopTrigger == null || remainingTokens.length === 0)
1278
+ ? generatedTokens
1279
+ : stopTrigger.flatMap((item) => {
1280
+ if (typeof item === "string")
1281
+ return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
1282
+ return [item];
1283
+ });
1284
+ if (this.abortOnNonText && trigger.type !== "response") {
1285
+ this.shouldAbortBecauseOfNonText = true;
1286
+ const stopRes = this.handleAbortTrigger("model");
1287
+ if (stopRes != null)
1288
+ return stopRes;
1289
+ return undefined;
1290
+ }
1291
+ this.streamRegulator.reset();
1292
+ if (trigger.type === "segment") {
1293
+ pushAll(this.prefixTriggerTokens, triggerTokens);
1294
+ if (inject != null)
1295
+ await injectTokens(inject);
1296
+ await reloadTokens();
1297
+ this.segmentHandler.openSegment(trigger.segmentType);
1298
+ }
1299
+ else if (trigger.type === "response") {
1300
+ pushAll(this.prefixTriggerTokens, triggerTokens);
1301
+ if (inject != null)
1302
+ await injectTokens(inject);
1303
+ await reloadTokens();
1304
+ }
1305
+ else if (trigger.type === "functionCall") {
1306
+ if (trigger.replaceTrigger === false)
1307
+ pushAll(this.prefixTriggerTokens, triggerTokens);
1308
+ if (inject != null)
1309
+ await injectTokens(inject);
1310
+ await reloadTokens();
1311
+ this.functionEvaluationMode = "functionName";
1312
+ }
1313
+ else
1314
+ void trigger;
1315
+ this.prefixTriggerDetectors.clear();
1316
+ continueGeneration = false;
1317
+ break;
1318
+ }
1319
+ else if (!triggerDetector.hasInProgressStops)
1320
+ this.prefixTriggerDetectors.delete(triggerDetector);
1321
+ }
1322
+ if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
1323
+ if (this.abortOnNonText && this.noPrefixTrigger != null && this.noPrefixTrigger.type !== "response") {
1324
+ this.shouldAbortBecauseOfNonText = true;
1325
+ const stopRes = this.handleAbortTrigger("model");
1326
+ if (stopRes != null)
1327
+ return stopRes;
1328
+ return undefined;
1329
+ }
1330
+ this.streamRegulator.reset();
1331
+ continueGeneration = false;
1332
+ if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
1333
+ await injectTokens(this.noPrefixTrigger.inject, true);
1334
+ this.functionEvaluationMode = "functionName";
1335
+ }
1336
+ else if (this.noPrefixTrigger?.type === "segment") {
1337
+ await injectTokens(this.noPrefixTrigger.inject, true);
1338
+ this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
1339
+ }
1340
+ else if (this.noPrefixTrigger?.type === "response")
1341
+ await injectTokens(this.noPrefixTrigger.inject, true);
1342
+ else
1343
+ this.streamRegulator.addChunk({
1344
+ tokens: generatedTokens,
1345
+ text: this.llamaChat.model.detokenize(generatedTokens, false, this.getLastTokens())
1346
+ });
1347
+ }
1348
+ isFirstToken = false;
1349
+ if (!continueGeneration)
1350
+ break;
1351
+ const stopRes = this.handleAbortTrigger("model") ?? this.handleMaxTokensTrigger("model");
1352
+ if (stopRes != null)
1353
+ return stopRes;
1354
+ }
1355
+ return undefined;
1356
+ }
1093
1357
  async enterFunctionCallingLoop(loadContextWindow) {
1094
1358
  if (!this.functionsEnabled) {
1095
1359
  this.functionEvaluationMode = false;
@@ -1568,6 +1832,8 @@ class GenerateResponseState {
1568
1832
  }
1569
1833
  }
1570
1834
  detectAndHandleFunctionStartSyntax() {
1835
+ if (!this.functionSyntaxStartDetectorEnabled)
1836
+ return;
1571
1837
  this.functionSyntaxStartDetector.recordGeneration({
1572
1838
  text: this.currentText,
1573
1839
  tokens: this.currentTokens,
@@ -1575,6 +1841,10 @@ class GenerateResponseState {
1575
1841
  });
1576
1842
  if (this.currentQueuedTokenRelease != null && this.functionEvaluationMode === false && this.functionsEnabled &&
1577
1843
  this.functionSyntaxStartDetector.hasTriggeredStops) {
1844
+ if (this.abortOnNonText) {
1845
+ this.shouldAbortBecauseOfNonText = true;
1846
+ return;
1847
+ }
1578
1848
  this.functionEvaluationMode = "functionName";
1579
1849
  this.currentQueuedTokenRelease.createTextIndexLock(0);
1580
1850
  this.stopGenerationDetector.clearTriggeredStops();
@@ -1592,6 +1862,11 @@ class GenerateResponseState {
1592
1862
  }
1593
1863
  }
1594
1864
  recordStopGenerationEvaluation() {
1865
+ this.rerenderTriggerDetector.recordGeneration({
1866
+ text: this.currentText,
1867
+ tokens: this.currentTokens,
1868
+ queuedTokenRelease: this.currentQueuedTokenRelease
1869
+ });
1595
1870
  this.stopGenerationDetector.recordGeneration({
1596
1871
  text: this.currentText,
1597
1872
  tokens: this.currentTokens,
@@ -1609,8 +1884,10 @@ class GenerateResponseState {
1609
1884
  pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
1610
1885
  }
1611
1886
  handleStopGenerationTrigger(lastHistoryItemType, forceStopReason) {
1612
- if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
1613
- this.llamaChat.model.isEogToken(this.currentToken) || forceStopReason != null) {
1887
+ const detectedStopGenerationTrigger = this.stopGenerationDetector.hasTriggeredStops ||
1888
+ this.customStopGenerationTriggersDetector.hasTriggeredStops ||
1889
+ this.llamaChat.model.isEogToken(this.currentToken);
1890
+ if ((detectedStopGenerationTrigger && !this.rerenderTriggerDetector.hasTriggeredStops) || forceStopReason != null) {
1614
1891
  this.stopGenerationDetector.clearInProgressStops();
1615
1892
  this.customStopGenerationTriggersDetector.clearInProgressStops();
1616
1893
  pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
@@ -1709,25 +1986,45 @@ class GenerateResponseState {
1709
1986
  }
1710
1987
  async handleBudgetTriggers() {
1711
1988
  let shouldReloadEvaluationState = false;
1712
- const hasBudget = (budget) => budget != null && budget !== Infinity;
1713
- const hasBudgetTriggers = this.budgets != null && hasBudget(this.budgets.thoughtTokens);
1714
- if (!hasBudgetTriggers)
1989
+ if (this.budgets == null)
1715
1990
  return shouldReloadEvaluationState;
1716
- if (hasBudget(this.budgets.thoughtTokens) && this.segmentHandler.isSegmentTypeOpen("thought")) {
1717
- const usedThoughtTokens = this.segmentHandler.getSegmentTokensCount("thought");
1718
- if (usedThoughtTokens >= this.budgets.thoughtTokens) {
1719
- this.segmentHandler.closeSegment("thought");
1991
+ for (const segmentType of this.segmentHandler.getOpenSegmentStack().reverse()) {
1992
+ const budget = this.getSegmentBudget(segmentType);
1993
+ if (budget == null)
1994
+ continue;
1995
+ const usedSegmentTokens = this.segmentHandler.getSegmentTokensCount(segmentType);
1996
+ if (usedSegmentTokens >= budget) {
1997
+ this.segmentHandler.closeSegment(segmentType);
1720
1998
  shouldReloadEvaluationState = true;
1721
1999
  }
1722
2000
  }
1723
2001
  return shouldReloadEvaluationState;
1724
2002
  }
2003
+ getSegmentBudget(segmentType) {
2004
+ const getBudget = (budget) => ((budget == null || budget === Infinity)
2005
+ ? null
2006
+ : budget);
2007
+ if (this.budgets == null)
2008
+ return null;
2009
+ if (segmentType === "thought")
2010
+ return getBudget(this.budgets.thoughtTokens);
2011
+ else if (segmentType === "comment")
2012
+ return getBudget(this.budgets.commentTokens);
2013
+ void segmentType;
2014
+ return null;
2015
+ }
2016
+ handleShouldRerender() {
2017
+ this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops;
2018
+ if (this.abortOnNonText && this.shouldRerender)
2019
+ this.shouldAbortBecauseOfNonText = true;
2020
+ return this.shouldRerender;
2021
+ }
1725
2022
  updateShouldContextShift() {
1726
2023
  this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
1727
2024
  return this.shouldContextShift;
1728
2025
  }
1729
2026
  get shouldAbort() {
1730
- return !!(this.signal?.aborted && this.stopOnAbortSignal);
2027
+ return !!(this.signal?.aborted && this.stopOnAbortSignal) || this.shouldAbortBecauseOfNonText;
1731
2028
  }
1732
2029
  handleAbortTrigger(lastHistoryItemType) {
1733
2030
  if (this.shouldAbort && this.signal?.aborted && this.stopOnAbortSignal) {
@@ -1747,7 +2044,9 @@ class GenerateResponseState {
1747
2044
  contextShiftMetadata: this.lastHistoryCompressionMetadata
1748
2045
  },
1749
2046
  metadata: {
1750
- stopReason: "abort"
2047
+ stopReason: this.shouldAbortBecauseOfNonText
2048
+ ? "eogToken"
2049
+ : "abort"
1751
2050
  }
1752
2051
  };
1753
2052
  }
@@ -1867,6 +2166,29 @@ class SegmentHandler {
1867
2166
  isSegmentTypeOpen(type) {
1868
2167
  return this._segmentsStackSet.has(type);
1869
2168
  }
2169
+ get topOpenSegmentType() {
2170
+ return this._segmentsStack.at(-1);
2171
+ }
2172
+ /**
2173
+ * First segment in the stack is the top most that'll close last.
2174
+ * ```
2175
+ * <segment1>
2176
+ * some text here
2177
+ * <segment2>
2178
+ * some text here
2179
+ * <segment3>
2180
+ * some text here
2181
+ * </segment3>
2182
+ * ```
2183
+ * In that example, the top most segment is `segment1`, and the last open segment is `segment2` (which is the next one to close).
2184
+ * So in that example, this function will return:
2185
+ * ```
2186
+ * ["segment1", "segment2"]
2187
+ * ```
2188
+ */
2189
+ getOpenSegmentStack() {
2190
+ return this._segmentsStack.slice(this._ownedSegmentsStackLength);
2191
+ }
1870
2192
  _processTokens(tokens, text) {
1871
2193
  const queuedTokenRelease = this._streamRegulator.addChunk({
1872
2194
  tokens,
@@ -2065,17 +2387,16 @@ class SegmentHandler {
2065
2387
  this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
2066
2388
  }
2067
2389
  else {
2068
- if (lastSegment instanceof Array) {
2069
- const text = (this.onResponseChunk != null || this.onTextChunk != null)
2070
- ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
2071
- : "";
2390
+ const text = (this.onResponseChunk != null || this.onTextChunk != null)
2391
+ ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
2392
+ : "";
2393
+ if (lastSegment instanceof Array)
2072
2394
  pushAll(lastSegment, tokens);
2073
- this.onToken?.(tokens);
2074
- this.onTextChunk?.(text);
2075
- this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
2076
- }
2077
2395
  else
2078
2396
  this._segments.push(tokens);
2397
+ this.onToken?.(tokens.slice());
2398
+ this.onTextChunk?.(text);
2399
+ this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
2079
2400
  }
2080
2401
  if (lastContextWindowSegment == null)
2081
2402
  this._contextWindowSegments.push(tokens.slice());