node-llama-cpp 3.10.0 → 3.12.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (98)
  1. package/README.md +1 -1
  2. package/dist/bindings/AddonTypes.d.ts +4 -2
  3. package/dist/bindings/Llama.d.ts +5 -1
  4. package/dist/bindings/Llama.js +22 -3
  5. package/dist/bindings/Llama.js.map +1 -1
  6. package/dist/bindings/getLlama.d.ts +40 -2
  7. package/dist/bindings/getLlama.js +16 -7
  8. package/dist/bindings/getLlama.js.map +1 -1
  9. package/dist/bindings/types.d.ts +6 -2
  10. package/dist/bindings/types.js +16 -1
  11. package/dist/bindings/types.js.map +1 -1
  12. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +1 -1
  13. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +1 -1
  14. package/dist/bindings/utils/getLlamaWithoutBackend.js +1 -1
  15. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -1
  16. package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
  17. package/dist/chatWrappers/HarmonyChatWrapper.js +527 -0
  18. package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
  19. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +4 -2
  20. package/dist/chatWrappers/utils/resolveChatWrapper.js +21 -6
  21. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  22. package/dist/cli/commands/ChatCommand.d.ts +2 -1
  23. package/dist/cli/commands/ChatCommand.js +21 -7
  24. package/dist/cli/commands/ChatCommand.js.map +1 -1
  25. package/dist/cli/commands/CompleteCommand.d.ts +2 -1
  26. package/dist/cli/commands/CompleteCommand.js +21 -7
  27. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  28. package/dist/cli/commands/InfillCommand.d.ts +2 -1
  29. package/dist/cli/commands/InfillCommand.js +21 -7
  30. package/dist/cli/commands/InfillCommand.js.map +1 -1
  31. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +23 -2
  32. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
  33. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
  34. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +16 -5
  35. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  36. package/dist/cli/recommendedModels.js +22 -0
  37. package/dist/cli/recommendedModels.js.map +1 -1
  38. package/dist/config.d.ts +1 -1
  39. package/dist/evaluator/LlamaChat/LlamaChat.js +246 -31
  40. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  41. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +2 -2
  42. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  43. package/dist/evaluator/LlamaCompletion.js +2 -2
  44. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  45. package/dist/evaluator/LlamaContext/LlamaContext.js +17 -17
  46. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  47. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +5 -5
  48. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -1
  49. package/dist/evaluator/LlamaEmbeddingContext.js +1 -1
  50. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
  51. package/dist/evaluator/LlamaModel/LlamaModel.js +3 -3
  52. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  53. package/dist/evaluator/LlamaRankingContext.js +1 -1
  54. package/dist/evaluator/LlamaRankingContext.js.map +1 -1
  55. package/dist/gguf/fileReaders/GgufFsFileReader.js +1 -1
  56. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -1
  57. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +1 -1
  58. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
  59. package/dist/gguf/insights/GgufInsights.js +22 -3
  60. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  61. package/dist/gguf/types/GgufMetadataTypes.d.ts +25 -2
  62. package/dist/gguf/types/GgufMetadataTypes.js +23 -0
  63. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  64. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +2 -1
  65. package/dist/gguf/types/GgufTensorInfoTypes.js +1 -0
  66. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
  67. package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
  68. package/dist/gguf/utils/ggufQuantNames.js +1 -0
  69. package/dist/gguf/utils/ggufQuantNames.js.map +1 -1
  70. package/dist/index.d.ts +4 -3
  71. package/dist/index.js +2 -1
  72. package/dist/index.js.map +1 -1
  73. package/dist/tsconfig.tsbuildinfo +1 -1
  74. package/dist/types.d.ts +144 -2
  75. package/dist/types.js.map +1 -1
  76. package/dist/utils/LruCache.d.ts +1 -1
  77. package/dist/utils/ReplHistory.js +1 -1
  78. package/dist/utils/ReplHistory.js.map +1 -1
  79. package/dist/utils/gbnfJson/types.d.ts +1 -1
  80. package/dist/utils/gbnfJson/types.js.map +1 -1
  81. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +2 -0
  82. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  83. package/dist/utils/getBuildDefaults.d.ts +1 -1
  84. package/dist/utils/getChatWrapperSegmentDefinition.js +2 -0
  85. package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -1
  86. package/dist/utils/isLockfileActive.js +2 -2
  87. package/dist/utils/isLockfileActive.js.map +1 -1
  88. package/dist/utils/utilTypes.d.ts +10 -0
  89. package/dist/utils/waitForLockfileRelease.js +3 -3
  90. package/dist/utils/waitForLockfileRelease.js.map +1 -1
  91. package/dist/utils/withLockfile.js +1 -1
  92. package/dist/utils/withLockfile.js.map +1 -1
  93. package/llama/addon/addon.cpp +31 -0
  94. package/llama/binariesGithubRelease.json +1 -1
  95. package/llama/gitRelease.bundle +0 -0
  96. package/llama/llama.cpp.info.json +1 -1
  97. package/package.json +22 -20
  98. package/templates/packed/electron-typescript-react.json +1 -1
@@ -114,7 +114,7 @@ export class LlamaChat {
114
114
  });
115
115
  if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
116
116
  throw new Error("Using both grammar and functions is not supported yet");
117
- return await withLock(this._chatLock, "evaluate", signal, async () => {
117
+ return await withLock([this._chatLock, "evaluate"], signal, async () => {
118
118
  try {
119
119
  generateResponseState.ensureLastHistoryItemIsModel();
120
120
  generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
@@ -125,8 +125,11 @@ export class LlamaChat {
125
125
  const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
126
126
  while (true) {
127
127
  generateResponseState.startTokenLoop();
128
+ generateResponseState.handleRerender();
129
+ const shouldHandlePrefixTriggers = generateResponseState.isRerender;
128
130
  generateResponseState.canAvoidReloadingHistory = false;
129
131
  await loadContextWindow();
132
+ generateResponseState.isRerender = false;
130
133
  generateResponseState.addStopGenerationTriggersFromChatWrapper();
131
134
  if (generateResponseState.generatedTokens === 0) {
132
135
  generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
@@ -134,6 +137,11 @@ export class LlamaChat {
134
137
  generateResponseState.initFunctions();
135
138
  }
136
139
  }
140
+ if (shouldHandlePrefixTriggers) {
141
+ const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
142
+ if (handlePrefixTriggersRes != null)
143
+ return handlePrefixTriggersRes;
144
+ }
137
145
  if (generateResponseState.functionEvaluationMode !== false) {
138
146
  const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
139
147
  if (functionsCallsRes != null)
@@ -165,21 +173,21 @@ export class LlamaChat {
165
173
  const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
166
174
  if (maxTokensTriggerRes != null)
167
175
  return maxTokensTriggerRes;
168
- if (generateResponseState.updateShouldContextShift())
176
+ if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
169
177
  break;
170
178
  if (await generateResponseState.handleBudgetTriggers()) {
171
179
  await loadContextWindowForBudgetTriggers();
172
180
  await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
173
181
  await generateResponseState.createNewEvaluationIterator();
174
182
  }
175
- if (generateResponseState.updateShouldContextShift())
183
+ if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
176
184
  break;
177
185
  const abortRes = generateResponseState.handleAbortTrigger("model");
178
186
  if (abortRes != null)
179
187
  return abortRes;
180
188
  }
181
189
  generateResponseState.isFirstEvaluation = false;
182
- if (generateResponseState.shouldContextShift)
190
+ if (generateResponseState.shouldRerender || generateResponseState.shouldContextShift)
183
191
  continue;
184
192
  break;
185
193
  }
@@ -230,16 +238,18 @@ export class LlamaChat {
230
238
  minimumOverlapPercentageToPreventContextShift
231
239
  }
232
240
  });
233
- return await withLock(this._chatLock, "evaluate", signal, async () => {
241
+ return await withLock([this._chatLock, "evaluate"], signal, async () => {
234
242
  try {
235
243
  generateResponseState.ensureLastHistoryItemIsUser();
236
244
  while (true) {
237
245
  generateResponseState.startTokenLoop();
238
246
  const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
247
+ generateResponseState.isRerender = false;
239
248
  generateResponseState.functionEvaluationMode = false;
240
249
  generateResponseState.addStopGenerationTriggersFromChatWrapper();
241
250
  if (userTextSuffix != null && userTextSuffix.values.length > 0)
242
251
  generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
252
+ generateResponseState.rerenderTriggers.forEach((trigger) => (generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(trigger, this.model.tokenizer))));
243
253
  allSegmentTypes
244
254
  .map((segmentType) => getChatWrapperSegmentDefinition(this._chatWrapper.settings, segmentType))
245
255
  .filter((segmentDefinition) => segmentDefinition != null)
@@ -545,13 +555,13 @@ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
545
555
  `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
546
556
  "where not all user messages are properly added to the the result LlamaText");
547
557
  }
548
- async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
558
+ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, isRerender, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
549
559
  if (sequence == null)
550
560
  throw new DisposedError();
551
561
  const model = sequence.model;
552
562
  const context = sequence.context;
553
563
  let removeRawFromHistory = false;
554
- if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
564
+ if ((isFirstEvaluation || isRerender) && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
555
565
  const newContextWindow = lastEvaluationContextWindowHistory.slice();
556
566
  if (endWithUserText) {
557
567
  if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
@@ -565,7 +575,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
565
575
  type: "model",
566
576
  response: []
567
577
  });
568
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
578
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
569
579
  chatHistory: newContextWindow,
570
580
  availableFunctions: functions,
571
581
  documentFunctionParams
@@ -574,7 +584,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
574
584
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
575
585
  const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
576
586
  const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
577
- if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
587
+ if (isRerender || existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
578
588
  return {
579
589
  history: newContextWindow,
580
590
  stopGenerationTriggers,
@@ -584,7 +594,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
584
594
  ignoreStartText: ignoreStartText ?? [],
585
595
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
586
596
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
587
- userTextSuffix
597
+ userTextSuffix,
598
+ prefixTriggers,
599
+ noPrefixTrigger,
600
+ rerender,
601
+ detectFunctionCalls
588
602
  };
589
603
  }
590
604
  }
@@ -607,7 +621,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
607
621
  functions,
608
622
  documentFunctionParams
609
623
  });
610
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
624
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
611
625
  chatHistory: compressedHistory,
612
626
  availableFunctions: functions,
613
627
  documentFunctionParams
@@ -621,11 +635,15 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
621
635
  ignoreStartText: ignoreStartText ?? [],
622
636
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
623
637
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
624
- userTextSuffix
638
+ userTextSuffix,
639
+ prefixTriggers,
640
+ noPrefixTrigger,
641
+ rerender,
642
+ detectFunctionCalls
625
643
  };
626
644
  }
627
645
  {
628
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
646
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
629
647
  chatHistory: resolvedHistory,
630
648
  availableFunctions: functions,
631
649
  documentFunctionParams
@@ -641,7 +659,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
641
659
  ignoreStartText: ignoreStartText ?? [],
642
660
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
643
661
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
644
- userTextSuffix
662
+ userTextSuffix,
663
+ prefixTriggers,
664
+ noPrefixTrigger,
665
+ rerender,
666
+ detectFunctionCalls
645
667
  };
646
668
  }
647
669
  const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
@@ -658,7 +680,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
658
680
  functions,
659
681
  documentFunctionParams
660
682
  });
661
- const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
683
+ const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
662
684
  chatHistory: compressedHistory,
663
685
  availableFunctions: functions,
664
686
  documentFunctionParams
@@ -672,7 +694,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
672
694
  ignoreStartText: ignoreStartText ?? [],
673
695
  functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
674
696
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
675
- userTextSuffix
697
+ userTextSuffix,
698
+ prefixTriggers,
699
+ noPrefixTrigger,
700
+ rerender,
701
+ detectFunctionCalls
676
702
  };
677
703
  }
678
704
  class GenerateResponseState {
@@ -711,6 +737,7 @@ class GenerateResponseState {
711
737
  functionNameGrammar;
712
738
  functionsGrammar;
713
739
  functionsEvaluationState;
740
+ functionSyntaxStartDetectorEnabled = true;
714
741
  streamRegulator = new TokenStreamRegulator();
715
742
  stopGenerationDetector = new StopGenerationDetector();
716
743
  customStopGenerationTriggersDetector = new StopGenerationDetector();
@@ -723,6 +750,7 @@ class GenerateResponseState {
723
750
  res = [];
724
751
  pendingTokens = [];
725
752
  ignoredStartTextTokens = [];
753
+ prefixTriggerTokens = [];
726
754
  resFunctionCalls = [];
727
755
  segmentHandler;
728
756
  pendingPartialTokens = [];
@@ -735,12 +763,14 @@ class GenerateResponseState {
735
763
  releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
736
764
  generatedTokens = 0;
737
765
  isFirstEvaluation = true;
766
+ isRerender = true; // first render is a rerender
738
767
  initiallyEngagedFunctionMode = false;
739
768
  lastContextWindowHistory;
740
769
  lastHistoryCompressionMetadata;
741
770
  restartEvaluationIterator = false;
742
771
  // context shift loop
743
772
  shouldContextShift = false;
773
+ shouldRerender = false;
744
774
  canAvoidReloadingHistory = false;
745
775
  contextWindowTokens = [];
746
776
  stopGenerationTriggers = [];
@@ -748,6 +778,11 @@ class GenerateResponseState {
748
778
  functionCallInitiallyEngaged = false;
749
779
  disengageInitiallyEngagedFunctionCall = [];
750
780
  userTextSuffix = undefined;
781
+ prefixTriggerDetectors = new Map();
782
+ noPrefixTrigger = undefined;
783
+ rerenderTriggers = [];
784
+ rerenderTriggerDetector = new StopGenerationDetector();
785
+ rerenderActions = undefined;
751
786
  tokens = [];
752
787
  // token evaluation loop
753
788
  evaluationIterator;
@@ -930,7 +965,8 @@ class GenerateResponseState {
930
965
  let mostExhaustiveTriggeredStopsLeftoverTokens = [];
931
966
  const lastTokensForDetokenizer = resolveLastTokens([
932
967
  this.contextWindowTokens,
933
- this.ignoredStartTextTokens
968
+ this.ignoredStartTextTokens,
969
+ this.prefixTriggerTokens
934
970
  ]);
935
971
  const pendingPartialTokens = [];
936
972
  for (let i = 0; i < this.pendingTokens.length; i++) {
@@ -993,6 +1029,16 @@ class GenerateResponseState {
993
1029
  this.ensureNotAborted();
994
1030
  this.shouldContextShift = false;
995
1031
  }
1032
+ handleRerender() {
1033
+ if (this.shouldRerender) {
1034
+ this.isRerender = true;
1035
+ this.streamRegulator.reset();
1036
+ if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null) {
1037
+ this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
1038
+ this.shouldRerender = false;
1039
+ }
1040
+ }
1041
+ }
996
1042
  getContextWindowFunctionCallsTokens() {
997
1043
  if (this.functionEvaluationMode === false)
998
1044
  return [];
@@ -1019,14 +1065,15 @@ class GenerateResponseState {
1019
1065
  async loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText = false, avoidReloadingHistory = false) {
1020
1066
  const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
1021
1067
  const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
1022
- if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
1023
- const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
1068
+ if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || this.isRerender || !this.llamaChat.sequence.isLoadedToMemory) {
1069
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = await getContextWindow({
1024
1070
  resolvedHistory: resolvedHistory,
1025
1071
  resolvedContextShift: this.resolvedContextShift,
1026
1072
  lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
1027
- pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
1028
- this.pendingPartialTokens.length,
1073
+ pendingTokensCount: this.prefixTriggerTokens.length + this.pendingTokens.length + queuedChunkTokens.length +
1074
+ functionCallsTokens.length + this.pendingPartialTokens.length,
1029
1075
  isFirstEvaluation: this.isFirstEvaluation,
1076
+ isRerender: this.isRerender,
1030
1077
  chatWrapper: this.chatWrapper,
1031
1078
  lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
1032
1079
  minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
@@ -1043,6 +1090,45 @@ class GenerateResponseState {
1043
1090
  this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
1044
1091
  this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
1045
1092
  this.userTextSuffix = userTextSuffix;
1093
+ if (this.isRerender) {
1094
+ this.prefixTriggerTokens.length = 0;
1095
+ for (const prefixDetector of this.prefixTriggerDetectors.keys()) {
1096
+ prefixDetector.clearInProgressStops();
1097
+ prefixDetector.clearTriggeredStops();
1098
+ }
1099
+ this.prefixTriggerDetectors.clear();
1100
+ for (const trigger of prefixTriggers ?? []) {
1101
+ if (trigger.type === "functionCall" && !this.functionsEnabled)
1102
+ continue;
1103
+ const prefixDetector = new StopGenerationDetector();
1104
+ StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
1105
+ .forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
1106
+ this.prefixTriggerDetectors.set(prefixDetector, { inject: trigger.inject, trigger });
1107
+ const inject = trigger.inject;
1108
+ if (inject != null && inject.values.length > 0) {
1109
+ const fullPrefixDetector = new StopGenerationDetector();
1110
+ StopGenerationDetector
1111
+ .resolveStopTriggers(trigger.triggers.map((trigger) => LlamaText([trigger, inject])), this.llamaChat.model.tokenizer)
1112
+ .forEach((stopTrigger) => fullPrefixDetector.addStopTrigger(stopTrigger));
1113
+ this.prefixTriggerDetectors.set(fullPrefixDetector, { trigger });
1114
+ }
1115
+ }
1116
+ this.noPrefixTrigger = noPrefixTrigger;
1117
+ if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
1118
+ this.noPrefixTrigger = undefined;
1119
+ this.rerenderTriggers = rerender?.triggers ?? [];
1120
+ this.rerenderTriggerDetector.clearInProgressStops();
1121
+ this.rerenderTriggerDetector.clearTriggeredStops();
1122
+ this.rerenderTriggerDetector = new StopGenerationDetector();
1123
+ this.rerenderActions = rerender?.action;
1124
+ this.functionSyntaxStartDetectorEnabled = detectFunctionCalls ?? true;
1125
+ if (!this.functionSyntaxStartDetectorEnabled)
1126
+ this.functionSyntaxStartDetector.clearInProgressStops();
1127
+ if (rerender?.triggers != null) {
1128
+ StopGenerationDetector.resolveStopTriggers(rerender.triggers, this.llamaChat.model.tokenizer)
1129
+ .map((stopTrigger) => this.rerenderTriggerDetector.addStopTrigger(stopTrigger));
1130
+ }
1131
+ }
1046
1132
  this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
1047
1133
  this.lastContextWindowHistory = contextWindowHistory;
1048
1134
  this.segmentHandler.resetContextWindow();
@@ -1055,6 +1141,7 @@ class GenerateResponseState {
1055
1141
  this.tokens = [
1056
1142
  ...this.contextWindowTokens,
1057
1143
  ...this.ignoredStartTextTokens,
1144
+ ...this.prefixTriggerTokens,
1058
1145
  ...this.pendingTokens,
1059
1146
  ...queuedChunkTokens,
1060
1147
  ...functionCallsTokens,
@@ -1090,6 +1177,119 @@ class GenerateResponseState {
1090
1177
  this.restartEvaluationIterator = true;
1091
1178
  }
1092
1179
  }
1180
+ async handlePrefixTriggers(loadContextWindow) {
1181
+ const reloadTokens = async () => {
1182
+ this.startTokenLoop();
1183
+ await loadContextWindow();
1184
+ };
1185
+ const injectTokens = async (text, alignStateTokens = false) => {
1186
+ if (text == null)
1187
+ return;
1188
+ const tokens = text.tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
1189
+ if (tokens.length === 0)
1190
+ return;
1191
+ pushAll(this.prefixTriggerTokens, tokens);
1192
+ if (alignStateTokens)
1193
+ await reloadTokens();
1194
+ };
1195
+ if (this.prefixTriggerDetectors.size === 0) {
1196
+ if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
1197
+ await injectTokens(this.noPrefixTrigger.inject, true);
1198
+ this.functionEvaluationMode = "functionName";
1199
+ }
1200
+ else if (this.noPrefixTrigger?.type === "segment") {
1201
+ await injectTokens(this.noPrefixTrigger.inject, true);
1202
+ this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
1203
+ }
1204
+ else if (this.noPrefixTrigger?.type === "response")
1205
+ await injectTokens(this.noPrefixTrigger.inject, true);
1206
+ return undefined;
1207
+ }
1208
+ const generatedTokens = [];
1209
+ let isFirstToken = true;
1210
+ let continueGeneration = true;
1211
+ for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
1212
+ pushAll(generatedTokens, tokens);
1213
+ for (const [triggerDetector, { trigger, inject }] of [...this.prefixTriggerDetectors.entries()]) {
1214
+ triggerDetector.recordGeneration({
1215
+ text: this.currentText,
1216
+ tokens: this.currentTokens,
1217
+ startNewChecks: isFirstToken,
1218
+ triggerMustStartWithGeneration: true
1219
+ });
1220
+ if (triggerDetector.hasTriggeredStops) {
1221
+ const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggerDetector.getTriggeredStops());
1222
+ const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
1223
+ ? firstRemainingGenerationAfterStop === ""
1224
+ ? []
1225
+ : this.llamaChat.model.tokenize(firstRemainingGenerationAfterStop, false, "trimLeadingSpace")
1226
+ : (firstRemainingGenerationAfterStop ?? []);
1227
+ const triggerTokens = (stopTrigger == null || remainingTokens.length === 0)
1228
+ ? generatedTokens
1229
+ : stopTrigger.flatMap((item) => {
1230
+ if (typeof item === "string")
1231
+ return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
1232
+ return [item];
1233
+ });
1234
+ this.streamRegulator.reset();
1235
+ if (trigger.type === "segment") {
1236
+ pushAll(this.prefixTriggerTokens, triggerTokens);
1237
+ if (inject != null)
1238
+ await injectTokens(inject);
1239
+ await reloadTokens();
1240
+ this.segmentHandler.openSegment(trigger.segmentType);
1241
+ }
1242
+ else if (trigger.type === "response") {
1243
+ pushAll(this.prefixTriggerTokens, triggerTokens);
1244
+ if (inject != null)
1245
+ await injectTokens(inject);
1246
+ await reloadTokens();
1247
+ }
1248
+ else if (trigger.type === "functionCall") {
1249
+ if (trigger.replaceTrigger === false)
1250
+ pushAll(this.prefixTriggerTokens, triggerTokens);
1251
+ if (inject != null)
1252
+ await injectTokens(inject);
1253
+ await reloadTokens();
1254
+ this.functionEvaluationMode = "functionName";
1255
+ }
1256
+ else
1257
+ void trigger;
1258
+ this.prefixTriggerDetectors.clear();
1259
+ continueGeneration = false;
1260
+ break;
1261
+ }
1262
+ else if (!triggerDetector.hasInProgressStops)
1263
+ this.prefixTriggerDetectors.delete(triggerDetector);
1264
+ }
1265
+ if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
1266
+ this.streamRegulator.reset();
1267
+ continueGeneration = false;
1268
+ if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
1269
+ await injectTokens(this.noPrefixTrigger.inject, true);
1270
+ this.functionEvaluationMode = "functionName";
1271
+ }
1272
+ else if (this.noPrefixTrigger?.type === "segment") {
1273
+ await injectTokens(this.noPrefixTrigger.inject, true);
1274
+ this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
1275
+ }
1276
+ else if (this.noPrefixTrigger?.type === "response")
1277
+ await injectTokens(this.noPrefixTrigger.inject, true);
1278
+ else
1279
+ this.streamRegulator.addChunk({
1280
+ tokens: generatedTokens,
1281
+ text: this.llamaChat.model.detokenize(generatedTokens, false, this.getLastTokens())
1282
+ });
1283
+ }
1284
+ isFirstToken = false;
1285
+ if (!continueGeneration)
1286
+ break;
1287
+ const stopRes = this.handleAbortTrigger("model") ?? this.handleMaxTokensTrigger("model");
1288
+ if (stopRes != null)
1289
+ return stopRes;
1290
+ }
1291
+ return undefined;
1292
+ }
1093
1293
  async enterFunctionCallingLoop(loadContextWindow) {
1094
1294
  if (!this.functionsEnabled) {
1095
1295
  this.functionEvaluationMode = false;
@@ -1568,6 +1768,8 @@ class GenerateResponseState {
1568
1768
  }
1569
1769
  }
1570
1770
  detectAndHandleFunctionStartSyntax() {
1771
+ if (!this.functionSyntaxStartDetectorEnabled)
1772
+ return;
1571
1773
  this.functionSyntaxStartDetector.recordGeneration({
1572
1774
  text: this.currentText,
1573
1775
  tokens: this.currentTokens,
@@ -1592,6 +1794,11 @@ class GenerateResponseState {
1592
1794
  }
1593
1795
  }
1594
1796
  recordStopGenerationEvaluation() {
1797
+ this.rerenderTriggerDetector.recordGeneration({
1798
+ text: this.currentText,
1799
+ tokens: this.currentTokens,
1800
+ queuedTokenRelease: this.currentQueuedTokenRelease
1801
+ });
1595
1802
  this.stopGenerationDetector.recordGeneration({
1596
1803
  text: this.currentText,
1597
1804
  tokens: this.currentTokens,
@@ -1609,8 +1816,10 @@ class GenerateResponseState {
1609
1816
  pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
1610
1817
  }
1611
1818
  handleStopGenerationTrigger(lastHistoryItemType, forceStopReason) {
1612
- if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
1613
- this.llamaChat.model.isEogToken(this.currentToken) || forceStopReason != null) {
1819
+ const detectedStopGenerationTrigger = this.stopGenerationDetector.hasTriggeredStops ||
1820
+ this.customStopGenerationTriggersDetector.hasTriggeredStops ||
1821
+ this.llamaChat.model.isEogToken(this.currentToken);
1822
+ if ((detectedStopGenerationTrigger && !this.rerenderTriggerDetector.hasTriggeredStops) || forceStopReason != null) {
1614
1823
  this.stopGenerationDetector.clearInProgressStops();
1615
1824
  this.customStopGenerationTriggersDetector.clearInProgressStops();
1616
1825
  pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
@@ -1722,6 +1931,10 @@ class GenerateResponseState {
1722
1931
  }
1723
1932
  return shouldReloadEvaluationState;
1724
1933
  }
1934
+ handleShouldRerender() {
1935
+ this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops;
1936
+ return this.shouldRerender;
1937
+ }
1725
1938
  updateShouldContextShift() {
1726
1939
  this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
1727
1940
  return this.shouldContextShift;
@@ -1867,6 +2080,9 @@ class SegmentHandler {
1867
2080
  isSegmentTypeOpen(type) {
1868
2081
  return this._segmentsStackSet.has(type);
1869
2082
  }
2083
+ get topOpenSegmentType() {
2084
+ return this._segmentsStack.at(-1);
2085
+ }
1870
2086
  _processTokens(tokens, text) {
1871
2087
  const queuedTokenRelease = this._streamRegulator.addChunk({
1872
2088
  tokens,
@@ -2065,17 +2281,16 @@ class SegmentHandler {
2065
2281
  this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
2066
2282
  }
2067
2283
  else {
2068
- if (lastSegment instanceof Array) {
2069
- const text = (this.onResponseChunk != null || this.onTextChunk != null)
2070
- ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
2071
- : "";
2284
+ const text = (this.onResponseChunk != null || this.onTextChunk != null)
2285
+ ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
2286
+ : "";
2287
+ if (lastSegment instanceof Array)
2072
2288
  pushAll(lastSegment, tokens);
2073
- this.onToken?.(tokens);
2074
- this.onTextChunk?.(text);
2075
- this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
2076
- }
2077
2289
  else
2078
2290
  this._segments.push(tokens);
2291
+ this.onToken?.(tokens.slice());
2292
+ this.onTextChunk?.(text);
2293
+ this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
2079
2294
  }
2080
2295
  if (lastContextWindowSegment == null)
2081
2296
  this._contextWindowSegments.push(tokens.slice());