npm - node-llama-cpp - Versions diffs - 3.10.0 → 3.12.0 - Mend

node-llama-cpp 3.10.0 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

package/README.md +1 -1
package/dist/bindings/AddonTypes.d.ts +4 -2
package/dist/bindings/Llama.d.ts +5 -1
package/dist/bindings/Llama.js +22 -3
package/dist/bindings/Llama.js.map +1 -1
package/dist/bindings/getLlama.d.ts +40 -2
package/dist/bindings/getLlama.js +16 -7
package/dist/bindings/getLlama.js.map +1 -1
package/dist/bindings/types.d.ts +6 -2
package/dist/bindings/types.js +16 -1
package/dist/bindings/types.js.map +1 -1
package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +1 -1
package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +1 -1
package/dist/bindings/utils/getLlamaWithoutBackend.js +1 -1
package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -1
package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
package/dist/chatWrappers/HarmonyChatWrapper.js +527 -0
package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +4 -2
package/dist/chatWrappers/utils/resolveChatWrapper.js +21 -6
package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
package/dist/cli/commands/ChatCommand.d.ts +2 -1
package/dist/cli/commands/ChatCommand.js +21 -7
package/dist/cli/commands/ChatCommand.js.map +1 -1
package/dist/cli/commands/CompleteCommand.d.ts +2 -1
package/dist/cli/commands/CompleteCommand.js +21 -7
package/dist/cli/commands/CompleteCommand.js.map +1 -1
package/dist/cli/commands/InfillCommand.d.ts +2 -1
package/dist/cli/commands/InfillCommand.js +21 -7
package/dist/cli/commands/InfillCommand.js.map +1 -1
package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +23 -2
package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +16 -5
package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
package/dist/cli/recommendedModels.js +22 -0
package/dist/cli/recommendedModels.js.map +1 -1
package/dist/config.d.ts +1 -1
package/dist/evaluator/LlamaChat/LlamaChat.js +246 -31
package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +2 -2
package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
package/dist/evaluator/LlamaCompletion.js +2 -2
package/dist/evaluator/LlamaCompletion.js.map +1 -1
package/dist/evaluator/LlamaContext/LlamaContext.js +17 -17
package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +5 -5
package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -1
package/dist/evaluator/LlamaEmbeddingContext.js +1 -1
package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
package/dist/evaluator/LlamaModel/LlamaModel.js +3 -3
package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
package/dist/evaluator/LlamaRankingContext.js +1 -1
package/dist/evaluator/LlamaRankingContext.js.map +1 -1
package/dist/gguf/fileReaders/GgufFsFileReader.js +1 -1
package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -1
package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +1 -1
package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
package/dist/gguf/insights/GgufInsights.js +22 -3
package/dist/gguf/insights/GgufInsights.js.map +1 -1
package/dist/gguf/types/GgufMetadataTypes.d.ts +25 -2
package/dist/gguf/types/GgufMetadataTypes.js +23 -0
package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
package/dist/gguf/types/GgufTensorInfoTypes.d.ts +2 -1
package/dist/gguf/types/GgufTensorInfoTypes.js +1 -0
package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
package/dist/gguf/utils/ggufQuantNames.js +1 -0
package/dist/gguf/utils/ggufQuantNames.js.map +1 -1
package/dist/index.d.ts +4 -3
package/dist/index.js +2 -1
package/dist/index.js.map +1 -1
package/dist/tsconfig.tsbuildinfo +1 -1
package/dist/types.d.ts +144 -2
package/dist/types.js.map +1 -1
package/dist/utils/LruCache.d.ts +1 -1
package/dist/utils/ReplHistory.js +1 -1
package/dist/utils/ReplHistory.js.map +1 -1
package/dist/utils/gbnfJson/types.d.ts +1 -1
package/dist/utils/gbnfJson/types.js.map +1 -1
package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +2 -0
package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
package/dist/utils/getBuildDefaults.d.ts +1 -1
package/dist/utils/getChatWrapperSegmentDefinition.js +2 -0
package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -1
package/dist/utils/isLockfileActive.js +2 -2
package/dist/utils/isLockfileActive.js.map +1 -1
package/dist/utils/utilTypes.d.ts +10 -0
package/dist/utils/waitForLockfileRelease.js +3 -3
package/dist/utils/waitForLockfileRelease.js.map +1 -1
package/dist/utils/withLockfile.js +1 -1
package/dist/utils/withLockfile.js.map +1 -1
package/llama/addon/addon.cpp +31 -0
package/llama/binariesGithubRelease.json +1 -1
package/llama/gitRelease.bundle +0 -0
package/llama/llama.cpp.info.json +1 -1
package/package.json +22 -20
package/templates/packed/electron-typescript-react.json +1 -1

package/dist/evaluator/LlamaChat/LlamaChat.js CHANGED

@@ -114,7 +114,7 @@ export class LlamaChat {
         });
         if (generateResponseState.grammar != null && generateResponseState.functionsEnabled)
             throw new Error("Using both grammar and functions is not supported yet");
-        return await withLock(this._chatLock, "evaluate", signal, async () => {
+        return await withLock([this._chatLock, "evaluate"], signal, async () => {
             try {
                 generateResponseState.ensureLastHistoryItemIsModel();
                 generateResponseState.ensureReopenedThoughtSegmentAfterFunctionCallsIfNeeded();
@@ -125,8 +125,11 @@ export class LlamaChat {
                 const loadContextWindowForBudgetTriggers = async () => loadContextWindow(false);
                 while (true) {
                     generateResponseState.startTokenLoop();
+                    generateResponseState.handleRerender();
+                    const shouldHandlePrefixTriggers = generateResponseState.isRerender;
                     generateResponseState.canAvoidReloadingHistory = false;
                     await loadContextWindow();
+                    generateResponseState.isRerender = false;
                     generateResponseState.addStopGenerationTriggersFromChatWrapper();
                     if (generateResponseState.generatedTokens === 0) {
                         generateResponseState.addIgnoreStartTextTriggersFromChatWrapper();
@@ -134,6 +137,11 @@ export class LlamaChat {
                             generateResponseState.initFunctions();
                         }
                     }
+                    if (shouldHandlePrefixTriggers) {
+                        const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers(loadContextWindowForFunctionCallingLoop);
+                        if (handlePrefixTriggersRes != null)
+                            return handlePrefixTriggersRes;
+                    }
                     if (generateResponseState.functionEvaluationMode !== false) {
                         const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop(loadContextWindowForFunctionCallingLoop);
                         if (functionsCallsRes != null)
@@ -165,21 +173,21 @@ export class LlamaChat {
                         const maxTokensTriggerRes = generateResponseState.handleMaxTokensTrigger("model");
                         if (maxTokensTriggerRes != null)
                             return maxTokensTriggerRes;
-                        if (generateResponseState.updateShouldContextShift())
+                        if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
                             break;
                         if (await generateResponseState.handleBudgetTriggers()) {
                             await loadContextWindowForBudgetTriggers();
                             await generateResponseState.alignCurrentSequenceStateWithCurrentTokens();
                             await generateResponseState.createNewEvaluationIterator();
                         }
-                        if (generateResponseState.updateShouldContextShift())
+                        if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift())
                             break;
                         const abortRes = generateResponseState.handleAbortTrigger("model");
                         if (abortRes != null)
                             return abortRes;
                     }
                     generateResponseState.isFirstEvaluation = false;
-                    if (generateResponseState.shouldContextShift)
+                    if (generateResponseState.shouldRerender || generateResponseState.shouldContextShift)
                         continue;
                     break;
                 }
@@ -230,16 +238,18 @@ export class LlamaChat {
                 minimumOverlapPercentageToPreventContextShift
             }
         });
-        return await withLock(this._chatLock, "evaluate", signal, async () => {
+        return await withLock([this._chatLock, "evaluate"], signal, async () => {
             try {
                 generateResponseState.ensureLastHistoryItemIsUser();
                 while (true) {
                     generateResponseState.startTokenLoop();
                     const { userTextSuffix } = await generateResponseState.loadContextWindow(mergeGeneratedResultWithChatHistory("user", generateResponseState.resolvedHistory, generateResponseState.segmentHandler.getModelResponseSegments()), mergeGeneratedResultWithChatHistory("user", generateResponseState.lastContextWindowHistory, generateResponseState.segmentHandler.getContextWindowModelResponseSegments()), true);
+                    generateResponseState.isRerender = false;
                     generateResponseState.functionEvaluationMode = false;
                     generateResponseState.addStopGenerationTriggersFromChatWrapper();
                     if (userTextSuffix != null && userTextSuffix.values.length > 0)
                         generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer));
+                    generateResponseState.rerenderTriggers.forEach((trigger) => (generateResponseState.stopGenerationDetector.addStopTrigger(StopGenerationDetector.resolveLlamaTextTrigger(trigger, this.model.tokenizer))));
                     allSegmentTypes
                         .map((segmentType) => getChatWrapperSegmentDefinition(this._chatWrapper.settings, segmentType))
                         .filter((segmentDefinition) => segmentDefinition != null)
@@ -545,13 +555,13 @@ function generateContextTextThatEndsWithUserText(chatWrapper, options) {
         `There might be an issue with the chat wrapper "${chatWrapper.wrapperName}" ` +
         "where not all user messages are properly added to the the result LlamaText");
 }
-async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
+async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, isRerender, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }) {
     if (sequence == null)
         throw new DisposedError();
     const model = sequence.model;
     const context = sequence.context;
     let removeRawFromHistory = false;
-    if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
+    if ((isFirstEvaluation || isRerender) && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
         const newContextWindow = lastEvaluationContextWindowHistory.slice();
         if (endWithUserText) {
             if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "user")
@@ -565,7 +575,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
                 type: "model",
                 response: []
             });
-        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
+        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
             chatHistory: newContextWindow,
             availableFunctions: functions,
             documentFunctionParams
@@ -574,7 +584,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
         if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
             const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
             const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
-            if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
+            if (isRerender || existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
                 return {
                     history: newContextWindow,
                     stopGenerationTriggers,
@@ -584,7 +594,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
                     ignoreStartText: ignoreStartText ?? [],
                     functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                     disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
-                    userTextSuffix
+                    userTextSuffix,
+                    prefixTriggers,
+                    noPrefixTrigger,
+                    rerender,
+                    detectFunctionCalls
                 };
         }
     }
@@ -607,7 +621,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
             functions,
             documentFunctionParams
         });
-        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
+        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
             chatHistory: compressedHistory,
             availableFunctions: functions,
             documentFunctionParams
@@ -621,11 +635,15 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
             ignoreStartText: ignoreStartText ?? [],
             functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
             disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
-            userTextSuffix
+            userTextSuffix,
+            prefixTriggers,
+            noPrefixTrigger,
+            rerender,
+            detectFunctionCalls
         };
     }
     {
-        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
+        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
             chatHistory: resolvedHistory,
             availableFunctions: functions,
             documentFunctionParams
@@ -641,7 +659,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
                 ignoreStartText: ignoreStartText ?? [],
                 functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                 disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
-                userTextSuffix
+                userTextSuffix,
+                prefixTriggers,
+                noPrefixTrigger,
+                rerender,
+                detectFunctionCalls
             };
     }
     const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
@@ -658,7 +680,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
         functions,
         documentFunctionParams
     });
-    const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix } = generateContextText(endWithUserText, chatWrapper, {
+    const { contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = generateContextText(endWithUserText, chatWrapper, {
         chatHistory: compressedHistory,
         availableFunctions: functions,
         documentFunctionParams
@@ -672,7 +694,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
         ignoreStartText: ignoreStartText ?? [],
         functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
         disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [],
-        userTextSuffix
+        userTextSuffix,
+        prefixTriggers,
+        noPrefixTrigger,
+        rerender,
+        detectFunctionCalls
     };
 }
 class GenerateResponseState {
@@ -711,6 +737,7 @@ class GenerateResponseState {
     functionNameGrammar;
     functionsGrammar;
     functionsEvaluationState;
+    functionSyntaxStartDetectorEnabled = true;
     streamRegulator = new TokenStreamRegulator();
     stopGenerationDetector = new StopGenerationDetector();
     customStopGenerationTriggersDetector = new StopGenerationDetector();
@@ -723,6 +750,7 @@ class GenerateResponseState {
     res = [];
     pendingTokens = [];
     ignoredStartTextTokens = [];
+    prefixTriggerTokens = [];
     resFunctionCalls = [];
     segmentHandler;
     pendingPartialTokens = [];
@@ -735,12 +763,14 @@ class GenerateResponseState {
     releasedPartiallyFreeTokensBeforeFunctionCallStartSyntax = false;
     generatedTokens = 0;
     isFirstEvaluation = true;
+    isRerender = true; // first render is a rerender
     initiallyEngagedFunctionMode = false;
     lastContextWindowHistory;
     lastHistoryCompressionMetadata;
     restartEvaluationIterator = false;
     // context shift loop
     shouldContextShift = false;
+    shouldRerender = false;
     canAvoidReloadingHistory = false;
     contextWindowTokens = [];
     stopGenerationTriggers = [];
@@ -748,6 +778,11 @@ class GenerateResponseState {
     functionCallInitiallyEngaged = false;
     disengageInitiallyEngagedFunctionCall = [];
     userTextSuffix = undefined;
+    prefixTriggerDetectors = new Map();
+    noPrefixTrigger = undefined;
+    rerenderTriggers = [];
+    rerenderTriggerDetector = new StopGenerationDetector();
+    rerenderActions = undefined;
     tokens = [];
     // token evaluation loop
     evaluationIterator;
@@ -930,7 +965,8 @@ class GenerateResponseState {
             let mostExhaustiveTriggeredStopsLeftoverTokens = [];
             const lastTokensForDetokenizer = resolveLastTokens([
                 this.contextWindowTokens,
-                this.ignoredStartTextTokens
+                this.ignoredStartTextTokens,
+                this.prefixTriggerTokens
             ]);
             const pendingPartialTokens = [];
             for (let i = 0; i < this.pendingTokens.length; i++) {
@@ -993,6 +1029,16 @@ class GenerateResponseState {
         this.ensureNotAborted();
         this.shouldContextShift = false;
     }
+    handleRerender() {
+        if (this.shouldRerender) {
+            this.isRerender = true;
+            this.streamRegulator.reset();
+            if (this.rerenderActions === "closeResponseItem" && this.segmentHandler.topOpenSegmentType != null) {
+                this.segmentHandler.closeSegment(this.segmentHandler.topOpenSegmentType);
+                this.shouldRerender = false;
+            }
+        }
+    }
     getContextWindowFunctionCallsTokens() {
         if (this.functionEvaluationMode === false)
             return [];
@@ -1019,14 +1065,15 @@ class GenerateResponseState {
     async loadContextWindow(resolvedHistory, resolvedContextWindowsHistory, endWithUserText = false, avoidReloadingHistory = false) {
         const queuedChunkTokens = this.streamRegulator.getAllQueuedChunkTokens();
         const functionCallsTokens = this.getContextWindowFunctionCallsTokens();
-        if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || !this.llamaChat.sequence.isLoadedToMemory) {
-            const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix } = await getContextWindow({
+        if (!avoidReloadingHistory || !this.canAvoidReloadingHistory || this.isRerender || !this.llamaChat.sequence.isLoadedToMemory) {
+            const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, removeRawFromHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall, userTextSuffix, prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls } = await getContextWindow({
                 resolvedHistory: resolvedHistory,
                 resolvedContextShift: this.resolvedContextShift,
                 lastHistoryCompressionMetadata: this.lastHistoryCompressionMetadata,
-                pendingTokensCount: this.pendingTokens.length + queuedChunkTokens.length + functionCallsTokens.length +
-                    this.pendingPartialTokens.length,
+                pendingTokensCount: this.prefixTriggerTokens.length + this.pendingTokens.length + queuedChunkTokens.length +
+                    functionCallsTokens.length + this.pendingPartialTokens.length,
                 isFirstEvaluation: this.isFirstEvaluation,
+                isRerender: this.isRerender,
                 chatWrapper: this.chatWrapper,
                 lastEvaluationContextWindowHistory: resolvedContextWindowsHistory,
                 minimumOverlapPercentageToPreventContextShift: this.minimumOverlapPercentageToPreventContextShift,
@@ -1043,6 +1090,45 @@ class GenerateResponseState {
             this.functionCallInitiallyEngaged = functionCallInitiallyEngaged;
             this.disengageInitiallyEngagedFunctionCall = disengageInitiallyEngagedFunctionCall;
             this.userTextSuffix = userTextSuffix;
+            if (this.isRerender) {
+                this.prefixTriggerTokens.length = 0;
+                for (const prefixDetector of this.prefixTriggerDetectors.keys()) {
+                    prefixDetector.clearInProgressStops();
+                    prefixDetector.clearTriggeredStops();
+                }
+                this.prefixTriggerDetectors.clear();
+                for (const trigger of prefixTriggers ?? []) {
+                    if (trigger.type === "functionCall" && !this.functionsEnabled)
+                        continue;
+                    const prefixDetector = new StopGenerationDetector();
+                    StopGenerationDetector.resolveStopTriggers(trigger.triggers, this.llamaChat.model.tokenizer)
+                        .forEach((stopTrigger) => prefixDetector.addStopTrigger(stopTrigger));
+                    this.prefixTriggerDetectors.set(prefixDetector, { inject: trigger.inject, trigger });
+                    const inject = trigger.inject;
+                    if (inject != null && inject.values.length > 0) {
+                        const fullPrefixDetector = new StopGenerationDetector();
+                        StopGenerationDetector
+                            .resolveStopTriggers(trigger.triggers.map((trigger) => LlamaText([trigger, inject])), this.llamaChat.model.tokenizer)
+                            .forEach((stopTrigger) => fullPrefixDetector.addStopTrigger(stopTrigger));
+                        this.prefixTriggerDetectors.set(fullPrefixDetector, { trigger });
+                    }
+                }
+                this.noPrefixTrigger = noPrefixTrigger;
+                if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled)
+                    this.noPrefixTrigger = undefined;
+                this.rerenderTriggers = rerender?.triggers ?? [];
+                this.rerenderTriggerDetector.clearInProgressStops();
+                this.rerenderTriggerDetector.clearTriggeredStops();
+                this.rerenderTriggerDetector = new StopGenerationDetector();
+                this.rerenderActions = rerender?.action;
+                this.functionSyntaxStartDetectorEnabled = detectFunctionCalls ?? true;
+                if (!this.functionSyntaxStartDetectorEnabled)
+                    this.functionSyntaxStartDetector.clearInProgressStops();
+                if (rerender?.triggers != null) {
+                    StopGenerationDetector.resolveStopTriggers(rerender.triggers, this.llamaChat.model.tokenizer)
+                        .map((stopTrigger) => this.rerenderTriggerDetector.addStopTrigger(stopTrigger));
+                }
+            }
             this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
             this.lastContextWindowHistory = contextWindowHistory;
             this.segmentHandler.resetContextWindow();
@@ -1055,6 +1141,7 @@ class GenerateResponseState {
         this.tokens = [
             ...this.contextWindowTokens,
             ...this.ignoredStartTextTokens,
+            ...this.prefixTriggerTokens,
             ...this.pendingTokens,
             ...queuedChunkTokens,
             ...functionCallsTokens,
@@ -1090,6 +1177,119 @@ class GenerateResponseState {
             this.restartEvaluationIterator = true;
         }
     }
+    async handlePrefixTriggers(loadContextWindow) {
+        const reloadTokens = async () => {
+            this.startTokenLoop();
+            await loadContextWindow();
+        };
+        const injectTokens = async (text, alignStateTokens = false) => {
+            if (text == null)
+                return;
+            const tokens = text.tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace");
+            if (tokens.length === 0)
+                return;
+            pushAll(this.prefixTriggerTokens, tokens);
+            if (alignStateTokens)
+                await reloadTokens();
+        };
+        if (this.prefixTriggerDetectors.size === 0) {
+            if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
+                await injectTokens(this.noPrefixTrigger.inject, true);
+                this.functionEvaluationMode = "functionName";
+            }
+            else if (this.noPrefixTrigger?.type === "segment") {
+                await injectTokens(this.noPrefixTrigger.inject, true);
+                this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
+            }
+            else if (this.noPrefixTrigger?.type === "response")
+                await injectTokens(this.noPrefixTrigger.inject, true);
+            return undefined;
+        }
+        const generatedTokens = [];
+        let isFirstToken = true;
+        let continueGeneration = true;
+        for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) {
+            pushAll(generatedTokens, tokens);
+            for (const [triggerDetector, { trigger, inject }] of [...this.prefixTriggerDetectors.entries()]) {
+                triggerDetector.recordGeneration({
+                    text: this.currentText,
+                    tokens: this.currentTokens,
+                    startNewChecks: isFirstToken,
+                    triggerMustStartWithGeneration: true
+                });
+                if (triggerDetector.hasTriggeredStops) {
+                    const { firstRemainingGenerationAfterStop, stopTrigger } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggerDetector.getTriggeredStops());
+                    const remainingTokens = typeof firstRemainingGenerationAfterStop === "string"
+                        ? firstRemainingGenerationAfterStop === ""
+                            ? []
+                            : this.llamaChat.model.tokenize(firstRemainingGenerationAfterStop, false, "trimLeadingSpace")
+                        : (firstRemainingGenerationAfterStop ?? []);
+                    const triggerTokens = (stopTrigger == null || remainingTokens.length === 0)
+                        ? generatedTokens
+                        : stopTrigger.flatMap((item) => {
+                            if (typeof item === "string")
+                                return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace");
+                            return [item];
+                        });
+                    this.streamRegulator.reset();
+                    if (trigger.type === "segment") {
+                        pushAll(this.prefixTriggerTokens, triggerTokens);
+                        if (inject != null)
+                            await injectTokens(inject);
+                        await reloadTokens();
+                        this.segmentHandler.openSegment(trigger.segmentType);
+                    }
+                    else if (trigger.type === "response") {
+                        pushAll(this.prefixTriggerTokens, triggerTokens);
+                        if (inject != null)
+                            await injectTokens(inject);
+                        await reloadTokens();
+                    }
+                    else if (trigger.type === "functionCall") {
+                        if (trigger.replaceTrigger === false)
+                            pushAll(this.prefixTriggerTokens, triggerTokens);
+                        if (inject != null)
+                            await injectTokens(inject);
+                        await reloadTokens();
+                        this.functionEvaluationMode = "functionName";
+                    }
+                    else
+                        void trigger;
+                    this.prefixTriggerDetectors.clear();
+                    continueGeneration = false;
+                    break;
+                }
+                else if (!triggerDetector.hasInProgressStops)
+                    this.prefixTriggerDetectors.delete(triggerDetector);
+            }
+            if (this.prefixTriggerDetectors.size === 0 && continueGeneration) {
+                this.streamRegulator.reset();
+                continueGeneration = false;
+                if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) {
+                    await injectTokens(this.noPrefixTrigger.inject, true);
+                    this.functionEvaluationMode = "functionName";
+                }
+                else if (this.noPrefixTrigger?.type === "segment") {
+                    await injectTokens(this.noPrefixTrigger.inject, true);
+                    this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType);
+                }
+                else if (this.noPrefixTrigger?.type === "response")
+                    await injectTokens(this.noPrefixTrigger.inject, true);
+                else
+                    this.streamRegulator.addChunk({
+                        tokens: generatedTokens,
+                        text: this.llamaChat.model.detokenize(generatedTokens, false, this.getLastTokens())
+                    });
+            }
+            isFirstToken = false;
+            if (!continueGeneration)
+                break;
+            const stopRes = this.handleAbortTrigger("model") ?? this.handleMaxTokensTrigger("model");
+            if (stopRes != null)
+                return stopRes;
+        }
+        return undefined;
+    }
     async enterFunctionCallingLoop(loadContextWindow) {
         if (!this.functionsEnabled) {
             this.functionEvaluationMode = false;
@@ -1568,6 +1768,8 @@ class GenerateResponseState {
         }
     }
     detectAndHandleFunctionStartSyntax() {
+        if (!this.functionSyntaxStartDetectorEnabled)
+            return;
         this.functionSyntaxStartDetector.recordGeneration({
             text: this.currentText,
             tokens: this.currentTokens,
@@ -1592,6 +1794,11 @@ class GenerateResponseState {
         }
     }
     recordStopGenerationEvaluation() {
+        this.rerenderTriggerDetector.recordGeneration({
+            text: this.currentText,
+            tokens: this.currentTokens,
+            queuedTokenRelease: this.currentQueuedTokenRelease
+        });
         this.stopGenerationDetector.recordGeneration({
             text: this.currentText,
             tokens: this.currentTokens,
@@ -1609,8 +1816,10 @@ class GenerateResponseState {
         pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
     }
     handleStopGenerationTrigger(lastHistoryItemType, forceStopReason) {
-        if (this.stopGenerationDetector.hasTriggeredStops || this.customStopGenerationTriggersDetector.hasTriggeredStops ||
-            this.llamaChat.model.isEogToken(this.currentToken) || forceStopReason != null) {
+        const detectedStopGenerationTrigger = this.stopGenerationDetector.hasTriggeredStops ||
+            this.customStopGenerationTriggersDetector.hasTriggeredStops ||
+            this.llamaChat.model.isEogToken(this.currentToken);
+        if ((detectedStopGenerationTrigger && !this.rerenderTriggerDetector.hasTriggeredStops) || forceStopReason != null) {
             this.stopGenerationDetector.clearInProgressStops();
             this.customStopGenerationTriggersDetector.clearInProgressStops();
             pushAll(this.pendingTokens, this.streamRegulator.popFreeChunkTokens());
@@ -1722,6 +1931,10 @@ class GenerateResponseState {
         }
         return shouldReloadEvaluationState;
     }
+    handleShouldRerender() { // Refreshes the `shouldRerender` flag from the rerender trigger detector and returns it.
+        this.shouldRerender = this.rerenderTriggerDetector.hasTriggeredStops; // kept on the instance as well as returned
+        return this.shouldRerender;
+    }
     updateShouldContextShift() {
         this.shouldContextShift = this.llamaChat.sequence.nextTokenIndex >= this.llamaChat.context.contextSize - 1;
         return this.shouldContextShift;
@@ -1867,6 +2080,9 @@ class SegmentHandler {
     isSegmentTypeOpen(type) { // Reports whether `type` is currently present in `_segmentsStackSet`.
         return this._segmentsStackSet.has(type);
     }
+    get topOpenSegmentType() { // Getter for the last entry of `_segmentsStack` (presumably an array; `.at(-1)` then yields `undefined` when empty).
+        return this._segmentsStack.at(-1);
+    }
     _processTokens(tokens, text) {
         const queuedTokenRelease = this._streamRegulator.addChunk({
             tokens,
@@ -2065,17 +2281,16 @@ class SegmentHandler {
                 this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
             }
             else {
-                if (lastSegment instanceof Array) {
-                    const text = (this.onResponseChunk != null || this.onTextChunk != null)
-                        ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
-                        : "";
+                const text = (this.onResponseChunk != null || this.onTextChunk != null)
+                    ? this.model.detokenize(tokens, false, this._getTokenTrailFromResult())
+                    : "";
+                if (lastSegment instanceof Array)
                     pushAll(lastSegment, tokens);
-                    this.onToken?.(tokens);
-                    this.onTextChunk?.(text);
-                    this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens, text });
-                }
                 else
                     this._segments.push(tokens);
+                this.onToken?.(tokens.slice());
+                this.onTextChunk?.(text);
+                this.onResponseChunk?.({ type: undefined, segmentType: undefined, tokens: tokens.slice(), text });
             }
             if (lastContextWindowSegment == null)
                 this._contextWindowSegments.push(tokens.slice());