node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. package/README.md +2 -0
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  6. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  7. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  8. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +13 -0
  9. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  10. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  11. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  12. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  13. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  14. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  15. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  16. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  17. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  18. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  19. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  20. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  21. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  22. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  23. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  24. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  25. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  26. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +4 -4
  27. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +24 -16
  28. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  29. package/dist/cli/commands/ChatCommand.d.ts +2 -1
  30. package/dist/cli/commands/ChatCommand.js +71 -33
  31. package/dist/cli/commands/ChatCommand.js.map +1 -1
  32. package/dist/config.js +1 -1
  33. package/dist/config.js.map +1 -1
  34. package/dist/index.d.ts +17 -10
  35. package/dist/index.js +16 -8
  36. package/dist/index.js.map +1 -1
  37. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -1
  38. package/dist/llamaEvaluator/LlamaChat/LlamaChat.d.ts +175 -0
  39. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +704 -0
  40. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +1 -0
  41. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +21 -0
  42. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +120 -0
  43. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  44. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  45. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +117 -0
  46. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  47. package/dist/llamaEvaluator/{LlamaChatSession.d.ts → LlamaChatSession/LlamaChatSession.d.ts} +48 -25
  48. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +211 -0
  49. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  50. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  51. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  52. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  53. package/dist/llamaEvaluator/LlamaContext/LlamaContext.d.ts +18 -23
  54. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js +60 -103
  55. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +1 -1
  56. package/dist/llamaEvaluator/LlamaContext/types.d.ts +6 -14
  57. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +35 -0
  58. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +73 -0
  59. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +1 -0
  60. package/dist/llamaEvaluator/LlamaGrammar.d.ts +8 -12
  61. package/dist/llamaEvaluator/LlamaGrammar.js +7 -12
  62. package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -1
  63. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js +2 -1
  64. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  65. package/dist/llamaEvaluator/LlamaModel.d.ts +10 -2
  66. package/dist/llamaEvaluator/LlamaModel.js +14 -3
  67. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  68. package/dist/types.d.ts +41 -3
  69. package/dist/types.js +5 -1
  70. package/dist/types.js.map +1 -1
  71. package/dist/utils/LlamaText.d.ts +42 -0
  72. package/dist/utils/LlamaText.js +207 -0
  73. package/dist/utils/LlamaText.js.map +1 -0
  74. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  75. package/dist/utils/StopGenerationDetector.js +205 -0
  76. package/dist/utils/StopGenerationDetector.js.map +1 -0
  77. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  78. package/dist/utils/TokenStreamRegulator.js +96 -0
  79. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  80. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  81. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  82. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  83. package/dist/utils/compareTokens.d.ts +2 -0
  84. package/dist/utils/compareTokens.js +4 -0
  85. package/dist/utils/compareTokens.js.map +1 -0
  86. package/dist/utils/compileLLamaCpp.js +11 -6
  87. package/dist/utils/compileLLamaCpp.js.map +1 -1
  88. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  89. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  90. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  91. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  92. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  93. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  94. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  95. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  96. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  97. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  98. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  99. package/dist/utils/gbnfJson/types.d.ts +1 -1
  100. package/dist/utils/gbnfJson/types.js.map +1 -1
  101. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  102. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  103. package/dist/utils/getBin.d.ts +3 -2
  104. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  105. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  106. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  107. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  108. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  109. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  110. package/dist/utils/resolveChatWrapper.js +16 -0
  111. package/dist/utils/resolveChatWrapper.js.map +1 -0
  112. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  113. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  114. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  115. package/llama/addon.cpp +45 -17
  116. package/llama/binariesGithubRelease.json +1 -1
  117. package/llama/gitRelease.bundle +0 -0
  118. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  119. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  120. package/llamaBins/linux-x64/llama-addon.node +0 -0
  121. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  122. package/llamaBins/mac-x64/llama-addon.node +0 -0
  123. package/llamaBins/win-x64/llama-addon.node +0 -0
  124. package/package.json +21 -9
  125. package/dist/ChatPromptWrapper.d.ts +0 -11
  126. package/dist/ChatPromptWrapper.js +0 -20
  127. package/dist/ChatPromptWrapper.js.map +0 -1
  128. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  129. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  130. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  131. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  132. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  133. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  134. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  135. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  136. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  137. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  138. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  139. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  140. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  141. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  142. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  143. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
  144. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
  145. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  146. package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
  147. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  148. package/dist/utils/getTextCompletion.d.ts +0 -3
  149. package/dist/utils/getTextCompletion.js +0 -12
  150. package/dist/utils/getTextCompletion.js.map +0 -1
  151. package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
  152. package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
@@ -0,0 +1,704 @@
1
+ import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
2
+ import { resolveChatWrapper } from "../../utils/resolveChatWrapper.js";
3
+ import { removeNullFields } from "../../utils/removeNullFields.js";
4
+ import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
5
+ import { AbortError } from "../../AbortError.js";
6
+ import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
7
+ import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
8
+ import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
9
+ import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
10
+ const defaultContextShiftOptions = {
11
+ size: (sequence) => Math.max(1, Math.floor(sequence.context.contextSize / 10)),
12
+ strategy: "eraseFirstResponseAndKeepFirstSystem",
13
+ lastEvaluationMetadata: null
14
+ };
15
+ const UNKNOWN_UNICODE_CHAR = "\ufffd";
16
+ export class LlamaChat {
17
+ /** @internal */ _chatWrapper;
18
+ /** @internal */ _disposeAggregator = new DisposeAggregator();
19
+ /** @internal */ _autoDisposeSequence;
20
+ /** @internal */ _sequence;
21
+ onDispose = new EventRelay();
22
+ constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
23
+ if (contextSequence == null)
24
+ throw new Error("contextSequence cannot be null");
25
+ if (contextSequence.disposed)
26
+ throw new DisposedError();
27
+ this._sequence = contextSequence;
28
+ this._autoDisposeSequence = autoDisposeSequence;
29
+ this._disposeAggregator.add(this._sequence.onDispose.createListener(() => {
30
+ this.dispose();
31
+ }));
32
+ this._disposeAggregator.add(this.onDispose.dispatchEvent);
33
+ this._chatWrapper = resolveChatWrapper(chatWrapper, contextSequence.model);
34
+ }
35
+ dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
36
+ if (this._sequence == null)
37
+ return;
38
+ if (disposeSequence)
39
+ this._sequence.dispose();
40
+ this._sequence = null;
41
+ this._disposeAggregator.dispose();
42
+ }
43
+ /** @hidden */
44
+ [Symbol.dispose]() {
45
+ return this.dispose();
46
+ }
47
+ get disposed() {
48
+ return this._sequence == null;
49
+ }
50
+ get chatWrapper() {
51
+ if (this._sequence == null)
52
+ throw new DisposedError();
53
+ return this._chatWrapper;
54
+ }
55
+ get sequence() {
56
+ if (this._sequence == null)
57
+ throw new DisposedError();
58
+ return this._sequence;
59
+ }
60
+ get context() {
61
+ return this.sequence.context;
62
+ }
63
+ get model() {
64
+ return this.sequence.model;
65
+ }
66
+ async generateResponse(history, { onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
67
+ const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
68
+ if (grammar != null && functionsEnabled)
69
+ throw new Error("Using both grammar and functions is not supported yet");
70
+ if (signal?.aborted)
71
+ throw new AbortError();
72
+ if (this._sequence == null)
73
+ throw new DisposedError();
74
+ let resolvedHistory = this._sequence.isLoadedToMemory
75
+ ? history.slice()
76
+ : history.map(removeRawFromHistoryItem);
77
+ if (resolvedHistory.length === 0 || resolvedHistory[resolvedHistory.length - 1].type !== "model")
78
+ resolvedHistory.push({
79
+ type: "model",
80
+ response: []
81
+ });
82
+ const model = this._sequence.model;
83
+ const context = this._sequence.context;
84
+ const eosToken = model.tokens.eos;
85
+ const resolvedContextShift = {
86
+ ...defaultContextShiftOptions,
87
+ ...removeNullFields(contextShift)
88
+ };
89
+ const { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = repeatPenalty === false
90
+ ? { lastTokens: 0 }
91
+ : repeatPenalty;
92
+ const lastModelResponse = getLastTextModelResponseFromChatHistory(resolvedHistory);
93
+ const res = [];
94
+ const pendingTokens = [];
95
+ let ignoredStartTextTokens = [];
96
+ const functionCallTokens = [];
97
+ const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
98
+ const grammarEvaluationState = grammar != null
99
+ ? new LlamaGrammarEvaluationState({ grammar })
100
+ : undefined;
101
+ let functionsGrammar = functionsEnabled
102
+ ? new FunctionCallGrammar(functions, this._chatWrapper, false)
103
+ : undefined;
104
+ let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
105
+ ? new LlamaGrammarEvaluationState({
106
+ grammar: functionsGrammar
107
+ })
108
+ : undefined;
109
+ const streamRegulator = new TokenStreamRegulator();
110
+ const stopGenerationDetector = new StopGenerationDetector();
111
+ const functionSyntaxStartDetector = new StopGenerationDetector();
112
+ const functionSyntaxEndDetector = new StopGenerationDetector();
113
+ const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
114
+ const ignoreStartTextDetector = new StopGenerationDetector();
115
+ const locksToReleaseOnValidGeneration = [];
116
+ const functionCallTokenSyntaxLocks = [];
117
+ let generatedTokens = 0;
118
+ let isFirstEvaluation = true;
119
+ let inFunctionEvaluationMode = false;
120
+ let initiallyEngagedFunctionMode = false;
121
+ let lastContextWindowHistory = resolvedHistory;
122
+ let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
123
+ const ensureNotAborted = () => {
124
+ if (signal?.aborted)
125
+ throw new AbortError();
126
+ if (this._sequence == null)
127
+ throw new DisposedError();
128
+ };
129
+ const getPenaltyTokens = () => {
130
+ if (this._sequence == null)
131
+ throw new DisposedError();
132
+ let punishTokens = res.slice(-repeatPenaltyLastTokens);
133
+ if (punishTokensFilter != null)
134
+ punishTokens = punishTokensFilter(punishTokens);
135
+ if (!penalizeNewLine) {
136
+ const nlToken = model.tokens.nl;
137
+ if (nlToken != null)
138
+ punishTokens = punishTokens.filter(token => token !== nlToken);
139
+ }
140
+ return punishTokens;
141
+ };
142
+ const getResolvedHistoryWithCurrentModelResponse = () => {
143
+ if (res.length === 0)
144
+ return resolvedHistory;
145
+ let modelResponse = model.detokenize(res);
146
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
147
+ modelResponse = modelResponse.trimEnd();
148
+ if (modelResponse === "")
149
+ return resolvedHistory;
150
+ return setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse);
151
+ };
152
+ const removeFoundStartIgnoreTextsFromPendingTokens = () => {
153
+ if (res.length === 0 && pendingTokens.length > 0) {
154
+ ignoreStartTextDetector.clearInProgressStops();
155
+ ignoreStartTextDetector.clearTriggeredStops();
156
+ let mostExhaustiveTriggeredStops = null;
157
+ for (let i = 0; i < pendingTokens.length; i++) {
158
+ ignoreStartTextDetector.recordGeneration({
159
+ text: model.detokenize([pendingTokens[i]]),
160
+ tokens: [pendingTokens[i]],
161
+ startNewChecks: i === 0
162
+ });
163
+ if (ignoreStartTextDetector.hasTriggeredStops) {
164
+ mostExhaustiveTriggeredStops = ignoreStartTextDetector.getTriggeredStops();
165
+ ignoreStartTextDetector.clearTriggeredStops();
166
+ }
167
+ else if (!ignoreStartTextDetector.hasInProgressStops)
168
+ break;
169
+ }
170
+ if (mostExhaustiveTriggeredStops != null) {
171
+ const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
172
+ if (mostExhaustiveTriggeredStop != null) {
173
+ ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
174
+ .map((stopTrigger) => {
175
+ if (typeof stopTrigger === "string")
176
+ return model.tokenize(stopTrigger);
177
+ else
178
+ return [stopTrigger];
179
+ })
180
+ .flat(1);
181
+ const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
182
+ .map((generation) => {
183
+ if (typeof generation === "string")
184
+ return model.tokenize(generation);
185
+ else
186
+ return generation;
187
+ })
188
+ .flat(1);
189
+ pendingTokens.length = 0;
190
+ pendingTokens.push(...newPendingTokens);
191
+ }
192
+ }
193
+ }
194
+ };
195
+ if (grammar != null)
196
+ StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
197
+ .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
198
+ if (functions != null && Object.keys(functions).length > 0)
199
+ functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
200
+ // eslint-disable-next-line no-constant-condition
201
+ while (true) {
202
+ ensureNotAborted();
203
+ let shouldContextShift = false;
204
+ const queuedChunkTokens = streamRegulator.getAllQueuedChunkTokens();
205
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall } = await getContextWindow({
206
+ resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
207
+ resolvedContextShift,
208
+ lastHistoryCompressionMetadata,
209
+ pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
210
+ isFirstEvaluation,
211
+ chatWrapper: this._chatWrapper,
212
+ lastEvaluationContextWindowHistory,
213
+ minimumOverlapPercentageToPreventContextShift,
214
+ sequence: this._sequence,
215
+ minFreeContextTokens: 1,
216
+ functions: functionsEnabled ? functions : undefined,
217
+ documentFunctionParams
218
+ });
219
+ ensureNotAborted();
220
+ if (generatedTokens === 0) {
221
+ StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenize)
222
+ .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
223
+ if (functionsEnabled) {
224
+ initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
225
+ StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenize)
226
+ .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
227
+ if (initiallyEngagedFunctionMode) {
228
+ inFunctionEvaluationMode = true;
229
+ functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, true);
230
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
231
+ grammar: functionsGrammar
232
+ });
233
+ }
234
+ }
235
+ }
236
+ const tokens = [...contextWindowTokens, ...ignoredStartTextTokens, ...pendingTokens, ...queuedChunkTokens];
237
+ resolvedHistory = newResolvedHistory;
238
+ lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
239
+ lastContextWindowHistory = contextWindowHistory;
240
+ const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
241
+ const contextWindowsRes = [];
242
+ StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
243
+ .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
244
+ if (functionsGrammar != null)
245
+ StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenize)
246
+ .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
247
+ let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
248
+ // we need to decode at least one token to generate a response
249
+ if (firstDifferentIndex === tokens.length && firstDifferentIndex > 0)
250
+ firstDifferentIndex -= 1;
251
+ tokens.splice(0, firstDifferentIndex);
252
+ if (firstDifferentIndex < this._sequence.nextTokenIndex) {
253
+ await this._sequence.eraseContextTokenRanges([{
254
+ start: firstDifferentIndex,
255
+ end: this._sequence.nextTokenIndex
256
+ }]);
257
+ ensureNotAborted();
258
+ }
259
+ const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
260
+ temperature, topK, topP,
261
+ grammarEvaluationState: () => {
262
+ if (inFunctionEvaluationMode)
263
+ return functionsEvaluationState;
264
+ return grammarEvaluationState;
265
+ },
266
+ repeatPenalty: !repeatPenaltyEnabled ? undefined : {
267
+ punishTokens: getPenaltyTokens,
268
+ penalty,
269
+ frequencyPenalty,
270
+ presencePenalty
271
+ },
272
+ evaluationPriority,
273
+ yieldEosToken: true
274
+ }));
275
+ for await (const token of evaluationIterator) {
276
+ ensureNotAborted();
277
+ generatedTokens++;
278
+ const tokens = [token];
279
+ const text = model.detokenize([token]);
280
+ const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
281
+ if (initiallyEngagedFunctionMode)
282
+ disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
283
+ if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
284
+ locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
285
+ }
286
+ else {
287
+ while (locksToReleaseOnValidGeneration.length > 0)
288
+ locksToReleaseOnValidGeneration.shift().dispose();
289
+ }
290
+ functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
291
+ if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
292
+ initiallyEngagedFunctionMode = false;
293
+ let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
294
+ if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
295
+ const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
296
+ try {
297
+ const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
298
+ enableInternalBuiltinFunctions: true,
299
+ initialFunctionCallEngaged: true
300
+ });
301
+ const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
302
+ if (internalBuiltinFunctions[functionName] != null) {
303
+ shouldStopFunctionEvaluationMode = true;
304
+ }
305
+ }
306
+ catch (err) {
307
+ if (!(err instanceof LlamaFunctionCallValidationError))
308
+ throw err;
309
+ }
310
+ }
311
+ if (shouldStopFunctionEvaluationMode) {
312
+ inFunctionEvaluationMode = false;
313
+ functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, false);
314
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
315
+ grammar: functionsGrammar
316
+ });
317
+ functionCallTokens.length = 0;
318
+ while (functionCallTokenSyntaxLocks.length > 0)
319
+ functionCallTokenSyntaxLocks.shift().dispose();
320
+ functionSyntaxStartDetector.clearInProgressStops();
321
+ functionSyntaxStartDetector.clearTriggeredStops();
322
+ functionSyntaxEndDetector.clearInProgressStops();
323
+ functionSyntaxEndDetector.clearTriggeredStops();
324
+ }
325
+ }
326
+ if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
327
+ functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
328
+ inFunctionEvaluationMode = true;
329
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
330
+ stopGenerationDetector.clearTriggeredStops();
331
+ stopGenerationDetector.clearInProgressStops();
332
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
333
+ const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
334
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
335
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
336
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
337
+ const [firstRemainingGenerationAfterStop] = triggeredStops
338
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
339
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
340
+ .flat(1);
341
+ const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
342
+ ? ""
343
+ : typeof firstRemainingGenerationAfterStop === "string"
344
+ ? firstRemainingGenerationAfterStop
345
+ : model.detokenize(firstRemainingGenerationAfterStop);
346
+ functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop));
347
+ for (const functionCallToken of functionCallTokens)
348
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
349
+ }
350
+ else if (inFunctionEvaluationMode) {
351
+ functionCallTokens.push(...tokens);
352
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
353
+ functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
354
+ }
355
+ if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
356
+ const functionCallText = model.detokenize(functionCallTokens);
357
+ const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
358
+ let modelResponse = model.detokenize(res);
359
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
360
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
361
+ modelResponse = modelResponse.trimEnd();
362
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
363
+ }
364
+ return {
365
+ response: modelResponse,
366
+ lastEvaluation: {
367
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
368
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
369
+ contextShiftMetadata: lastHistoryCompressionMetadata
370
+ },
371
+ // prevent infinite TS type instantiation
372
+ functionCall: functionCall,
373
+ metadata: {
374
+ stopReason: "functionCall"
375
+ }
376
+ };
377
+ }
378
+ if (!inFunctionEvaluationMode)
379
+ stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
380
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
381
+ removeFoundStartIgnoreTextsFromPendingTokens();
382
+ if (stopGenerationDetector.hasTriggeredStops || token === eosToken) {
383
+ const triggeredStops = stopGenerationDetector.getTriggeredStops();
384
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
385
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
386
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
387
+ const [firstRemainingGenerationAfterStop] = triggeredStops
388
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
389
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
390
+ .flat(1);
391
+ removeFoundStartIgnoreTextsFromPendingTokens();
392
+ if (pendingTokens.length > 0)
393
+ onToken?.(pendingTokens.slice());
394
+ res.push(...pendingTokens);
395
+ contextWindowsRes.push(...pendingTokens);
396
+ pendingTokens.length = 0;
397
+ let modelResponse = model.detokenize(res);
398
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
399
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
400
+ modelResponse = modelResponse.trimEnd();
401
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
402
+ }
403
+ return {
404
+ response: modelResponse,
405
+ lastEvaluation: {
406
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
407
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
408
+ contextShiftMetadata: lastHistoryCompressionMetadata
409
+ },
410
+ metadata: {
411
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
412
+ stopReason: token === eosToken
413
+ ? "eosToken"
414
+ : "stopGenerationTrigger"
415
+ }
416
+ };
417
+ }
418
+ const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
419
+ if (res.length === 0) {
420
+ ignoreStartTextDetector.clearInProgressStops();
421
+ ignoreStartTextDetector.clearTriggeredStops();
422
+ ignoreStartTextDetector.recordGeneration({
423
+ text: model.detokenize(pendingTokens),
424
+ tokens: pendingTokens
425
+ });
426
+ }
427
+ if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
428
+ removeFoundStartIgnoreTextsFromPendingTokens();
429
+ if (pendingTokens.length > 0) {
430
+ onToken?.(pendingTokens.slice());
431
+ res.push(...pendingTokens);
432
+ contextWindowsRes.push(...pendingTokens);
433
+ pendingTokens.length = 0;
434
+ }
435
+ }
436
+ if (maxTokensTriggered) {
437
+ let modelResponse = model.detokenize(res);
438
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
439
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
440
+ modelResponse = modelResponse.trimEnd();
441
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
442
+ }
443
+ return {
444
+ response: modelResponse,
445
+ lastEvaluation: {
446
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
447
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
448
+ contextShiftMetadata: lastHistoryCompressionMetadata
449
+ },
450
+ metadata: {
451
+ stopReason: "maxTokens"
452
+ }
453
+ };
454
+ }
455
+ if (this._sequence.nextTokenIndex >= context.contextSize) {
456
+ shouldContextShift = true;
457
+ break;
458
+ }
459
+ }
460
+ isFirstEvaluation = false;
461
+ if (shouldContextShift)
462
+ continue;
463
+ break;
464
+ }
465
+ throw new Error("The context size is too small to generate a response");
466
+ }
467
+ }
468
/**
 * Returns a copy of a chat history item with the `raw` field stripped from
 * every non-string entry of a "model" item's response.
 * Non-"model" items are returned unchanged (same reference); the input item
 * is never mutated — the copy keeps the `raw` key but sets it to `undefined`.
 */
function removeRawFromHistoryItem(historyItem) {
    if (historyItem.type !== "model")
        return historyItem;

    // Rebuild the response list, blanking `raw` on structured entries only
    const strippedResponse = historyItem.response.map((entry) => (
        typeof entry === "string"
            ? entry
            : { ...entry, raw: undefined }
    ));
    return { ...historyItem, response: strippedResponse };
}
484
/**
 * Compresses a chat history so that the context text generated from it by the
 * chat wrapper fits within `contextSize - contextShiftSize` tokens.
 *
 * Resolution order:
 * 1. If the history already fits, it is returned as-is with `metadata: null`.
 * 2. If `contextShiftStrategy` is a function, it is tried first; its result is
 *    used only when it actually fits. Failures (throw or non-fitting result)
 *    fall back to the default strategy with a console warning/error.
 * 3. Otherwise the default "eraseFirstResponseAndKeepFirstSystem" strategy is
 *    applied; a non-fitting result from it is a hard error.
 *
 * @returns {Promise<{compressedHistory: object[], metadata: any}>}
 * @throws {Error} when `contextShiftSize` leaves no room in the context, or
 *   when the default strategy cannot produce a fitting history.
 */
async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
    // Render a candidate history through the chat wrapper and check its token
    // count against the space left after reserving the shift window.
    const fitsContext = (candidateHistory) => {
        const { contextText } = chatWrapper.generateContextText(candidateHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        return contextText.tokenize(tokenizer).length <= contextSize - contextShiftSize;
    };

    if (contextSize - contextShiftSize <= 0)
        throw new Error(`The context size (${contextSize}) is too small to fit the context shift size (${contextShiftSize})`);

    if (fitsContext(history))
        return {
            compressedHistory: history,
            metadata: null
        };

    if (contextShiftStrategy instanceof Function) {
        // User-provided strategy: best effort, fall through on any failure
        try {
            const { chatHistory, metadata } = await contextShiftStrategy({
                chatHistory: history,
                maxTokensCount: contextSize - contextShiftSize,
                tokenizer,
                chatWrapper,
                lastShiftMetadata: contextShiftLastEvaluationMetadata
            });
            if (fitsContext(chatHistory))
                return {
                    compressedHistory: chatHistory,
                    metadata
                };
            console.warn("The provided context shift strategy did not return a history that fits the context size. Using the default strategy instead.");
        } catch (err) {
            console.error("The provided context shift strategy threw an error. Using the default strategy instead.", err);
        }
    } else if (contextShiftStrategy !== "eraseFirstResponseAndKeepFirstSystem")
        console.warn(`Unknown context shift strategy "${contextShiftStrategy}". Using the default strategy instead.`);

    // Default strategy — must succeed, otherwise generation cannot proceed
    const { chatHistory, metadata } = await eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
        chatHistory: history,
        maxTokensCount: contextSize - contextShiftSize,
        tokenizer,
        chatWrapper,
        lastShiftMetadata: contextShiftLastEvaluationMetadata
    });
    if (!fitsContext(chatHistory))
        throw new Error("The default context shift strategy did not return a history that fits the context size");
    return {
        compressedHistory: chatHistory,
        metadata
    };
}
539
/**
 * Given the stop triggers that fired and the partially-free queue that was
 * held back while matching them (available both as tokens and as text),
 * decides which token representation of the queue should be released.
 *
 * @param {Array<{stopTrigger: Array<string|number>}>} triggeredStops - triggers that fired;
 *   a trigger whose first element is a string is text-based, otherwise token-based
 * @param {{tokens: number[], text: string}} partiallyFreeTokens - the held-back queue
 * @param {(text: string) => number[]} tokenizer - converts text to tokens
 * @returns {number[]} the tokens to release before the stop trigger
 */
function getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, tokenizer) {
    // Only one representation available — no decision to make
    if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length === 0)
        return [];
    else if (partiallyFreeTokens.tokens.length !== 0 && partiallyFreeTokens.text.length === 0)
        return partiallyFreeTokens.tokens;
    else if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length !== 0)
        return tokenizer(partiallyFreeTokens.text);

    // Both representations exist — pick based on the kind of trigger that fired
    const triggerThatStartsWithStringIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] === "string");
    const triggerThatStartsWithTokenIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] !== "string");

    // BUGFIX: `findIndex` returns 0 for a match at the first position, so the
    // existence checks must be `>= 0`. The original `> 0` missed triggers at
    // index 0 and fell through to the comparison fallback, which could return
    // the wrong representation (e.g. re-tokenized text when only token-based
    // triggers fired).
    if (triggerThatStartsWithTokenIndex >= 0 && triggerThatStartsWithStringIndex < 0)
        return partiallyFreeTokens.tokens;
    else if (triggerThatStartsWithStringIndex >= 0 && triggerThatStartsWithTokenIndex < 0)
        return tokenizer(partiallyFreeTokens.text);

    // Both kinds of triggers fired: prefer the text tokenization when it agrees
    // with the token queue, or when a string trigger fired first
    const stringTokens = tokenizer(partiallyFreeTokens.text);
    if (stringTokens.length === partiallyFreeTokens.tokens.length &&
        stringTokens.every((value, index) => value === partiallyFreeTokens.tokens[index]))
        return stringTokens;
    else if (triggerThatStartsWithStringIndex < triggerThatStartsWithTokenIndex)
        return stringTokens;
    return partiallyFreeTokens.tokens;
}
560
/**
 * Extracts the trailing plain-text segment of the last model response in a
 * chat history. Returns "" when the history is empty, does not end with a
 * "model" item, or that item's response does not end with a string.
 */
function getLastTextModelResponseFromChatHistory(chatHistory) {
    if (chatHistory.length === 0)
        return "";
    const lastItem = chatHistory[chatHistory.length - 1];
    if (lastItem.type !== "model")
        return "";
    const { response } = lastItem;
    const lastSegment = response[response.length - 1];
    return typeof lastSegment === "string" ? lastSegment : "";
}
569
/**
 * Returns a copy of the chat history in which the trailing text segment of
 * the last model response is replaced with `textResponse`.
 * - If the history does not end with a "model" item, a fresh one is appended.
 * - An empty `textResponse` removes the trailing text segment instead.
 * Copy-on-write: the input history, its last item, and its response array are
 * never mutated.
 */
function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
    const updatedHistory = [...chatHistory];
    const endsWithModelItem = updatedHistory.length !== 0 &&
        updatedHistory[updatedHistory.length - 1].type === "model";
    if (!endsWithModelItem)
        updatedHistory.push({
            type: "model",
            response: []
        });

    // Replace the last item with a shallow copy owning a fresh response array
    const lastIndex = updatedHistory.length - 1;
    const updatedResponse = [...updatedHistory[lastIndex].response];
    updatedHistory[lastIndex] = {
        ...updatedHistory[lastIndex],
        response: updatedResponse
    };

    const lastSegment = updatedResponse[updatedResponse.length - 1];
    if (typeof lastSegment === "string") {
        if (textResponse === "")
            updatedResponse.pop();
        else
            updatedResponse[updatedResponse.length - 1] = textResponse;
    } else if (textResponse !== "")
        updatedResponse.push(textResponse);

    return updatedHistory;
}
591
/**
 * Builds the context window (history + tokens) for the next evaluation,
 * compressing the history via the configured context shift strategy when it
 * does not fit the sequence's context size.
 *
 * Resolution order:
 * 1. On the first evaluation, try to reuse the previous evaluation's context
 *    window if enough of its tokens are already evaluated in the sequence.
 * 2. If a previous context shift happened (lastEvaluationMetadata != null),
 *    compress again, continuing from that shift's metadata.
 * 3. Otherwise use the history as-is when it fits, or compress it.
 *
 * NOTE(review): `minimumOverlapPercentageToPreventContextShift` is compared
 * against the fraction of context tokens already evaluated — presumably in
 * the [0, 1] range; confirm against callers.
 *
 * @throws {DisposedError} when `sequence` is null (already disposed).
 */
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams }) {
    if (sequence == null)
        throw new DisposedError();
    const model = sequence.model;
    const context = sequence.context;
    // Fast path: on the first evaluation, reuse the last context window when
    // enough of it is already evaluated in the sequence to skip a context shift
    if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
        const newContextWindow = lastEvaluationContextWindowHistory.slice();
        // Ensure the window ends with a model item to generate into
        if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
            newContextWindow.push({
                type: "model",
                response: []
            });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(newContextWindow, {
            availableFunctions: functions,
            documentFunctionParams
        });
        const tokens = contextText.tokenize(model.tokenize);
        // Only reusable if it leaves room for pending tokens + free headroom
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
            // Fraction of the window whose tokens match the sequence's state
            const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
            const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
            if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
                return {
                    history: newContextWindow,
                    stopGenerationTriggers,
                    tokens,
                    newResolvedHistory: resolvedHistory,
                    newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                    ignoreStartText: ignoreStartText ?? [],
                    functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                    disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
                };
        }
    }
    // When the sequence isn't loaded to memory, strip raw data so the history
    // gets re-rendered from scratch; otherwise just take a defensive copy
    resolvedHistory = sequence.isLoadedToMemory
        ? resolvedHistory.slice()
        : resolvedHistory.map(removeRawFromHistoryItem);
    // A previous context shift happened — compress again, continuing from its metadata
    if (resolvedContextShift.lastEvaluationMetadata != null) {
        const contextShiftSize = resolvedContextShift.size instanceof Function
            ? await resolvedContextShift.size(sequence)
            : resolvedContextShift.size;
        const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
            history: resolvedHistory,
            // Reserve at least the minimum free tokens plus room for pending tokens
            contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
            contextShiftStrategy: resolvedContextShift.strategy,
            contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
            contextSize: context.contextSize,
            tokenizer: model.tokenize,
            chatWrapper: chatWrapper,
            functions,
            documentFunctionParams
        });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        return {
            history: compressedHistory,
            stopGenerationTriggers,
            tokens: contextText.tokenize(model.tokenize),
            newResolvedHistory: resolvedHistory,
            newHistoryCompressionMetadata: metadata,
            ignoreStartText: ignoreStartText ?? [],
            functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
            disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
        };
    }
    // No previous shift: use the history as-is if it fits the context
    {
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(resolvedHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        const tokens = contextText.tokenize(model.tokenize);
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
            return {
                history: resolvedHistory,
                stopGenerationTriggers,
                tokens,
                newResolvedHistory: resolvedHistory,
                newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                ignoreStartText: ignoreStartText ?? [],
                functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
            };
    }
    // It doesn't fit — compress it for the first time
    const contextShiftSize = resolvedContextShift.size instanceof Function
        ? await resolvedContextShift.size(sequence)
        : resolvedContextShift.size;
    const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
        history: resolvedHistory,
        contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
        contextShiftStrategy: resolvedContextShift.strategy,
        contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
        contextSize: context.contextSize,
        tokenizer: model.tokenize,
        chatWrapper: chatWrapper,
        functions,
        documentFunctionParams
    });
    const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
        availableFunctions: functions,
        documentFunctionParams
    });
    return {
        history: compressedHistory,
        stopGenerationTriggers,
        tokens: contextText.tokenize(model.tokenize),
        newResolvedHistory: resolvedHistory,
        newHistoryCompressionMetadata: metadata,
        ignoreStartText: ignoreStartText ?? [],
        functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
        disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
    };
}
704
+ //# sourceMappingURL=LlamaChat.js.map