node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. package/README.md +2 -0
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  6. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  7. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  8. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +13 -0
  9. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  10. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  11. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  12. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  13. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  14. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  15. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  16. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  17. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  18. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  19. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  20. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  21. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  22. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  23. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  24. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  25. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  26. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +4 -4
  27. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +24 -16
  28. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
  29. package/dist/cli/commands/ChatCommand.d.ts +2 -1
  30. package/dist/cli/commands/ChatCommand.js +71 -33
  31. package/dist/cli/commands/ChatCommand.js.map +1 -1
  32. package/dist/config.js +1 -1
  33. package/dist/config.js.map +1 -1
  34. package/dist/index.d.ts +17 -10
  35. package/dist/index.js +16 -8
  36. package/dist/index.js.map +1 -1
  37. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -1
  38. package/dist/llamaEvaluator/LlamaChat/LlamaChat.d.ts +175 -0
  39. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +704 -0
  40. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +1 -0
  41. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +21 -0
  42. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +120 -0
  43. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  44. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  45. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +117 -0
  46. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  47. package/dist/llamaEvaluator/{LlamaChatSession.d.ts → LlamaChatSession/LlamaChatSession.d.ts} +48 -25
  48. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +211 -0
  49. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  50. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  51. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  52. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  53. package/dist/llamaEvaluator/LlamaContext/LlamaContext.d.ts +18 -23
  54. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js +60 -103
  55. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +1 -1
  56. package/dist/llamaEvaluator/LlamaContext/types.d.ts +6 -14
  57. package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +35 -0
  58. package/dist/llamaEvaluator/LlamaEmbeddingContext.js +73 -0
  59. package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +1 -0
  60. package/dist/llamaEvaluator/LlamaGrammar.d.ts +8 -12
  61. package/dist/llamaEvaluator/LlamaGrammar.js +7 -12
  62. package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -1
  63. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js +2 -1
  64. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  65. package/dist/llamaEvaluator/LlamaModel.d.ts +10 -2
  66. package/dist/llamaEvaluator/LlamaModel.js +14 -3
  67. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  68. package/dist/types.d.ts +41 -3
  69. package/dist/types.js +5 -1
  70. package/dist/types.js.map +1 -1
  71. package/dist/utils/LlamaText.d.ts +42 -0
  72. package/dist/utils/LlamaText.js +207 -0
  73. package/dist/utils/LlamaText.js.map +1 -0
  74. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  75. package/dist/utils/StopGenerationDetector.js +205 -0
  76. package/dist/utils/StopGenerationDetector.js.map +1 -0
  77. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  78. package/dist/utils/TokenStreamRegulator.js +96 -0
  79. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  80. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  81. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  82. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  83. package/dist/utils/compareTokens.d.ts +2 -0
  84. package/dist/utils/compareTokens.js +4 -0
  85. package/dist/utils/compareTokens.js.map +1 -0
  86. package/dist/utils/compileLLamaCpp.js +11 -6
  87. package/dist/utils/compileLLamaCpp.js.map +1 -1
  88. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  89. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  90. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  91. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  92. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  93. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  94. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  95. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  96. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  97. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  98. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  99. package/dist/utils/gbnfJson/types.d.ts +1 -1
  100. package/dist/utils/gbnfJson/types.js.map +1 -1
  101. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  102. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  103. package/dist/utils/getBin.d.ts +3 -2
  104. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  105. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  106. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  107. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  108. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  109. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  110. package/dist/utils/resolveChatWrapper.js +16 -0
  111. package/dist/utils/resolveChatWrapper.js.map +1 -0
  112. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  113. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  114. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  115. package/llama/addon.cpp +45 -17
  116. package/llama/binariesGithubRelease.json +1 -1
  117. package/llama/gitRelease.bundle +0 -0
  118. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  119. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  120. package/llamaBins/linux-x64/llama-addon.node +0 -0
  121. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  122. package/llamaBins/mac-x64/llama-addon.node +0 -0
  123. package/llamaBins/win-x64/llama-addon.node +0 -0
  124. package/package.json +21 -9
  125. package/dist/ChatPromptWrapper.d.ts +0 -11
  126. package/dist/ChatPromptWrapper.js +0 -20
  127. package/dist/ChatPromptWrapper.js.map +0 -1
  128. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  129. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  130. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  131. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  132. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  133. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  134. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  135. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  136. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  137. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  138. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  139. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  140. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  141. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  142. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  143. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
  144. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
  145. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  146. package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
  147. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  148. package/dist/utils/getTextCompletion.d.ts +0 -3
  149. package/dist/utils/getTextCompletion.js +0 -12
  150. package/dist/utils/getTextCompletion.js.map +0 -1
  151. package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
  152. package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
@@ -0,0 +1,704 @@
1
+ import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
2
+ import { resolveChatWrapper } from "../../utils/resolveChatWrapper.js";
3
+ import { removeNullFields } from "../../utils/removeNullFields.js";
4
+ import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
5
+ import { AbortError } from "../../AbortError.js";
6
+ import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
7
+ import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
8
+ import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
9
+ import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
10
+ const defaultContextShiftOptions = {
11
+ size: (sequence) => Math.max(1, Math.floor(sequence.context.contextSize / 10)),
12
+ strategy: "eraseFirstResponseAndKeepFirstSystem",
13
+ lastEvaluationMetadata: null
14
+ };
15
+ const UNKNOWN_UNICODE_CHAR = "\ufffd";
16
+ export class LlamaChat {
17
+ /** @internal */ _chatWrapper;
18
+ /** @internal */ _disposeAggregator = new DisposeAggregator();
19
+ /** @internal */ _autoDisposeSequence;
20
+ /** @internal */ _sequence;
21
+ onDispose = new EventRelay();
22
+ constructor({ contextSequence, chatWrapper = "auto", autoDisposeSequence = true }) {
23
+ if (contextSequence == null)
24
+ throw new Error("contextSequence cannot be null");
25
+ if (contextSequence.disposed)
26
+ throw new DisposedError();
27
+ this._sequence = contextSequence;
28
+ this._autoDisposeSequence = autoDisposeSequence;
29
+ this._disposeAggregator.add(this._sequence.onDispose.createListener(() => {
30
+ this.dispose();
31
+ }));
32
+ this._disposeAggregator.add(this.onDispose.dispatchEvent);
33
+ this._chatWrapper = resolveChatWrapper(chatWrapper, contextSequence.model);
34
+ }
35
+ dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
36
+ if (this._sequence == null)
37
+ return;
38
+ if (disposeSequence)
39
+ this._sequence.dispose();
40
+ this._sequence = null;
41
+ this._disposeAggregator.dispose();
42
+ }
43
+ /** @hidden */
44
+ [Symbol.dispose]() {
45
+ return this.dispose();
46
+ }
47
+ get disposed() {
48
+ return this._sequence == null;
49
+ }
50
+ get chatWrapper() {
51
+ if (this._sequence == null)
52
+ throw new DisposedError();
53
+ return this._chatWrapper;
54
+ }
55
+ get sequence() {
56
+ if (this._sequence == null)
57
+ throw new DisposedError();
58
+ return this._sequence;
59
+ }
60
+ get context() {
61
+ return this.sequence.context;
62
+ }
63
+ get model() {
64
+ return this.sequence.model;
65
+ }
66
+ async generateResponse(history, { onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
67
+ const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
68
+ if (grammar != null && functionsEnabled)
69
+ throw new Error("Using both grammar and functions is not supported yet");
70
+ if (signal?.aborted)
71
+ throw new AbortError();
72
+ if (this._sequence == null)
73
+ throw new DisposedError();
74
+ let resolvedHistory = this._sequence.isLoadedToMemory
75
+ ? history.slice()
76
+ : history.map(removeRawFromHistoryItem);
77
+ if (resolvedHistory.length === 0 || resolvedHistory[resolvedHistory.length - 1].type !== "model")
78
+ resolvedHistory.push({
79
+ type: "model",
80
+ response: []
81
+ });
82
+ const model = this._sequence.model;
83
+ const context = this._sequence.context;
84
+ const eosToken = model.tokens.eos;
85
+ const resolvedContextShift = {
86
+ ...defaultContextShiftOptions,
87
+ ...removeNullFields(contextShift)
88
+ };
89
+ const { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = repeatPenalty === false
90
+ ? { lastTokens: 0 }
91
+ : repeatPenalty;
92
+ const lastModelResponse = getLastTextModelResponseFromChatHistory(resolvedHistory);
93
+ const res = [];
94
+ const pendingTokens = [];
95
+ let ignoredStartTextTokens = [];
96
+ const functionCallTokens = [];
97
+ const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
98
+ const grammarEvaluationState = grammar != null
99
+ ? new LlamaGrammarEvaluationState({ grammar })
100
+ : undefined;
101
+ let functionsGrammar = functionsEnabled
102
+ ? new FunctionCallGrammar(functions, this._chatWrapper, false)
103
+ : undefined;
104
+ let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
105
+ ? new LlamaGrammarEvaluationState({
106
+ grammar: functionsGrammar
107
+ })
108
+ : undefined;
109
+ const streamRegulator = new TokenStreamRegulator();
110
+ const stopGenerationDetector = new StopGenerationDetector();
111
+ const functionSyntaxStartDetector = new StopGenerationDetector();
112
+ const functionSyntaxEndDetector = new StopGenerationDetector();
113
+ const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
114
+ const ignoreStartTextDetector = new StopGenerationDetector();
115
+ const locksToReleaseOnValidGeneration = [];
116
+ const functionCallTokenSyntaxLocks = [];
117
+ let generatedTokens = 0;
118
+ let isFirstEvaluation = true;
119
+ let inFunctionEvaluationMode = false;
120
+ let initiallyEngagedFunctionMode = false;
121
+ let lastContextWindowHistory = resolvedHistory;
122
+ let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
123
+ const ensureNotAborted = () => {
124
+ if (signal?.aborted)
125
+ throw new AbortError();
126
+ if (this._sequence == null)
127
+ throw new DisposedError();
128
+ };
129
+ const getPenaltyTokens = () => {
130
+ if (this._sequence == null)
131
+ throw new DisposedError();
132
+ let punishTokens = res.slice(-repeatPenaltyLastTokens);
133
+ if (punishTokensFilter != null)
134
+ punishTokens = punishTokensFilter(punishTokens);
135
+ if (!penalizeNewLine) {
136
+ const nlToken = model.tokens.nl;
137
+ if (nlToken != null)
138
+ punishTokens = punishTokens.filter(token => token !== nlToken);
139
+ }
140
+ return punishTokens;
141
+ };
142
+ const getResolvedHistoryWithCurrentModelResponse = () => {
143
+ if (res.length === 0)
144
+ return resolvedHistory;
145
+ let modelResponse = model.detokenize(res);
146
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
147
+ modelResponse = modelResponse.trimEnd();
148
+ if (modelResponse === "")
149
+ return resolvedHistory;
150
+ return setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse);
151
+ };
152
+ const removeFoundStartIgnoreTextsFromPendingTokens = () => {
153
+ if (res.length === 0 && pendingTokens.length > 0) {
154
+ ignoreStartTextDetector.clearInProgressStops();
155
+ ignoreStartTextDetector.clearTriggeredStops();
156
+ let mostExhaustiveTriggeredStops = null;
157
+ for (let i = 0; i < pendingTokens.length; i++) {
158
+ ignoreStartTextDetector.recordGeneration({
159
+ text: model.detokenize([pendingTokens[i]]),
160
+ tokens: [pendingTokens[i]],
161
+ startNewChecks: i === 0
162
+ });
163
+ if (ignoreStartTextDetector.hasTriggeredStops) {
164
+ mostExhaustiveTriggeredStops = ignoreStartTextDetector.getTriggeredStops();
165
+ ignoreStartTextDetector.clearTriggeredStops();
166
+ }
167
+ else if (!ignoreStartTextDetector.hasInProgressStops)
168
+ break;
169
+ }
170
+ if (mostExhaustiveTriggeredStops != null) {
171
+ const [mostExhaustiveTriggeredStop] = mostExhaustiveTriggeredStops;
172
+ if (mostExhaustiveTriggeredStop != null) {
173
+ ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
174
+ .map((stopTrigger) => {
175
+ if (typeof stopTrigger === "string")
176
+ return model.tokenize(stopTrigger);
177
+ else
178
+ return [stopTrigger];
179
+ })
180
+ .flat(1);
181
+ const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
182
+ .map((generation) => {
183
+ if (typeof generation === "string")
184
+ return model.tokenize(generation);
185
+ else
186
+ return generation;
187
+ })
188
+ .flat(1);
189
+ pendingTokens.length = 0;
190
+ pendingTokens.push(...newPendingTokens);
191
+ }
192
+ }
193
+ }
194
+ };
195
+ if (grammar != null)
196
+ StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
197
+ .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
198
+ if (functions != null && Object.keys(functions).length > 0)
199
+ functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
200
+ // eslint-disable-next-line no-constant-condition
201
+ while (true) {
202
+ ensureNotAborted();
203
+ let shouldContextShift = false;
204
+ const queuedChunkTokens = streamRegulator.getAllQueuedChunkTokens();
205
+ const { history: contextWindowHistory, stopGenerationTriggers, tokens: contextWindowTokens, newResolvedHistory, newHistoryCompressionMetadata, ignoreStartText, functionCallInitiallyEngaged, disengageInitiallyEngagedFunctionCall } = await getContextWindow({
206
+ resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
207
+ resolvedContextShift,
208
+ lastHistoryCompressionMetadata,
209
+ pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
210
+ isFirstEvaluation,
211
+ chatWrapper: this._chatWrapper,
212
+ lastEvaluationContextWindowHistory,
213
+ minimumOverlapPercentageToPreventContextShift,
214
+ sequence: this._sequence,
215
+ minFreeContextTokens: 1,
216
+ functions: functionsEnabled ? functions : undefined,
217
+ documentFunctionParams
218
+ });
219
+ ensureNotAborted();
220
+ if (generatedTokens === 0) {
221
+ StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenize)
222
+ .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
223
+ if (functionsEnabled) {
224
+ initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
225
+ StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenize)
226
+ .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
227
+ if (initiallyEngagedFunctionMode) {
228
+ inFunctionEvaluationMode = true;
229
+ functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, true);
230
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
231
+ grammar: functionsGrammar
232
+ });
233
+ }
234
+ }
235
+ }
236
+ const tokens = [...contextWindowTokens, ...ignoredStartTextTokens, ...pendingTokens, ...queuedChunkTokens];
237
+ resolvedHistory = newResolvedHistory;
238
+ lastHistoryCompressionMetadata = newHistoryCompressionMetadata;
239
+ lastContextWindowHistory = contextWindowHistory;
240
+ const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
241
+ const contextWindowsRes = [];
242
+ StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
243
+ .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
244
+ if (functionsGrammar != null)
245
+ StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenize)
246
+ .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
247
+ let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
248
+ // we need to decode at least one token to generate a response
249
+ if (firstDifferentIndex === tokens.length && firstDifferentIndex > 0)
250
+ firstDifferentIndex -= 1;
251
+ tokens.splice(0, firstDifferentIndex);
252
+ if (firstDifferentIndex < this._sequence.nextTokenIndex) {
253
+ await this._sequence.eraseContextTokenRanges([{
254
+ start: firstDifferentIndex,
255
+ end: this._sequence.nextTokenIndex
256
+ }]);
257
+ ensureNotAborted();
258
+ }
259
+ const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
260
+ temperature, topK, topP,
261
+ grammarEvaluationState: () => {
262
+ if (inFunctionEvaluationMode)
263
+ return functionsEvaluationState;
264
+ return grammarEvaluationState;
265
+ },
266
+ repeatPenalty: !repeatPenaltyEnabled ? undefined : {
267
+ punishTokens: getPenaltyTokens,
268
+ penalty,
269
+ frequencyPenalty,
270
+ presencePenalty
271
+ },
272
+ evaluationPriority,
273
+ yieldEosToken: true
274
+ }));
275
+ for await (const token of evaluationIterator) {
276
+ ensureNotAborted();
277
+ generatedTokens++;
278
+ const tokens = [token];
279
+ const text = model.detokenize([token]);
280
+ const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
281
+ if (initiallyEngagedFunctionMode)
282
+ disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
283
+ if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
284
+ locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
285
+ }
286
+ else {
287
+ while (locksToReleaseOnValidGeneration.length > 0)
288
+ locksToReleaseOnValidGeneration.shift().dispose();
289
+ }
290
+ functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
291
+ if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
292
+ initiallyEngagedFunctionMode = false;
293
+ let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
294
+ if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
295
+ const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
296
+ try {
297
+ const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
298
+ enableInternalBuiltinFunctions: true,
299
+ initialFunctionCallEngaged: true
300
+ });
301
+ const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
302
+ if (internalBuiltinFunctions[functionName] != null) {
303
+ shouldStopFunctionEvaluationMode = true;
304
+ }
305
+ }
306
+ catch (err) {
307
+ if (!(err instanceof LlamaFunctionCallValidationError))
308
+ throw err;
309
+ }
310
+ }
311
+ if (shouldStopFunctionEvaluationMode) {
312
+ inFunctionEvaluationMode = false;
313
+ functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, false);
314
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
315
+ grammar: functionsGrammar
316
+ });
317
+ functionCallTokens.length = 0;
318
+ while (functionCallTokenSyntaxLocks.length > 0)
319
+ functionCallTokenSyntaxLocks.shift().dispose();
320
+ functionSyntaxStartDetector.clearInProgressStops();
321
+ functionSyntaxStartDetector.clearTriggeredStops();
322
+ functionSyntaxEndDetector.clearInProgressStops();
323
+ functionSyntaxEndDetector.clearTriggeredStops();
324
+ }
325
+ }
326
+ if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
327
+ functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
328
+ inFunctionEvaluationMode = true;
329
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
330
+ stopGenerationDetector.clearTriggeredStops();
331
+ stopGenerationDetector.clearInProgressStops();
332
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
333
+ const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
334
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
335
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
336
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
337
+ const [firstRemainingGenerationAfterStop] = triggeredStops
338
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
339
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
340
+ .flat(1);
341
+ const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
342
+ ? ""
343
+ : typeof firstRemainingGenerationAfterStop === "string"
344
+ ? firstRemainingGenerationAfterStop
345
+ : model.detokenize(firstRemainingGenerationAfterStop);
346
+ functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop));
347
+ for (const functionCallToken of functionCallTokens)
348
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
349
+ }
350
+ else if (inFunctionEvaluationMode) {
351
+ functionCallTokens.push(...tokens);
352
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
353
+ functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
354
+ }
355
+ if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
356
+ const functionCallText = model.detokenize(functionCallTokens);
357
+ const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
358
+ let modelResponse = model.detokenize(res);
359
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
360
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
361
+ modelResponse = modelResponse.trimEnd();
362
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
363
+ }
364
+ return {
365
+ response: modelResponse,
366
+ lastEvaluation: {
367
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
368
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
369
+ contextShiftMetadata: lastHistoryCompressionMetadata
370
+ },
371
+ // prevent infinite TS type instantiation
372
+ functionCall: functionCall,
373
+ metadata: {
374
+ stopReason: "functionCall"
375
+ }
376
+ };
377
+ }
378
+ if (!inFunctionEvaluationMode)
379
+ stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
380
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
381
+ removeFoundStartIgnoreTextsFromPendingTokens();
382
+ if (stopGenerationDetector.hasTriggeredStops || token === eosToken) {
383
+ const triggeredStops = stopGenerationDetector.getTriggeredStops();
384
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
385
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
386
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
387
+ const [firstRemainingGenerationAfterStop] = triggeredStops
388
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
389
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
390
+ .flat(1);
391
+ removeFoundStartIgnoreTextsFromPendingTokens();
392
+ if (pendingTokens.length > 0)
393
+ onToken?.(pendingTokens.slice());
394
+ res.push(...pendingTokens);
395
+ contextWindowsRes.push(...pendingTokens);
396
+ pendingTokens.length = 0;
397
+ let modelResponse = model.detokenize(res);
398
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
399
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
400
+ modelResponse = modelResponse.trimEnd();
401
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
402
+ }
403
+ return {
404
+ response: modelResponse,
405
+ lastEvaluation: {
406
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
407
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
408
+ contextShiftMetadata: lastHistoryCompressionMetadata
409
+ },
410
+ metadata: {
411
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
412
+ stopReason: token === eosToken
413
+ ? "eosToken"
414
+ : "stopGenerationTrigger"
415
+ }
416
+ };
417
+ }
418
+ const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
419
+ if (res.length === 0) {
420
+ ignoreStartTextDetector.clearInProgressStops();
421
+ ignoreStartTextDetector.clearTriggeredStops();
422
+ ignoreStartTextDetector.recordGeneration({
423
+ text: model.detokenize(pendingTokens),
424
+ tokens: pendingTokens
425
+ });
426
+ }
427
+ if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
428
+ removeFoundStartIgnoreTextsFromPendingTokens();
429
+ if (pendingTokens.length > 0) {
430
+ onToken?.(pendingTokens.slice());
431
+ res.push(...pendingTokens);
432
+ contextWindowsRes.push(...pendingTokens);
433
+ pendingTokens.length = 0;
434
+ }
435
+ }
436
+ if (maxTokensTriggered) {
437
+ let modelResponse = model.detokenize(res);
438
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
439
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
440
+ modelResponse = modelResponse.trimEnd();
441
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
442
+ }
443
+ return {
444
+ response: modelResponse,
445
+ lastEvaluation: {
446
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
447
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
448
+ contextShiftMetadata: lastHistoryCompressionMetadata
449
+ },
450
+ metadata: {
451
+ stopReason: "maxTokens"
452
+ }
453
+ };
454
+ }
455
+ if (this._sequence.nextTokenIndex >= context.contextSize) {
456
+ shouldContextShift = true;
457
+ break;
458
+ }
459
+ }
460
+ isFirstEvaluation = false;
461
+ if (shouldContextShift)
462
+ continue;
463
+ break;
464
+ }
465
+ throw new Error("The context size is too small to generate a response");
466
+ }
467
+ }
468
/**
 * Returns a copy of a chat history item with the `raw` field stripped from
 * every non-string entry of a "model" item's response.
 * Non-"model" items are returned unchanged (same reference); the input item
 * is never mutated — the copy keeps the `raw` key but sets it to `undefined`.
 */
function removeRawFromHistoryItem(historyItem) {
    if (historyItem.type !== "model")
        return historyItem;

    // Rebuild the response list, blanking `raw` on structured entries only
    const strippedResponse = historyItem.response.map((entry) => (
        typeof entry === "string"
            ? entry
            : { ...entry, raw: undefined }
    ));
    return { ...historyItem, response: strippedResponse };
}
484
/**
 * Compresses a chat history so that the context text generated from it by the
 * chat wrapper fits within `contextSize - contextShiftSize` tokens.
 *
 * Resolution order:
 * 1. If the history already fits, it is returned as-is with `metadata: null`.
 * 2. If `contextShiftStrategy` is a function, it is tried first; its result is
 *    used only when it actually fits. Failures (throw or non-fitting result)
 *    fall back to the default strategy with a console warning/error.
 * 3. Otherwise the default "eraseFirstResponseAndKeepFirstSystem" strategy is
 *    applied; a non-fitting result from it is a hard error.
 *
 * @returns {Promise<{compressedHistory: object[], metadata: any}>}
 * @throws {Error} when `contextShiftSize` leaves no room in the context, or
 *   when the default strategy cannot produce a fitting history.
 */
async function compressHistoryToFitContextSize({ history, contextShiftSize, contextShiftStrategy, contextShiftLastEvaluationMetadata, contextSize, tokenizer, chatWrapper, functions, documentFunctionParams }) {
    // Render a candidate history through the chat wrapper and check its token
    // count against the space left after reserving the shift window.
    const fitsContext = (candidateHistory) => {
        const { contextText } = chatWrapper.generateContextText(candidateHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        return contextText.tokenize(tokenizer).length <= contextSize - contextShiftSize;
    };

    if (contextSize - contextShiftSize <= 0)
        throw new Error(`The context size (${contextSize}) is too small to fit the context shift size (${contextShiftSize})`);

    if (fitsContext(history))
        return {
            compressedHistory: history,
            metadata: null
        };

    if (contextShiftStrategy instanceof Function) {
        // User-provided strategy: best effort, fall through on any failure
        try {
            const { chatHistory, metadata } = await contextShiftStrategy({
                chatHistory: history,
                maxTokensCount: contextSize - contextShiftSize,
                tokenizer,
                chatWrapper,
                lastShiftMetadata: contextShiftLastEvaluationMetadata
            });
            if (fitsContext(chatHistory))
                return {
                    compressedHistory: chatHistory,
                    metadata
                };
            console.warn("The provided context shift strategy did not return a history that fits the context size. Using the default strategy instead.");
        } catch (err) {
            console.error("The provided context shift strategy threw an error. Using the default strategy instead.", err);
        }
    } else if (contextShiftStrategy !== "eraseFirstResponseAndKeepFirstSystem")
        console.warn(`Unknown context shift strategy "${contextShiftStrategy}". Using the default strategy instead.`);

    // Default strategy — must succeed, otherwise generation cannot proceed
    const { chatHistory, metadata } = await eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy({
        chatHistory: history,
        maxTokensCount: contextSize - contextShiftSize,
        tokenizer,
        chatWrapper,
        lastShiftMetadata: contextShiftLastEvaluationMetadata
    });
    if (!fitsContext(chatHistory))
        throw new Error("The default context shift strategy did not return a history that fits the context size");
    return {
        compressedHistory: chatHistory,
        metadata
    };
}
539
/**
 * Given the stop triggers that fired and the partially-free queue that was
 * held back while matching them (available both as tokens and as text),
 * decides which token representation of the queue should be released.
 *
 * @param {Array<{stopTrigger: Array<string|number>}>} triggeredStops - triggers that fired;
 *   a trigger whose first element is a string is text-based, otherwise token-based
 * @param {{tokens: number[], text: string}} partiallyFreeTokens - the held-back queue
 * @param {(text: string) => number[]} tokenizer - converts text to tokens
 * @returns {number[]} the tokens to release before the stop trigger
 */
function getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, tokenizer) {
    // Only one representation available — no decision to make
    if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length === 0)
        return [];
    else if (partiallyFreeTokens.tokens.length !== 0 && partiallyFreeTokens.text.length === 0)
        return partiallyFreeTokens.tokens;
    else if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length !== 0)
        return tokenizer(partiallyFreeTokens.text);

    // Both representations exist — pick based on the kind of trigger that fired
    const triggerThatStartsWithStringIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] === "string");
    const triggerThatStartsWithTokenIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] !== "string");

    // BUGFIX: `findIndex` returns 0 for a match at the first position, so the
    // existence checks must be `>= 0`. The original `> 0` missed triggers at
    // index 0 and fell through to the comparison fallback, which could return
    // the wrong representation (e.g. re-tokenized text when only token-based
    // triggers fired).
    if (triggerThatStartsWithTokenIndex >= 0 && triggerThatStartsWithStringIndex < 0)
        return partiallyFreeTokens.tokens;
    else if (triggerThatStartsWithStringIndex >= 0 && triggerThatStartsWithTokenIndex < 0)
        return tokenizer(partiallyFreeTokens.text);

    // Both kinds of triggers fired: prefer the text tokenization when it agrees
    // with the token queue, or when a string trigger fired first
    const stringTokens = tokenizer(partiallyFreeTokens.text);
    if (stringTokens.length === partiallyFreeTokens.tokens.length &&
        stringTokens.every((value, index) => value === partiallyFreeTokens.tokens[index]))
        return stringTokens;
    else if (triggerThatStartsWithStringIndex < triggerThatStartsWithTokenIndex)
        return stringTokens;
    return partiallyFreeTokens.tokens;
}
560
/**
 * Extracts the trailing plain-text segment of the last model response in a
 * chat history. Returns "" when the history is empty, does not end with a
 * "model" item, or that item's response does not end with a string.
 */
function getLastTextModelResponseFromChatHistory(chatHistory) {
    if (chatHistory.length === 0)
        return "";
    const lastItem = chatHistory[chatHistory.length - 1];
    if (lastItem.type !== "model")
        return "";
    const { response } = lastItem;
    const lastSegment = response[response.length - 1];
    return typeof lastSegment === "string" ? lastSegment : "";
}
569
/**
 * Returns a copy of the chat history in which the trailing text segment of
 * the last model response is replaced with `textResponse`.
 * - If the history does not end with a "model" item, a fresh one is appended.
 * - An empty `textResponse` removes the trailing text segment instead.
 * Copy-on-write: the input history, its last item, and its response array are
 * never mutated.
 */
function setLastModelTextResponseInChatHistory(chatHistory, textResponse) {
    const updatedHistory = [...chatHistory];
    const endsWithModelItem = updatedHistory.length !== 0 &&
        updatedHistory[updatedHistory.length - 1].type === "model";
    if (!endsWithModelItem)
        updatedHistory.push({
            type: "model",
            response: []
        });

    // Replace the last item with a shallow copy owning a fresh response array
    const lastIndex = updatedHistory.length - 1;
    const updatedResponse = [...updatedHistory[lastIndex].response];
    updatedHistory[lastIndex] = {
        ...updatedHistory[lastIndex],
        response: updatedResponse
    };

    const lastSegment = updatedResponse[updatedResponse.length - 1];
    if (typeof lastSegment === "string") {
        if (textResponse === "")
            updatedResponse.pop();
        else
            updatedResponse[updatedResponse.length - 1] = textResponse;
    } else if (textResponse !== "")
        updatedResponse.push(textResponse);

    return updatedHistory;
}
591
/**
 * Builds the context window (history + tokens) for the next evaluation,
 * compressing the history via the configured context shift strategy when it
 * does not fit the sequence's context size.
 *
 * Resolution order:
 * 1. On the first evaluation, try to reuse the previous evaluation's context
 *    window if enough of its tokens are already evaluated in the sequence.
 * 2. If a previous context shift happened (lastEvaluationMetadata != null),
 *    compress again, continuing from that shift's metadata.
 * 3. Otherwise use the history as-is when it fits, or compress it.
 *
 * NOTE(review): `minimumOverlapPercentageToPreventContextShift` is compared
 * against the fraction of context tokens already evaluated — presumably in
 * the [0, 1] range; confirm against callers.
 *
 * @throws {DisposedError} when `sequence` is null (already disposed).
 */
async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams }) {
    if (sequence == null)
        throw new DisposedError();
    const model = sequence.model;
    const context = sequence.context;
    // Fast path: on the first evaluation, reuse the last context window when
    // enough of it is already evaluated in the sequence to skip a context shift
    if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) {
        const newContextWindow = lastEvaluationContextWindowHistory.slice();
        // Ensure the window ends with a model item to generate into
        if (newContextWindow.length === 0 || newContextWindow[newContextWindow.length - 1].type !== "model")
            newContextWindow.push({
                type: "model",
                response: []
            });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(newContextWindow, {
            availableFunctions: functions,
            documentFunctionParams
        });
        const tokens = contextText.tokenize(model.tokenize);
        // Only reusable if it leaves room for pending tokens + free headroom
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
            // Fraction of the window whose tokens match the sequence's state
            const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
            const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
            if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift)
                return {
                    history: newContextWindow,
                    stopGenerationTriggers,
                    tokens,
                    newResolvedHistory: resolvedHistory,
                    newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                    ignoreStartText: ignoreStartText ?? [],
                    functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                    disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
                };
        }
    }
    // When the sequence isn't loaded to memory, strip raw data so the history
    // gets re-rendered from scratch; otherwise just take a defensive copy
    resolvedHistory = sequence.isLoadedToMemory
        ? resolvedHistory.slice()
        : resolvedHistory.map(removeRawFromHistoryItem);
    // A previous context shift happened — compress again, continuing from its metadata
    if (resolvedContextShift.lastEvaluationMetadata != null) {
        const contextShiftSize = resolvedContextShift.size instanceof Function
            ? await resolvedContextShift.size(sequence)
            : resolvedContextShift.size;
        const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
            history: resolvedHistory,
            // Reserve at least the minimum free tokens plus room for pending tokens
            contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
            contextShiftStrategy: resolvedContextShift.strategy,
            contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
            contextSize: context.contextSize,
            tokenizer: model.tokenize,
            chatWrapper: chatWrapper,
            functions,
            documentFunctionParams
        });
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        return {
            history: compressedHistory,
            stopGenerationTriggers,
            tokens: contextText.tokenize(model.tokenize),
            newResolvedHistory: resolvedHistory,
            newHistoryCompressionMetadata: metadata,
            ignoreStartText: ignoreStartText ?? [],
            functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
            disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
        };
    }
    // No previous shift: use the history as-is if it fits the context
    {
        const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(resolvedHistory, {
            availableFunctions: functions,
            documentFunctionParams
        });
        const tokens = contextText.tokenize(model.tokenize);
        if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
            return {
                history: resolvedHistory,
                stopGenerationTriggers,
                tokens,
                newResolvedHistory: resolvedHistory,
                newHistoryCompressionMetadata: lastHistoryCompressionMetadata,
                ignoreStartText: ignoreStartText ?? [],
                functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
                disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
            };
    }
    // It doesn't fit — compress it for the first time
    const contextShiftSize = resolvedContextShift.size instanceof Function
        ? await resolvedContextShift.size(sequence)
        : resolvedContextShift.size;
    const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
        history: resolvedHistory,
        contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
        contextShiftStrategy: resolvedContextShift.strategy,
        contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
        contextSize: context.contextSize,
        tokenizer: model.tokenize,
        chatWrapper: chatWrapper,
        functions,
        documentFunctionParams
    });
    const { contextText, stopGenerationTriggers, ignoreStartText, functionCall } = chatWrapper.generateContextText(compressedHistory, {
        availableFunctions: functions,
        documentFunctionParams
    });
    return {
        history: compressedHistory,
        stopGenerationTriggers,
        tokens: contextText.tokenize(model.tokenize),
        newResolvedHistory: resolvedHistory,
        newHistoryCompressionMetadata: metadata,
        ignoreStartText: ignoreStartText ?? [],
        functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false,
        disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
    };
}
704
+ //# sourceMappingURL=LlamaChat.js.map