@saber2pr/ai-assistant 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ReadMe.md CHANGED
@@ -49,6 +49,9 @@ Initializes and mounts the AI assistant. This assistant runs entirely in the bro
49
49
  | `containerId` | `string` | `'ai-assistant-root'` | The ID of the container element where the assistant will be mounted. |
50
50
  | `initialPosition` | `{ x: number; y: number }` | Bottom-right | The initial coordinates of the floating button. |
51
51
  | `onBeforeChat` | `Function` | - | A hook to intercept and modify messages before they are sent to the AI. |
52
+ | `maxMessages` | `number` | `20` | Maximum number of messages to keep in context. Older messages will be truncated. Set to 0 to disable. |
53
+ | `maxContextLength` | `number` | `2500` | Maximum total characters in the context. Messages will be truncated from the oldest if exceeded. Set to 0 to disable. |
54
+ | `maxTokens` | `number` | `3200` | Maximum estimated tokens in the context. This prevents exceeding the model's context window. For 4096 context window models, safe value is ~3200 (leaves ~900 tokens buffer). Set to 0 to disable. |
52
55
 
53
56
  ### Local Development
54
57
 
package/lib/app.js CHANGED
@@ -5,23 +5,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  var chat_1 = __importDefault(require("./chat"));
7
7
  var h1 = document.createElement('h1');
8
- h1.textContent = 'Click the bottom right sidebar button to open the AI assistant';
8
+ h1.textContent =
9
+ 'Click the bottom right sidebar button to open the AI assistant';
9
10
  document.body.append(h1);
10
11
  // 默认执行初始化
11
12
  (0, chat_1.default)({
12
- locale: 'en-US',
13
+ locale: 'zh-CN',
13
14
  // welcomeMessage: '有什么可以帮忙的?',
14
15
  // suggestions: ['如何用 Typescript 实现 Helloworld?', '物联网是什么?'],
15
16
  // placeholder: '给 GPT 发送消息',
16
17
  // emptyMessage: '我是AI,可以回答你的问题,请在下方输入框输入你的需求~',
17
- // async onBeforeChat(messages) {
18
- // const knowledgeContent = await fetch('http://localhost:5001/HTML超文本标记语言/移动端禁用双指放大.md').then(res => res.text())
19
- // return [
20
- // {
21
- // role: "system",
22
- // content: `你是我的博客助手,根据我博客内容回答:移动端禁用双指放大的方法:\n${knowledgeContent}`
23
- // },
24
- // ...messages
25
- // ]
26
- // },
27
18
  });
@@ -13,7 +13,7 @@ function MyRuntimeProvider(_a) {
13
13
  var children = _a.children;
14
14
  var llm = (0, context_1.useLLm)();
15
15
  var config = react_1.default.useContext(context_2.AIConfigContext);
16
- var runtime = (0, react_2.useLocalRuntime)((0, myModelAdapterStream_1.MyModelAdapterStream)(llm, config.onBeforeChat), {
16
+ var runtime = (0, react_2.useLocalRuntime)((0, myModelAdapterStream_1.MyModelAdapterStream)(llm, config.onBeforeChat, config.maxMessages, config.maxContextLength, config.maxTokens), {
17
17
  adapters: {
18
18
  speech: new react_2.WebSpeechSynthesisAdapter(),
19
19
  },
@@ -1,4 +1,4 @@
1
1
  import { ChatModelAdapter } from '@assistant-ui/react';
2
2
  import { MLCEngine } from '@mlc-ai/web-llm';
3
3
  import { ChatMessage } from '../../types/assistant';
4
- export declare const MyModelAdapterStream: (llm: MLCEngine, onBeforeChat?: (messages: ChatMessage[], llm: MLCEngine) => ChatMessage[] | Promise<ChatMessage[]>) => ChatModelAdapter;
4
+ export declare const MyModelAdapterStream: (llm: MLCEngine, onBeforeChat?: (messages: ChatMessage[], llm: MLCEngine) => ChatMessage[] | Promise<ChatMessage[]>, maxMessages?: number, maxContextLength?: number, maxTokens?: number) => ChatModelAdapter;
@@ -1,4 +1,15 @@
1
1
  "use strict";
2
+ var __assign = (this && this.__assign) || function () {
3
+ __assign = Object.assign || function(t) {
4
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
5
+ s = arguments[i];
6
+ for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
7
+ t[p] = s[p];
8
+ }
9
+ return t;
10
+ };
11
+ return __assign.apply(this, arguments);
12
+ };
2
13
  var __generator = (this && this.__generator) || function (thisArg, body) {
3
14
  var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
4
15
  return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
@@ -46,52 +57,282 @@ var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _ar
46
57
  function reject(value) { resume("throw", value); }
47
58
  function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
48
59
  };
60
+ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
61
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
62
+ if (ar || !(i in from)) {
63
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
64
+ ar[i] = from[i];
65
+ }
66
+ }
67
+ return to.concat(ar || Array.prototype.slice.call(from));
68
+ };
49
69
  Object.defineProperty(exports, "__esModule", { value: true });
50
70
  exports.MyModelAdapterStream = void 0;
51
71
  var event_1 = require("../../utils/event");
52
72
  var constants_1 = require("../../constants");
53
- var MyModelAdapterStream = function (llm, onBeforeChat) { return ({
54
- run: function (_a) {
55
- return __asyncGenerator(this, arguments, function run_1(_b) {
56
- var chatMessages, chunks, reply, _c, chunks_1, chunks_1_1, chunk, e_1_1, summaryResponse, title, error_1;
57
- var _d, e_1, _e, _f;
58
- var _g, _h, _j;
59
- var messages = _b.messages, abortSignal = _b.abortSignal;
60
- return __generator(this, function (_k) {
61
- switch (_k.label) {
62
- case 0:
63
- chatMessages = messages.map(function (item) { return ({
64
- role: item.role,
65
- content: item.content[0].text,
66
- }); });
67
- if (!onBeforeChat) return [3 /*break*/, 2];
68
- return [4 /*yield*/, __await(onBeforeChat(chatMessages, llm))];
69
- case 1:
70
- chatMessages = _k.sent();
71
- _k.label = 2;
72
- case 2: return [4 /*yield*/, __await(llm.chat.completions.create({
73
- messages: chatMessages,
74
- temperature: 1,
75
- stream: true,
76
- }))];
77
- case 3:
78
- chunks = _k.sent();
79
- reply = "";
80
- _k.label = 4;
81
- case 4:
82
- _k.trys.push([4, 11, 12, 17]);
83
- _c = true, chunks_1 = __asyncValues(chunks);
84
- _k.label = 5;
85
- case 5: return [4 /*yield*/, __await(chunks_1.next())];
86
- case 6:
87
- if (!(chunks_1_1 = _k.sent(), _d = chunks_1_1.done, !_d)) return [3 /*break*/, 10];
88
- _f = chunks_1_1.value;
89
- _c = false;
90
- chunk = _f;
91
- reply += ((_g = chunk.choices[0]) === null || _g === void 0 ? void 0 : _g.delta.content) || "";
92
- return [4 /*yield*/, __await({
73
+ /**
74
+ * Rough estimation: ~1.3 tokens per character for English, ~1.8 per character for Chinese
75
+ * (deliberately overestimates so truncation stays on the safe side of the context window)
76
+ */
77
+ function estimateTokens(text) {
78
+ // Rough estimate: Chinese characters count more, English less
79
+ var chineseChars = (text.match(/[\u4e00-\u9fa5]/g) || []).length;
80
+ var otherChars = text.length - chineseChars;
81
+ // Chinese: ~1.8 tokens per char, English: ~1.3 tokens per char
82
+ return Math.ceil(chineseChars * 1.8 + otherChars * 1.3);
83
+ }
84
+ /**
85
+ * Truncate messages based on maxMessages and maxContextLength
86
+ * Also considers token estimation to avoid exceeding context window
87
+ */
88
+ function truncateMessages(messages, maxMessages, maxContextLength, maxTokens) {
89
+ var result = __spreadArray([], messages, true);
90
+ // Truncate by message count (keep the most recent messages)
91
+ if (maxMessages && maxMessages > 0 && result.length > maxMessages) {
92
+ // Always keep the first message if it's a system message
93
+ var firstMessage = result[0];
94
+ var isSystemFirst = firstMessage.role === 'system';
95
+ if (isSystemFirst) {
96
+ result = __spreadArray([firstMessage], result.slice(-(maxMessages - 1)), true);
97
+ }
98
+ else {
99
+ result = result.slice(-maxMessages);
100
+ }
101
+ }
102
+ // Truncate by total character length
103
+ if (maxContextLength && maxContextLength > 0) {
104
+ var firstMessage = result[0];
105
+ var isSystemFirst = firstMessage.role === 'system';
106
+ var otherMessages = isSystemFirst ? result.slice(1) : result;
107
+ // Calculate length of other messages
108
+ var otherMessagesLength = otherMessages.reduce(function (sum, msg) { return sum + msg.content.length; }, 0);
109
+ var systemMessageLength = isSystemFirst ? firstMessage.content.length : 0;
110
+ var totalLength = otherMessagesLength + systemMessageLength;
111
+ if (totalLength > maxContextLength) {
112
+ // Reserve 20% of maxContextLength for system message, or at least 500 chars
113
+ var systemMessageReserve = Math.max(500, Math.floor(maxContextLength * 0.2));
114
+ var availableForOther = maxContextLength - systemMessageReserve;
115
+ // Truncate system message if it's too long
116
+ var systemMessage = null;
117
+ if (isSystemFirst) {
118
+ if (firstMessage.content.length > systemMessageReserve) {
119
+ // Keep the beginning of system message (usually contains important instructions)
120
+ systemMessage = __assign(__assign({}, firstMessage), { content: firstMessage.content.substring(0, systemMessageReserve) + '...' });
121
+ }
122
+ else {
123
+ systemMessage = firstMessage;
124
+ }
125
+ }
126
+ // Truncate other messages to fit in remaining space
127
+ var truncated = __spreadArray([], otherMessages, true);
128
+ var currentLength = truncated.reduce(function (sum, msg) { return sum + msg.content.length; }, 0);
129
+ var systemLength = systemMessage ? systemMessage.content.length : 0;
130
+ // Remove oldest messages until under limit
131
+ while (truncated.length > 0 && (currentLength + systemLength) > maxContextLength) {
132
+ var removed = truncated.shift();
133
+ if (removed) {
134
+ currentLength -= removed.content.length;
135
+ }
136
+ }
137
+ // If still too long, truncate system message further
138
+ if (systemMessage && (currentLength + systemMessage.content.length) > maxContextLength) {
139
+ var maxSystemLength = Math.max(200, maxContextLength - currentLength - 100); // Leave some buffer
140
+ if (systemMessage.content.length > maxSystemLength) {
141
+ systemMessage = __assign(__assign({}, systemMessage), { content: systemMessage.content.substring(0, maxSystemLength) + '...' });
142
+ }
143
+ }
144
+ result = systemMessage ? __spreadArray([systemMessage], truncated, true) : truncated;
145
+ }
146
+ }
147
+ // Additional token-based truncation as a safety net
148
+ // Default to 3500 tokens (leaving ~600 tokens buffer for 4096 context window)
149
+ var tokenLimit = maxTokens || 3500;
150
+ if (tokenLimit > 0) {
151
+ var totalTokens = result.reduce(function (sum, msg) { return sum + estimateTokens(msg.content); }, 0);
152
+ if (totalTokens > tokenLimit) {
153
+ var firstMessage = result[0];
154
+ var isSystemFirst = firstMessage.role === 'system';
155
+ var systemMessage = isSystemFirst ? firstMessage : null;
156
+ var otherMessages = isSystemFirst ? result.slice(1) : result;
157
+ // Reserve tokens for system message (max 800 tokens)
158
+ var systemTokens = systemMessage ? Math.min(estimateTokens(systemMessage.content), 800) : 0;
159
+ var availableTokens = tokenLimit - systemTokens;
160
+ // Truncate other messages
161
+ var truncated = __spreadArray([], otherMessages, true);
162
+ var currentTokens = truncated.reduce(function (sum, msg) { return sum + estimateTokens(msg.content); }, 0);
163
+ // Remove oldest messages until under token limit
164
+ while (truncated.length > 0 && (currentTokens + systemTokens) > tokenLimit) {
165
+ var removed = truncated.shift();
166
+ if (removed) {
167
+ currentTokens -= estimateTokens(removed.content);
168
+ }
169
+ }
170
+ // Truncate system message if still needed
171
+ var finalSystemMessage = systemMessage;
172
+ if (systemMessage && (currentTokens + systemTokens) > tokenLimit) {
173
+ var maxSystemTokens = Math.max(200, tokenLimit - currentTokens - 100);
174
+ var systemContent = systemMessage.content;
175
+ var systemContentTokens = estimateTokens(systemContent);
176
+ if (systemContentTokens > maxSystemTokens) {
177
+ // Binary search for approximate length
178
+ var low = 0;
179
+ var high = systemContent.length;
180
+ while (low < high) {
181
+ var mid = Math.floor((low + high) / 2);
182
+ var testContent = systemContent.substring(0, mid);
183
+ if (estimateTokens(testContent) <= maxSystemTokens) {
184
+ low = mid + 1;
185
+ }
186
+ else {
187
+ high = mid;
188
+ }
189
+ }
190
+ systemContent = systemContent.substring(0, Math.max(0, low - 1)) + '...';
191
+ }
192
+ finalSystemMessage = __assign(__assign({}, systemMessage), { content: systemContent });
193
+ }
194
+ result = finalSystemMessage ? __spreadArray([finalSystemMessage], truncated, true) : truncated;
195
+ }
196
+ }
197
+ return result;
198
+ }
199
+ var MyModelAdapterStream = function (llm, onBeforeChat, maxMessages, maxContextLength, maxTokens) {
200
+ if (maxMessages === void 0) { maxMessages = 20; }
201
+ if (maxContextLength === void 0) { maxContextLength = 2500; }
202
+ if (maxTokens === void 0) { maxTokens = 3200; }
203
+ return ({
204
+ run: function (_a) {
205
+ return __asyncGenerator(this, arguments, function run_1(_b) {
206
+ var chatMessages, finalTokenLimit, totalTokens, firstMessage, isSystemFirst, systemMessage, otherMessages, finalSystemMessage, systemTokens_1, low, high, mid, testContent, systemTokens, availableTokens, truncated, currentTokens, i, msg, msgTokens, chunks, reply, _c, chunks_1, chunks_1_1, chunk, e_1_1, summaryResponse, title, error_1;
207
+ var _d, e_1, _e, _f;
208
+ var _g, _h, _j;
209
+ var messages = _b.messages, abortSignal = _b.abortSignal;
210
+ return __generator(this, function (_k) {
211
+ switch (_k.label) {
212
+ case 0:
213
+ chatMessages = messages.map(function (item) { return ({
214
+ role: item.role,
215
+ content: item.content[0].text,
216
+ }); });
217
+ // Apply truncation before onBeforeChat hook
218
+ chatMessages = truncateMessages(chatMessages, maxMessages, maxContextLength, maxTokens);
219
+ if (!onBeforeChat) return [3 /*break*/, 2];
220
+ return [4 /*yield*/, __await(onBeforeChat(chatMessages, llm)
221
+ // Re-apply truncation after onBeforeChat in case it added more content
222
+ // Use stricter limits to ensure we don't exceed context window
223
+ )];
224
+ case 1:
225
+ chatMessages = _k.sent();
226
+ // Re-apply truncation after onBeforeChat in case it added more content
227
+ // Use stricter limits to ensure we don't exceed context window
228
+ chatMessages = truncateMessages(chatMessages, maxMessages, maxContextLength, maxTokens);
229
+ _k.label = 2;
230
+ case 2:
231
+ finalTokenLimit = maxTokens || 3200;
232
+ if (finalTokenLimit > 0) {
233
+ totalTokens = chatMessages.reduce(function (sum, msg) { return sum + estimateTokens(msg.content); }, 0);
234
+ if (totalTokens > finalTokenLimit) {
235
+ firstMessage = chatMessages[0];
236
+ isSystemFirst = (firstMessage === null || firstMessage === void 0 ? void 0 : firstMessage.role) === 'system';
237
+ systemMessage = isSystemFirst ? firstMessage : null;
238
+ otherMessages = isSystemFirst ? chatMessages.slice(1) : chatMessages;
239
+ finalSystemMessage = systemMessage;
240
+ if (systemMessage) {
241
+ systemTokens_1 = estimateTokens(systemMessage.content);
242
+ if (systemTokens_1 > 500) {
243
+ low = 0;
244
+ high = systemMessage.content.length;
245
+ while (low < high) {
246
+ mid = Math.floor((low + high) / 2);
247
+ testContent = systemMessage.content.substring(0, mid);
248
+ if (estimateTokens(testContent) <= 500) {
249
+ low = mid + 1;
250
+ }
251
+ else {
252
+ high = mid;
253
+ }
254
+ }
255
+ finalSystemMessage = __assign(__assign({}, systemMessage), { content: systemMessage.content.substring(0, Math.max(0, low - 1)) + '...' });
256
+ }
257
+ }
258
+ systemTokens = finalSystemMessage ? estimateTokens(finalSystemMessage.content) : 0;
259
+ availableTokens = finalTokenLimit - systemTokens;
260
+ truncated = [];
261
+ currentTokens = 0;
262
+ // Add messages from newest to oldest until we hit the limit
263
+ for (i = otherMessages.length - 1; i >= 0; i--) {
264
+ msg = otherMessages[i];
265
+ msgTokens = estimateTokens(msg.content);
266
+ if (currentTokens + msgTokens <= availableTokens) {
267
+ truncated.unshift(msg);
268
+ currentTokens += msgTokens;
269
+ }
270
+ else {
271
+ break;
272
+ }
273
+ }
274
+ chatMessages = finalSystemMessage ? __spreadArray([finalSystemMessage], truncated, true) : truncated;
275
+ }
276
+ }
277
+ return [4 /*yield*/, __await(llm.chat.completions.create({
278
+ messages: chatMessages,
279
+ temperature: 1,
280
+ stream: true,
281
+ }))];
282
+ case 3:
283
+ chunks = _k.sent();
284
+ reply = "";
285
+ _k.label = 4;
286
+ case 4:
287
+ _k.trys.push([4, 11, 12, 17]);
288
+ _c = true, chunks_1 = __asyncValues(chunks);
289
+ _k.label = 5;
290
+ case 5: return [4 /*yield*/, __await(chunks_1.next())];
291
+ case 6:
292
+ if (!(chunks_1_1 = _k.sent(), _d = chunks_1_1.done, !_d)) return [3 /*break*/, 10];
293
+ _f = chunks_1_1.value;
294
+ _c = false;
295
+ chunk = _f;
296
+ reply += ((_g = chunk.choices[0]) === null || _g === void 0 ? void 0 : _g.delta.content) || "";
297
+ return [4 /*yield*/, __await({
298
+ status: {
299
+ type: 'running',
300
+ },
301
+ content: [
302
+ {
303
+ text: reply,
304
+ type: 'text',
305
+ },
306
+ ],
307
+ })];
308
+ case 7: return [4 /*yield*/, _k.sent()];
309
+ case 8:
310
+ _k.sent();
311
+ _k.label = 9;
312
+ case 9:
313
+ _c = true;
314
+ return [3 /*break*/, 5];
315
+ case 10: return [3 /*break*/, 17];
316
+ case 11:
317
+ e_1_1 = _k.sent();
318
+ e_1 = { error: e_1_1 };
319
+ return [3 /*break*/, 17];
320
+ case 12:
321
+ _k.trys.push([12, , 15, 16]);
322
+ if (!(!_c && !_d && (_e = chunks_1.return))) return [3 /*break*/, 14];
323
+ return [4 /*yield*/, __await(_e.call(chunks_1))];
324
+ case 13:
325
+ _k.sent();
326
+ _k.label = 14;
327
+ case 14: return [3 /*break*/, 16];
328
+ case 15:
329
+ if (e_1) throw e_1.error;
330
+ return [7 /*endfinally*/];
331
+ case 16: return [7 /*endfinally*/];
332
+ case 17: return [4 /*yield*/, __await({
93
333
  status: {
94
- type: 'running',
334
+ type: 'complete',
335
+ reason: 'stop',
95
336
  },
96
337
  content: [
97
338
  {
@@ -99,80 +340,45 @@ var MyModelAdapterStream = function (llm, onBeforeChat) { return ({
99
340
  type: 'text',
100
341
  },
101
342
  ],
102
- })];
103
- case 7: return [4 /*yield*/, _k.sent()];
104
- case 8:
105
- _k.sent();
106
- _k.label = 9;
107
- case 9:
108
- _c = true;
109
- return [3 /*break*/, 5];
110
- case 10: return [3 /*break*/, 17];
111
- case 11:
112
- e_1_1 = _k.sent();
113
- e_1 = { error: e_1_1 };
114
- return [3 /*break*/, 17];
115
- case 12:
116
- _k.trys.push([12, , 15, 16]);
117
- if (!(!_c && !_d && (_e = chunks_1.return))) return [3 /*break*/, 14];
118
- return [4 /*yield*/, __await(_e.call(chunks_1))];
119
- case 13:
120
- _k.sent();
121
- _k.label = 14;
122
- case 14: return [3 /*break*/, 16];
123
- case 15:
124
- if (e_1) throw e_1.error;
125
- return [7 /*endfinally*/];
126
- case 16: return [7 /*endfinally*/];
127
- case 17: return [4 /*yield*/, __await({
128
- status: {
129
- type: 'complete',
130
- reason: 'stop',
131
- },
132
- content: [
133
- {
134
- text: reply,
135
- type: 'text',
136
- },
137
- ],
138
- }
139
- // 对话完成后,如果这是第一轮对话,生成标题
140
- )];
141
- case 18: return [4 /*yield*/, _k.sent()];
142
- case 19:
143
- _k.sent();
144
- if (!(messages.length === 1)) return [3 /*break*/, 23];
145
- _k.label = 20;
146
- case 20:
147
- _k.trys.push([20, 22, , 23]);
148
- return [4 /*yield*/, __await(llm.chat.completions.create({
149
- messages: [
150
- {
151
- role: 'system',
152
- content: 'You are a title generation assistant. Please summarize a short title (no more than 10 words) based on the user\'s input, without punctuation.'
153
- },
154
- {
155
- role: 'user',
156
- content: messages[0].content[0].text
157
- }
158
- ],
159
- temperature: 0.5,
160
- }))];
161
- case 21:
162
- summaryResponse = _k.sent();
163
- title = (_j = (_h = summaryResponse.choices[0]) === null || _h === void 0 ? void 0 : _h.message.content) === null || _j === void 0 ? void 0 : _j.trim();
164
- if (title) {
165
- event_1.Dispatcher.instance.dispatch(constants_1.EVENT_THREAD_SET_TITLE, { data: title });
166
- }
167
- return [3 /*break*/, 23];
168
- case 22:
169
- error_1 = _k.sent();
170
- console.error('Failed to generate summary title:', error_1);
171
- return [3 /*break*/, 23];
172
- case 23: return [2 /*return*/];
173
- }
343
+ }
344
+ // 对话完成后,如果这是第一轮对话,生成标题
345
+ )];
346
+ case 18: return [4 /*yield*/, _k.sent()];
347
+ case 19:
348
+ _k.sent();
349
+ if (!(messages.length === 1)) return [3 /*break*/, 23];
350
+ _k.label = 20;
351
+ case 20:
352
+ _k.trys.push([20, 22, , 23]);
353
+ return [4 /*yield*/, __await(llm.chat.completions.create({
354
+ messages: [
355
+ {
356
+ role: 'system',
357
+ content: 'You are a title generation assistant. Please summarize a short title (no more than 10 words) based on the user\'s input, without punctuation.'
358
+ },
359
+ {
360
+ role: 'user',
361
+ content: messages[0].content[0].text
362
+ }
363
+ ],
364
+ temperature: 0.5,
365
+ }))];
366
+ case 21:
367
+ summaryResponse = _k.sent();
368
+ title = (_j = (_h = summaryResponse.choices[0]) === null || _h === void 0 ? void 0 : _h.message.content) === null || _j === void 0 ? void 0 : _j.trim();
369
+ if (title) {
370
+ event_1.Dispatcher.instance.dispatch(constants_1.EVENT_THREAD_SET_TITLE, { data: title });
371
+ }
372
+ return [3 /*break*/, 23];
373
+ case 22:
374
+ error_1 = _k.sent();
375
+ console.error('Failed to generate summary title:', error_1);
376
+ return [3 /*break*/, 23];
377
+ case 23: return [2 /*return*/];
378
+ }
379
+ });
174
380
  });
175
- });
176
- },
177
- }); };
381
+ },
382
+ });
383
+ };
178
384
  exports.MyModelAdapterStream = MyModelAdapterStream;
@@ -55,4 +55,25 @@ export interface AIAssistantConfig {
55
55
  * }
56
56
  */
57
57
  onBeforeChat?: (messages: ChatMessage[], llm: MLCEngine) => ChatMessage[] | Promise<ChatMessage[]>;
58
+ /**
59
+ * Maximum number of messages to keep in context. Older messages will be truncated.
60
+ * Set to 0 or undefined to disable truncation.
61
+ * @default 20
62
+ */
63
+ maxMessages?: number;
64
+ /**
65
+ * Maximum total characters in the context. Messages will be truncated from the oldest if exceeded.
66
+ * Note: This is a rough estimate. Actual token count may vary. For models with 4096 context window,
67
+ * a safe value is around 2500-3000 characters.
68
+ * Set to 0 or undefined to disable truncation.
69
+ * @default 2500
70
+ */
71
+ maxContextLength?: number;
72
+ /**
73
+ * Maximum estimated tokens in the context. This is a safety net to prevent exceeding model's context window.
74
+ * For models with 4096 context window, a safe value is around 3200 tokens (leaving ~900 tokens buffer for response and overhead).
75
+ * Set to 0 or undefined to disable token-based truncation.
76
+ * @default 3200
77
+ */
78
+ maxTokens?: number;
58
79
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@saber2pr/ai-assistant",
3
- "version": "0.0.13",
3
+ "version": "0.0.15",
4
4
  "description": "AI Assistant",
5
5
  "files": [
6
6
  "lib"