@saber2pr/ai-assistant 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ReadMe.md
CHANGED
|
@@ -49,6 +49,8 @@ Initializes and mounts the AI assistant. This assistant runs entirely in the bro
|
|
|
49
49
|
| `containerId` | `string` | `'ai-assistant-root'` | The ID of the container element where the assistant will be mounted. |
|
|
50
50
|
| `initialPosition` | `{ x: number; y: number }` | Bottom-right | The initial coordinates of the floating button. |
|
|
51
51
|
| `onBeforeChat` | `Function` | - | A hook to intercept and modify messages before they are sent to the AI. |
|
|
52
|
+
| `maxMessages` | `number` | `20` | Maximum number of messages to keep in context. When the limit is exceeded, the oldest messages are dropped first; a leading system message is always kept. Set to 0 to disable. |
|
|
53
|
+
| `maxContextLength` | `number` | `8000` | Maximum total characters in the context. When the limit is exceeded, the oldest messages are dropped first and an over-long system message is shortened. Set to 0 to disable. |
|
|
52
54
|
|
|
53
55
|
### Local Development
|
|
54
56
|
|
|
@@ -13,7 +13,7 @@ function MyRuntimeProvider(_a) {
|
|
|
13
13
|
var children = _a.children;
|
|
14
14
|
var llm = (0, context_1.useLLm)();
|
|
15
15
|
var config = react_1.default.useContext(context_2.AIConfigContext);
|
|
16
|
-
var runtime = (0, react_2.useLocalRuntime)((0, myModelAdapterStream_1.MyModelAdapterStream)(llm, config.onBeforeChat), {
|
|
16
|
+
var runtime = (0, react_2.useLocalRuntime)((0, myModelAdapterStream_1.MyModelAdapterStream)(llm, config.onBeforeChat, config.maxMessages, config.maxContextLength), {
|
|
17
17
|
adapters: {
|
|
18
18
|
speech: new react_2.WebSpeechSynthesisAdapter(),
|
|
19
19
|
},
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { ChatModelAdapter } from '@assistant-ui/react';
|
|
2
2
|
import { MLCEngine } from '@mlc-ai/web-llm';
|
|
3
3
|
import { ChatMessage } from '../../types/assistant';
|
|
4
|
-
export declare const MyModelAdapterStream: (llm: MLCEngine, onBeforeChat?: (messages: ChatMessage[], llm: MLCEngine) => ChatMessage[] | Promise<ChatMessage[]
|
|
4
|
+
/**
 * Builds the chat model adapter that streams replies from a web-llm engine.
 *
 * @param llm - Engine whose `chat.completions.create` is called for each run.
 * @param onBeforeChat - Optional hook invoked with the already-truncated
 *   messages and the engine; the messages it returns (directly or via a
 *   promise) are the ones sent to the model.
 * @param maxMessages - Maximum number of messages kept in the context.
 *   Falls back to 20 when omitted.
 * @param maxContextLength - Maximum total characters kept in the context.
 *   Falls back to 8000 when omitted.
 * @returns The adapter object consumed by the assistant runtime.
 */
export declare const MyModelAdapterStream: (llm: MLCEngine, onBeforeChat?: (messages: ChatMessage[], llm: MLCEngine) => ChatMessage[] | Promise<ChatMessage[]>, maxMessages?: number, maxContextLength?: number) => ChatModelAdapter;
|
|
@@ -1,4 +1,15 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
// Shallow-merge helper: copies the own enumerable properties of every
// argument after the first onto the first argument and returns it.
// An already-installed `this.__assign` is reused when present.
var __assign = (this && this.__assign) || function () {
    // On first call, rebind the helper to the native Object.assign when it
    // exists, otherwise to a manual own-property copy loop.
    __assign = Object.assign || function (target) {
        var total = arguments.length;
        for (var index = 1; index < total; index++) {
            var source = arguments[index];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    // Forward the current call to whichever implementation was selected.
    return __assign.apply(this, arguments);
};
|
|
2
13
|
var __generator = (this && this.__generator) || function (thisArg, body) {
|
|
3
14
|
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === "function" ? Iterator : Object).prototype);
|
|
4
15
|
return g.next = verb(0), g["throw"] = verb(1), g["return"] = verb(2), typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
|
|
@@ -46,52 +57,166 @@ var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _ar
|
|
|
46
57
|
function reject(value) { resume("throw", value); }
|
|
47
58
|
function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
|
|
48
59
|
};
|
|
60
|
+
// Array-spread helper: returns `to` concatenated with the elements of `from`.
// When `pack` is truthy (or only two arguments are passed), holes in `from`
// are materialised as explicit `undefined` entries before concatenation.
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    // Dense copy of `from`; created lazily, only once the first hole is seen.
    var dense;
    if (pack || arguments.length === 2) {
        var size = from.length;
        for (var idx = 0; idx < size; idx++) {
            if (dense || !(idx in from)) {
                if (!dense) {
                    dense = Array.prototype.slice.call(from, 0, idx);
                }
                dense[idx] = from[idx];
            }
        }
    }
    return to.concat(dense || Array.prototype.slice.call(from));
};
|
|
49
69
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
50
70
|
exports.MyModelAdapterStream = void 0;
|
|
51
71
|
var event_1 = require("../../utils/event");
|
|
52
72
|
var constants_1 = require("../../constants");
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
73
|
+
/**
 * Character count of a message's text. Content that is not a string (for
 * example `undefined` when the first content part carried no text) counts as 0.
 */
function messageTextLength(message) {
    return message && typeof message.content === 'string' ? message.content.length : 0;
}
/**
 * Returns `message` unchanged when its text fits in `limit` characters.
 * Otherwise returns a shallow copy whose content is cut so that the text plus
 * the trailing '...' is at most `limit` characters long. The input is never mutated.
 */
function clipMessageContent(message, limit) {
    if (messageTextLength(message) <= limit) {
        return message;
    }
    var ellipsis = '...';
    var clipped = {};
    for (var key in message) {
        if (Object.prototype.hasOwnProperty.call(message, key)) {
            clipped[key] = message[key];
        }
    }
    clipped.content = message.content.substring(0, Math.max(0, limit - ellipsis.length)) + ellipsis;
    return clipped;
}
/**
 * Trims a chat history to a message-count limit and a total character budget.
 *
 * A leading message with role 'system' is always kept (possibly shortened);
 * the remaining messages are dropped oldest-first. The most recent message is
 * never dropped, so the request always carries the prompt being answered even
 * when that single message is larger than the budget.
 *
 * @param messages Array of `{ role, content }` messages, oldest first. Not mutated.
 * @param maxMessages Maximum number of messages to keep; 0 or a non-positive/undefined value disables the count limit.
 * @param maxContextLength Maximum total characters of content; 0 or a non-positive/undefined value disables the length limit.
 * @returns A new array containing the retained messages.
 */
function truncateMessages(messages, maxMessages, maxContextLength) {
    var result = Array.prototype.slice.call(messages || []);
    // Nothing to trim; also avoids dereferencing result[0] below.
    if (result.length === 0) {
        return result;
    }
    // Truncate by message count (keep the most recent messages)
    if (maxMessages > 0 && result.length > maxMessages) {
        if (result[0].role === 'system') {
            var tailCount = maxMessages - 1;
            // slice(-0) is slice(0) and would return every message, so the
            // "no room left after the system message" case is handled explicitly.
            var tail = tailCount > 0 ? result.slice(-tailCount) : [];
            result = [result[0]].concat(tail);
        }
        else {
            result = result.slice(-maxMessages);
        }
    }
    // Truncate by total character length
    if (!(maxContextLength > 0)) {
        return result;
    }
    var hasSystemHead = result[0].role === 'system';
    var systemMessage = hasSystemHead ? result[0] : null;
    var others = hasSystemHead ? result.slice(1) : result.slice();
    var othersLength = 0;
    for (var i = 0; i < others.length; i++) {
        othersLength += messageTextLength(others[i]);
    }
    if (othersLength + messageTextLength(systemMessage) <= maxContextLength) {
        return result;
    }
    // Reserve 20% of maxContextLength for the system message, or at least 500 chars.
    // Only the beginning of the system message is kept (usually the key instructions).
    var systemReserve = Math.max(500, Math.floor(maxContextLength * 0.2));
    if (systemMessage) {
        systemMessage = clipMessageContent(systemMessage, systemReserve);
    }
    var systemLength = messageTextLength(systemMessage);
    // Drop the oldest messages until under the limit, but keep the newest one.
    while (others.length > 1 && othersLength + systemLength > maxContextLength) {
        othersLength -= messageTextLength(others.shift());
    }
    // If still too long, shorten the system message further (never below 200
    // chars), leaving a 100-char buffer.
    if (systemMessage && othersLength + systemLength > maxContextLength) {
        var tighterLimit = Math.max(200, maxContextLength - othersLength - 100);
        systemMessage = clipMessageContent(systemMessage, tighterLimit);
    }
    return systemMessage ? [systemMessage].concat(others) : others;
}
|
|
137
|
+
var MyModelAdapterStream = function (llm, onBeforeChat, maxMessages, maxContextLength) {
|
|
138
|
+
if (maxMessages === void 0) { maxMessages = 20; }
|
|
139
|
+
if (maxContextLength === void 0) { maxContextLength = 8000; }
|
|
140
|
+
return ({
|
|
141
|
+
run: function (_a) {
|
|
142
|
+
return __asyncGenerator(this, arguments, function run_1(_b) {
|
|
143
|
+
var chatMessages, chunks, reply, _c, chunks_1, chunks_1_1, chunk, e_1_1, summaryResponse, title, error_1;
|
|
144
|
+
var _d, e_1, _e, _f;
|
|
145
|
+
var _g, _h, _j;
|
|
146
|
+
var messages = _b.messages, abortSignal = _b.abortSignal;
|
|
147
|
+
return __generator(this, function (_k) {
|
|
148
|
+
switch (_k.label) {
|
|
149
|
+
case 0:
|
|
150
|
+
chatMessages = messages.map(function (item) { return ({
|
|
151
|
+
role: item.role,
|
|
152
|
+
content: item.content[0].text,
|
|
153
|
+
}); });
|
|
154
|
+
// Apply truncation before onBeforeChat hook
|
|
155
|
+
chatMessages = truncateMessages(chatMessages, maxMessages, maxContextLength);
|
|
156
|
+
if (!onBeforeChat) return [3 /*break*/, 2];
|
|
157
|
+
return [4 /*yield*/, __await(onBeforeChat(chatMessages, llm))];
|
|
158
|
+
case 1:
|
|
159
|
+
chatMessages = _k.sent();
|
|
160
|
+
_k.label = 2;
|
|
161
|
+
case 2: return [4 /*yield*/, __await(llm.chat.completions.create({
|
|
162
|
+
messages: chatMessages,
|
|
163
|
+
temperature: 1,
|
|
164
|
+
stream: true,
|
|
165
|
+
}))];
|
|
166
|
+
case 3:
|
|
167
|
+
chunks = _k.sent();
|
|
168
|
+
reply = "";
|
|
169
|
+
_k.label = 4;
|
|
170
|
+
case 4:
|
|
171
|
+
_k.trys.push([4, 11, 12, 17]);
|
|
172
|
+
_c = true, chunks_1 = __asyncValues(chunks);
|
|
173
|
+
_k.label = 5;
|
|
174
|
+
case 5: return [4 /*yield*/, __await(chunks_1.next())];
|
|
175
|
+
case 6:
|
|
176
|
+
if (!(chunks_1_1 = _k.sent(), _d = chunks_1_1.done, !_d)) return [3 /*break*/, 10];
|
|
177
|
+
_f = chunks_1_1.value;
|
|
178
|
+
_c = false;
|
|
179
|
+
chunk = _f;
|
|
180
|
+
reply += ((_g = chunk.choices[0]) === null || _g === void 0 ? void 0 : _g.delta.content) || "";
|
|
181
|
+
return [4 /*yield*/, __await({
|
|
182
|
+
status: {
|
|
183
|
+
type: 'running',
|
|
184
|
+
},
|
|
185
|
+
content: [
|
|
186
|
+
{
|
|
187
|
+
text: reply,
|
|
188
|
+
type: 'text',
|
|
189
|
+
},
|
|
190
|
+
],
|
|
191
|
+
})];
|
|
192
|
+
case 7: return [4 /*yield*/, _k.sent()];
|
|
193
|
+
case 8:
|
|
194
|
+
_k.sent();
|
|
195
|
+
_k.label = 9;
|
|
196
|
+
case 9:
|
|
197
|
+
_c = true;
|
|
198
|
+
return [3 /*break*/, 5];
|
|
199
|
+
case 10: return [3 /*break*/, 17];
|
|
200
|
+
case 11:
|
|
201
|
+
e_1_1 = _k.sent();
|
|
202
|
+
e_1 = { error: e_1_1 };
|
|
203
|
+
return [3 /*break*/, 17];
|
|
204
|
+
case 12:
|
|
205
|
+
_k.trys.push([12, , 15, 16]);
|
|
206
|
+
if (!(!_c && !_d && (_e = chunks_1.return))) return [3 /*break*/, 14];
|
|
207
|
+
return [4 /*yield*/, __await(_e.call(chunks_1))];
|
|
208
|
+
case 13:
|
|
209
|
+
_k.sent();
|
|
210
|
+
_k.label = 14;
|
|
211
|
+
case 14: return [3 /*break*/, 16];
|
|
212
|
+
case 15:
|
|
213
|
+
if (e_1) throw e_1.error;
|
|
214
|
+
return [7 /*endfinally*/];
|
|
215
|
+
case 16: return [7 /*endfinally*/];
|
|
216
|
+
case 17: return [4 /*yield*/, __await({
|
|
93
217
|
status: {
|
|
94
|
-
type: '
|
|
218
|
+
type: 'complete',
|
|
219
|
+
reason: 'stop',
|
|
95
220
|
},
|
|
96
221
|
content: [
|
|
97
222
|
{
|
|
@@ -99,80 +224,45 @@ var MyModelAdapterStream = function (llm, onBeforeChat) { return ({
|
|
|
99
224
|
type: 'text',
|
|
100
225
|
},
|
|
101
226
|
],
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
_k.sent();
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
// After the chat completes, generate a title if this is the first round of the conversation
|
|
140
|
-
)];
|
|
141
|
-
case 18: return [4 /*yield*/, _k.sent()];
|
|
142
|
-
case 19:
|
|
143
|
-
_k.sent();
|
|
144
|
-
if (!(messages.length === 1)) return [3 /*break*/, 23];
|
|
145
|
-
_k.label = 20;
|
|
146
|
-
case 20:
|
|
147
|
-
_k.trys.push([20, 22, , 23]);
|
|
148
|
-
return [4 /*yield*/, __await(llm.chat.completions.create({
|
|
149
|
-
messages: [
|
|
150
|
-
{
|
|
151
|
-
role: 'system',
|
|
152
|
-
content: 'You are a title generation assistant. Please summarize a short title (no more than 10 words) based on the user\'s input, without punctuation.'
|
|
153
|
-
},
|
|
154
|
-
{
|
|
155
|
-
role: 'user',
|
|
156
|
-
content: messages[0].content[0].text
|
|
157
|
-
}
|
|
158
|
-
],
|
|
159
|
-
temperature: 0.5,
|
|
160
|
-
}))];
|
|
161
|
-
case 21:
|
|
162
|
-
summaryResponse = _k.sent();
|
|
163
|
-
title = (_j = (_h = summaryResponse.choices[0]) === null || _h === void 0 ? void 0 : _h.message.content) === null || _j === void 0 ? void 0 : _j.trim();
|
|
164
|
-
if (title) {
|
|
165
|
-
event_1.Dispatcher.instance.dispatch(constants_1.EVENT_THREAD_SET_TITLE, { data: title });
|
|
166
|
-
}
|
|
167
|
-
return [3 /*break*/, 23];
|
|
168
|
-
case 22:
|
|
169
|
-
error_1 = _k.sent();
|
|
170
|
-
console.error('Failed to generate summary title:', error_1);
|
|
171
|
-
return [3 /*break*/, 23];
|
|
172
|
-
case 23: return [2 /*return*/];
|
|
173
|
-
}
|
|
227
|
+
}
|
|
228
|
+
// After the chat completes, generate a title if this is the first round of the conversation
|
|
229
|
+
)];
|
|
230
|
+
case 18: return [4 /*yield*/, _k.sent()];
|
|
231
|
+
case 19:
|
|
232
|
+
_k.sent();
|
|
233
|
+
if (!(messages.length === 1)) return [3 /*break*/, 23];
|
|
234
|
+
_k.label = 20;
|
|
235
|
+
case 20:
|
|
236
|
+
_k.trys.push([20, 22, , 23]);
|
|
237
|
+
return [4 /*yield*/, __await(llm.chat.completions.create({
|
|
238
|
+
messages: [
|
|
239
|
+
{
|
|
240
|
+
role: 'system',
|
|
241
|
+
content: 'You are a title generation assistant. Please summarize a short title (no more than 10 words) based on the user\'s input, without punctuation.'
|
|
242
|
+
},
|
|
243
|
+
{
|
|
244
|
+
role: 'user',
|
|
245
|
+
content: messages[0].content[0].text
|
|
246
|
+
}
|
|
247
|
+
],
|
|
248
|
+
temperature: 0.5,
|
|
249
|
+
}))];
|
|
250
|
+
case 21:
|
|
251
|
+
summaryResponse = _k.sent();
|
|
252
|
+
title = (_j = (_h = summaryResponse.choices[0]) === null || _h === void 0 ? void 0 : _h.message.content) === null || _j === void 0 ? void 0 : _j.trim();
|
|
253
|
+
if (title) {
|
|
254
|
+
event_1.Dispatcher.instance.dispatch(constants_1.EVENT_THREAD_SET_TITLE, { data: title });
|
|
255
|
+
}
|
|
256
|
+
return [3 /*break*/, 23];
|
|
257
|
+
case 22:
|
|
258
|
+
error_1 = _k.sent();
|
|
259
|
+
console.error('Failed to generate summary title:', error_1);
|
|
260
|
+
return [3 /*break*/, 23];
|
|
261
|
+
case 23: return [2 /*return*/];
|
|
262
|
+
}
|
|
263
|
+
});
|
|
174
264
|
});
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
}
|
|
265
|
+
},
|
|
266
|
+
});
|
|
267
|
+
};
|
|
178
268
|
exports.MyModelAdapterStream = MyModelAdapterStream;
|
package/lib/types/assistant.d.ts
CHANGED
|
@@ -55,4 +55,16 @@ export interface AIAssistantConfig {
|
|
|
55
55
|
* }
|
|
56
56
|
*/
|
|
57
57
|
onBeforeChat?: (messages: ChatMessage[], llm: MLCEngine) => ChatMessage[] | Promise<ChatMessage[]>;
|
|
58
|
+
/**
|
|
59
|
+
* Maximum number of messages to keep in context. Older messages will be truncated.
|
|
60
|
+
* Set to 0 to disable truncation; when left undefined, the default applies.
|
|
61
|
+
* @default 20
|
|
62
|
+
*/
|
|
63
|
+
maxMessages?: number;
|
|
64
|
+
/**
|
|
65
|
+
* Maximum total characters in the context. Messages will be truncated from the oldest if exceeded.
|
|
66
|
+
* Set to 0 to disable truncation; when left undefined, the default applies.
|
|
67
|
+
* @default 8000
|
|
68
|
+
*/
|
|
69
|
+
maxContextLength?: number;
|
|
58
70
|
}
|