deepspider 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,6 +5,8 @@
5
5
 
6
6
  > 智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent
7
7
 
8
+ [English](README_EN.md)
9
+
8
10
  从 JS 逆向到完整爬虫脚本的一站式 AI Agent 解决方案。
9
11
 
10
12
  ## 特性
@@ -139,11 +141,15 @@ pnpm test
139
141
 
140
142
  ### 使用流程
141
143
 
142
- 1. **启动**: `pnpm run agent https://target-site.com`
144
+ 1. **启动**: `deepspider https://target-site.com`
143
145
  2. **等待**: 浏览器打开,系统自动记录数据(不消耗 API)
144
146
  3. **操作**: 在网站上登录、翻页、触发目标请求
145
147
  4. **选择**: 点击面板的选择按钮 ⦿,进入选择模式
146
- 5. **分析**: 点击目标数据,确认后发送给 Agent
148
+ 5. **分析**: 点击目标数据元素,选择快捷操作:
149
+ - **追踪数据来源** — 定位选中数据的 API 接口
150
+ - **分析加密参数** — 识别并逆向加密参数
151
+ - **完整分析并生成爬虫** — 端到端:逆向、验证、生成代码
152
+ - **提取页面结构** — 分析 DOM 结构,生成选择器和字段配置
147
153
  6. **对话**: 在面板或 CLI 继续提问,深入分析
148
154
 
149
155
  ## 架构
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "deepspider",
3
- "version": "0.3.2",
3
+ "version": "0.4.0",
4
4
  "description": "智能爬虫工程平台 - 基于 DeepAgents + Patchright 的 AI 爬虫 Agent",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -23,7 +23,7 @@
23
23
  "lint": "eslint src/",
24
24
  "lint:fix": "eslint src/ --fix",
25
25
  "setup:crypto": "uv venv .venv --python 3.11 2>/dev/null || true && uv pip install -r requirements-crypto.txt",
26
- "postinstall": "patchright install chromium && npm rebuild isolated-vm 2>/dev/null || true",
26
+ "postinstall": "patchright install chromium && npm rebuild isolated-vm better-sqlite3 2>/dev/null || true",
27
27
  "prepare": "husky"
28
28
  },
29
29
  "keywords": [
@@ -57,8 +57,10 @@
57
57
  "@langchain/anthropic": "^1.3.17",
58
58
  "@langchain/core": "^1.1.24",
59
59
  "@langchain/langgraph": "^1.1.2",
60
+ "@langchain/langgraph-checkpoint-sqlite": "^1.0.1",
60
61
  "@langchain/openai": "^1.2.3",
61
62
  "@modelcontextprotocol/sdk": "^1.26.0",
63
+ "better-sqlite3": "^12.6.2",
62
64
  "crypto-js": "^4.2.0",
63
65
  "deepagents": "^1.7.6",
64
66
  "dotenv": "^17.2.3",
@@ -18,10 +18,16 @@ export class PanelBridge {
18
18
  if (!cdp) return null;
19
19
 
20
20
  try {
21
- const result = await cdp.send('Runtime.evaluate', {
22
- expression: code,
23
- returnByValue: true,
24
- });
21
+ // 3s 超时:断点暂停时 Runtime.evaluate 会永远挂住,必须限时
22
+ const result = await Promise.race([
23
+ cdp.send('Runtime.evaluate', {
24
+ expression: code,
25
+ returnByValue: true,
26
+ }),
27
+ new Promise((_, reject) =>
28
+ setTimeout(() => reject(new Error('evaluateInPage timeout (debugger paused?)')), 3000)
29
+ ),
30
+ ]);
25
31
  return result.result?.value;
26
32
  } catch (e) {
27
33
  this.debug('evaluateInPage 失败:', e.message);
@@ -29,14 +35,34 @@ export class PanelBridge {
29
35
  }
30
36
  }
31
37
 
38
+ /**
39
+ * 等待面板 JS 初始化完成
40
+ */
41
+ async waitForPanel(timeoutMs = 5000) {
42
+ const start = Date.now();
43
+ while (Date.now() - start < timeoutMs) {
44
+ const ready = await this.evaluateInPage('!!window.__deepspider__?.addStructuredMessage');
45
+ if (ready) return true;
46
+ await new Promise(r => setTimeout(r, 200));
47
+ }
48
+ return false;
49
+ }
50
+
51
+ /**
52
+ * 批量发送消息到面板(单次 CDP 调用)
53
+ */
54
+ async sendBatch(messages) {
55
+ if (!messages?.length) return;
56
+ const escaped = JSON.stringify(messages);
57
+ await this.evaluateInPage(
58
+ `(function(msgs){var ds=window.__deepspider__;if(!ds)return;msgs.forEach(function(m){ds.addStructuredMessage?.(m.type,m.data);})})(${escaped})`
59
+ );
60
+ }
61
+
32
62
  /**
33
63
  * 发送结构化消息到前端面板
34
64
  */
35
65
  async sendMessage(type, data) {
36
- const browser = this.getBrowser();
37
- const page = browser?.getPage?.();
38
- if (!page) return;
39
-
40
66
  try {
41
67
  const escapedType = JSON.stringify(type);
42
68
  const escapedData = JSON.stringify(data);
@@ -13,6 +13,31 @@ function cleanDSML(text) {
13
13
  return text ? text.replace(DSML_PATTERN, '') : text;
14
14
  }
15
15
 
16
// Stall timeout for streamed events: the maximum allowed gap between two
// consecutive events. 150s without a new event aborts the stream.
const STALL_TIMEOUT_MS = 150000;

/**
 * Wrap an async iterator so that every `next()` call has its own timeout.
 * Prevents a `for await` loop from hanging forever when the producer stops
 * emitting events.
 *
 * @param {{next: Function, return?: Function}} asyncIterator - Object exposing
 *   an async `next()`; `return()` is used for cleanup when available.
 * @param {number} [timeoutMs=STALL_TIMEOUT_MS] - Maximum wait per event, in ms.
 * @yields Each value produced by `asyncIterator`, in order.
 * @throws {Error} `Stream timeout: no events for Ns` when a single `next()`
 *   takes longer than `timeoutMs`; errors from `next()` are rethrown as-is.
 */
async function* withStallTimeout(asyncIterator, timeoutMs = STALL_TIMEOUT_MS) {
  let exhausted = false;
  try {
    while (true) {
      let timer;
      const stall = new Promise((_, reject) => {
        timer = setTimeout(
          () => reject(new Error(`Stream timeout: no events for ${Math.round(timeoutMs / 1000)}s`)),
          timeoutMs,
        );
      });

      let result;
      try {
        result = await Promise.race([asyncIterator.next(), stall]);
      } finally {
        // Always disarm the timer, including when next() rejects; otherwise a
        // live timer would keep the event loop busy for the whole timeout.
        clearTimeout(timer);
      }

      if (result.done) {
        exhausted = true;
        return;
      }
      yield result.value;
    }
  } finally {
    if (!exhausted) {
      // Timeout, producer error, or the consumer left the loop early: ask the
      // wrapped iterator to release its resources. This is deliberately not
      // awaited, because a stalled producer may never settle return() either.
      try {
        Promise.resolve(asyncIterator.return?.()).catch(() => {});
      } catch {
        // Best-effort cleanup only.
      }
    }
  }
}
40
+
16
41
  // 人工介入配置
17
42
  const INTERVENTION_CONFIG = {
18
43
  idleTimeoutMs: 120000, // 2分钟无响应触发提示
@@ -63,7 +88,7 @@ export class StreamHandler {
63
88
  );
64
89
 
65
90
  this.debug('chatStream: 开始遍历事件');
66
- for await (const event of eventStream) {
91
+ for await (const event of withStallTimeout(eventStream)) {
67
92
  lastEventTime = Date.now();
68
93
  eventCount++;
69
94
 
@@ -73,10 +98,12 @@ export class StreamHandler {
73
98
 
74
99
  await this._handleStreamEvent(event);
75
100
 
76
- if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
101
+ if (event.event === 'on_chat_model_end') {
77
102
  const output = event.data?.output;
78
103
  if (output?.content) {
79
- finalResponse = output.content;
104
+ finalResponse = typeof output.content === 'string'
105
+ ? output.content
106
+ : output.content.filter(c => c.type === 'text').map(c => c.text).join('');
80
107
  this.debug(`chatStream: 收到最终响应, 长度=${finalResponse.length}`);
81
108
  }
82
109
  }
@@ -86,10 +113,15 @@ export class StreamHandler {
86
113
  console.log(`\n[完成] 共处理 ${eventCount} 个事件`);
87
114
 
88
115
  // 发送剩余累积文本
89
- await this._flushFullResponse();
116
+ const flushed = await this._flushFullResponse();
90
117
 
91
118
  // 检测 interrupt 并渲染到面板
92
- await this._checkAndRenderInterrupt();
119
+ const hasInterrupt = await this._checkAndRenderInterrupt();
120
+
121
+ // 兜底:如果没有文本输出也没有 interrupt,发送完成通知
122
+ if (!flushed && !hasInterrupt && eventCount > 0 && lastToolCall) {
123
+ await this.panelBridge.sendToPanel('system', '✅ 任务完成');
124
+ }
93
125
 
94
126
  await this.panelBridge.setBusy(false);
95
127
 
@@ -126,23 +158,31 @@ export class StreamHandler {
126
158
  { ...this.config, version: 'v2' }
127
159
  );
128
160
 
129
- for await (const event of eventStream) {
161
+ for await (const event of withStallTimeout(eventStream)) {
130
162
  lastEventTime = Date.now();
131
163
  eventCount++;
132
164
  await this._handleStreamEvent(event);
133
165
 
134
- if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
166
+ if (event.event === 'on_chat_model_end') {
135
167
  const output = event.data?.output;
136
168
  if (output?.content) {
137
- finalResponse = output.content;
169
+ finalResponse = typeof output.content === 'string'
170
+ ? output.content
171
+ : output.content.filter(c => c.type === 'text').map(c => c.text).join('');
138
172
  }
139
173
  }
140
174
  }
141
175
 
142
176
  clearInterval(heartbeat);
143
177
 
144
- await this._flushFullResponse();
145
- await this._checkAndRenderInterrupt();
178
+ const flushed = await this._flushFullResponse();
179
+ const hasInterrupt = await this._checkAndRenderInterrupt();
180
+
181
+ // 兜底:如果没有文本输出也没有 interrupt,发送完成通知
182
+ if (!flushed && !hasInterrupt && eventCount > 0) {
183
+ await this.panelBridge.sendToPanel('system', '✅ 任务完成');
184
+ }
185
+
146
186
  await this.panelBridge.setBusy(false);
147
187
 
148
188
  console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
@@ -168,6 +208,23 @@ export class StreamHandler {
168
208
  await this.panelBridge.setBusy(true);
169
209
  this.debug(`chatStreamResume: 从检查点恢复, retryCount=${retryCount}`);
170
210
 
211
+ // 恢复前:检查 checkpoint 是否有实际消息
212
+ if (retryCount === 0) {
213
+ try {
214
+ const state = await this.agent.getState(this.config);
215
+ const messages = state?.values?.messages;
216
+ if (!messages?.length) {
217
+ console.log('[恢复] checkpoint 无历史消息,跳过恢复');
218
+ await this.panelBridge.sendToPanel('system', '该会话无历史记录,请重新开始分析');
219
+ await this.panelBridge.setBusy(false);
220
+ return '[无历史消息]';
221
+ }
222
+ await this._restoreHistoryToPanel(messages);
223
+ } catch (e) {
224
+ this.debug('chatStreamResume: getState 失败:', e.message);
225
+ }
226
+ }
227
+
171
228
  const heartbeat = setInterval(() => {
172
229
  const elapsed = Math.round((Date.now() - lastEventTime) / 1000);
173
230
  if (elapsed > 30) {
@@ -181,23 +238,30 @@ export class StreamHandler {
181
238
  { ...this.config, version: 'v2' }
182
239
  );
183
240
 
184
- for await (const event of eventStream) {
241
+ for await (const event of withStallTimeout(eventStream)) {
185
242
  lastEventTime = Date.now();
186
243
  eventCount++;
187
244
  await this._handleStreamEvent(event);
188
245
 
189
- if (event.event === 'on_chat_model_end' && event.name === 'ChatOpenAI') {
246
+ if (event.event === 'on_chat_model_end') {
190
247
  const output = event.data?.output;
191
248
  if (output?.content) {
192
- finalResponse = output.content;
249
+ finalResponse = typeof output.content === 'string'
250
+ ? output.content
251
+ : output.content.filter(c => c.type === 'text').map(c => c.text).join('');
193
252
  }
194
253
  }
195
254
  }
196
255
 
197
256
  clearInterval(heartbeat);
198
257
 
199
- await this._flushFullResponse();
200
- await this._checkAndRenderInterrupt();
258
+ const flushed2 = await this._flushFullResponse();
259
+ const hasInterrupt2 = await this._checkAndRenderInterrupt();
260
+
261
+ if (!flushed2 && !hasInterrupt2 && eventCount > 0) {
262
+ await this.panelBridge.sendToPanel('system', '✅ 任务完成');
263
+ }
264
+
201
265
  await this.panelBridge.setBusy(false);
202
266
 
203
267
  console.log(`\n[恢复完成] 共处理 ${eventCount} 个事件`);
@@ -219,14 +283,49 @@ export class StreamHandler {
219
283
  }
220
284
  }
221
285
 
286
+ /**
287
+ * 从 checkpoint 恢复历史消息到前端面板
288
+ */
289
+ async _restoreHistoryToPanel(messages) {
290
+ try {
291
+ if (!messages?.length) return;
292
+ this.debug(`_restoreHistoryToPanel: ${messages.length} 条历史消息`);
293
+
294
+ const batch = [];
295
+ for (const msg of messages) {
296
+ const type = msg._getType?.() || msg.constructor?.name;
297
+ const content = Array.isArray(msg.content)
298
+ ? msg.content.filter(c => c.type === 'text').map(c => c.text).join('')
299
+ : (typeof msg.content === 'string' ? msg.content : '');
300
+ if (!content.trim()) continue;
301
+
302
+ if (type === 'human') {
303
+ batch.push({ type: 'user', data: { content } });
304
+ } else if (type === 'ai') {
305
+ batch.push({ type: 'text', data: { content } });
306
+ } else if (type === 'tool') {
307
+ const summary = content.length > 200 ? content.slice(0, 200) + '...' : content;
308
+ batch.push({ type: 'system', data: { content: `[工具结果] ${summary}` } });
309
+ }
310
+ }
311
+ await this.panelBridge.sendBatch(batch);
312
+ } catch (e) {
313
+ this.debug('_restoreHistoryToPanel 失败:', e.message);
314
+ }
315
+ }
316
+
222
317
  /**
223
318
  * 发送剩余累积文本到面板
319
+ * 返回 true 如果有文本被发送
224
320
  */
225
321
  async _flushFullResponse() {
226
322
  if (this.fullResponse?.trim()) {
227
323
  await this.panelBridge.sendToPanel('assistant', this.fullResponse);
324
+ this.fullResponse = '';
325
+ return true;
228
326
  }
229
327
  this.fullResponse = '';
328
+ return false;
230
329
  }
231
330
 
232
331
  /**
@@ -7,8 +7,8 @@
7
7
  import 'dotenv/config';
8
8
  import { StateBackend, FilesystemBackend, createFilesystemMiddleware, createPatchToolCallsMiddleware } from 'deepagents';
9
9
  import { createAgent, toolRetryMiddleware, summarizationMiddleware, anthropicPromptCachingMiddleware, todoListMiddleware, humanInTheLoopMiddleware } from 'langchain';
10
- import { ChatOpenAI } from '@langchain/openai';
11
- import { MemorySaver } from '@langchain/langgraph';
10
+ import { ChatAnthropic } from '@langchain/anthropic';
11
+ import { SqliteSaver } from '@langchain/langgraph-checkpoint-sqlite';
12
12
 
13
13
  import { coreTools } from './tools/index.js';
14
14
  import { allSubagents } from './subagents/index.js';
@@ -17,7 +17,10 @@ import { createReportMiddleware } from './middleware/report.js';
17
17
  import { createFilterToolsMiddleware } from './middleware/filterTools.js';
18
18
  import { createCustomSubAgentMiddleware } from './middleware/subagent.js';
19
19
  import { createToolGuardMiddleware } from './middleware/toolGuard.js';
20
+ import { createToolCallLimitMiddleware } from './subagents/factory.js';
20
21
  import { createValidationWorkflowMiddleware } from './middleware/validationWorkflow.js';
22
+ import { createMemoryFlushMiddleware } from './middleware/memoryFlush.js';
23
+ import { createToolAvailabilityMiddleware } from './middleware/toolAvailability.js';
21
24
 
22
25
  // createDeepAgent 内部拼接的 BASE_PROMPT
23
26
  const BASE_PROMPT = 'In order to complete the objective that the user asks of you, you have access to a number of standard tools.';
@@ -29,9 +32,48 @@ const config = {
29
32
  model: process.env.DEEPSPIDER_MODEL || 'gpt-4o',
30
33
  };
31
34
 
35
// Schema keywords whose value is a map from user-chosen names to sub-schemas.
// The keys of such a map are names, not keywords, and must never be filtered.
const SCHEMA_NAME_MAP_KEYS = new Set([
  'properties',
  'patternProperties',
  '$defs',
  'definitions',
  'dependentSchemas',
]);

/**
 * Recursively remove JSON Schema keywords that the Anthropic API rejects.
 *
 * Zod v4's toJSONSchema emits `$schema` and `propertyNames`, which are dropped
 * here. An empty-object `additionalProperties: {}` is rewritten to `true`
 * (same meaning, "any type", but the empty object is not accepted).
 *
 * Names inside `properties`-style maps are preserved even when they happen to
 * collide with a stripped keyword (e.g. a parameter called `propertyNames`),
 * so `properties` stays consistent with `required`.
 *
 * @param {*} obj - Schema fragment; non-objects are returned unchanged.
 * @returns {*} A cleaned copy; the input is not mutated.
 */
function stripUnsupportedSchemaKeys(obj) {
  if (!obj || typeof obj !== 'object') return obj;
  if (Array.isArray(obj)) return obj.map((item) => stripUnsupportedSchemaKeys(item));

  const res = {};
  // Object.entries visits own enumerable keys only (for...in would also walk
  // inherited ones).
  for (const [k, v] of Object.entries(obj)) {
    if (k === '$schema' || k === 'propertyNames') continue;

    const isObjectValue = v !== null && typeof v === 'object';

    // additionalProperties: {} -> true
    if (k === 'additionalProperties' && isObjectValue && Object.keys(v).length === 0) {
      res[k] = true;
      continue;
    }

    // name -> schema map: keep every name, clean each sub-schema.
    if (SCHEMA_NAME_MAP_KEYS.has(k) && isObjectValue && !Array.isArray(v)) {
      res[k] = Object.fromEntries(
        Object.entries(v).map(([name, sub]) => [name, stripUnsupportedSchemaKeys(sub)]),
      );
      continue;
    }

    res[k] = stripUnsupportedSchemaKeys(v);
  }
  return res;
}
55
+
56
/**
 * Global fetch wrapper: intercepts outgoing requests whose string body
 * mentions "tools" and strips the Zod v4 schema fields the LLM API rejects
 * from `body.tools` before forwarding the request.
 * Kept as a safety net so `$schema` / `propertyNames` /
 * `additionalProperties: {}` never leak to the API.
 */
const _origFetch = globalThis.fetch;
globalThis.fetch = async function(url, opts) {
  const rawBody = opts?.body;
  const mayCarryTools = rawBody && typeof rawBody === 'string' && rawBody.includes('"tools"');
  if (!mayCarryTools) {
    return _origFetch(url, opts);
  }

  let forwarded = opts;
  try {
    const payload = JSON.parse(rawBody);
    if (payload.tools) {
      payload.tools = stripUnsupportedSchemaKeys(payload.tools);
      // Copy the options instead of mutating the caller's object.
      forwarded = { ...opts, body: JSON.stringify(payload) };
    }
  } catch {
    // Body was not JSON (non-LLM request): forward it untouched.
  }
  return _origFetch(url, forwarded);
};
73
+
32
74
  /**
33
75
  * 创建 LLM 模型实例
34
- * 使用 ChatOpenAI 兼容 OpenAI 格式的任意供应商
76
+ * 使用 ChatAnthropic 发送原生 Anthropic 格式,避免代理的 OpenAI→Anthropic 转换引入 schema 错误
35
77
  */
36
78
  function createModel(options = {}) {
37
79
  const {
@@ -40,10 +82,13 @@ function createModel(options = {}) {
40
82
  baseUrl = config.baseUrl,
41
83
  } = options;
42
84
 
43
- return new ChatOpenAI({
85
+ // ChatAnthropic 的 baseURL 不含 /v1(SDK 自动拼接)
86
+ const anthropicBaseUrl = baseUrl?.replace(/\/v1\/?$/, '') || undefined;
87
+
88
+ return new ChatAnthropic({
44
89
  model,
45
- apiKey,
46
- configuration: baseUrl ? { baseURL: baseUrl } : undefined,
90
+ anthropicApiKey: apiKey,
91
+ anthropicApiUrl: anthropicBaseUrl,
47
92
  temperature: 0,
48
93
  });
49
94
  }
@@ -59,18 +104,27 @@ export function createDeepSpiderAgent(options = {}) {
59
104
  enableMemory = true,
60
105
  enableInterrupt = false,
61
106
  onReportReady = null, // 报告就绪回调
107
+ onFileSaved = null, // 文件保存通知回调
108
+ checkpointer,
62
109
  } = options;
63
110
 
64
- // 创建 LLM 模型实例
111
+ // 创建 LLM 模型实例(加 timeout 防止 API 无响应时 streamEvents 永久挂起)
65
112
  const llm = createModel({ model, apiKey, baseUrl });
113
+ llm.timeout = 120000; // 120s — 主 LLM 超时
114
+
115
+ // 摘要专用 LLM:故意不设 timeout
116
+ // 原因:summarizationMiddleware 的 createSummary 有 try-catch,超时会返回错误字符串,
117
+ // 但 beforeModel 仍会用这个错误字符串替换所有原始消息(REMOVE_ALL_MESSAGES),导致数据丢失。
118
+ // 安全网由 StreamHandler.withStallTimeout (150s) 提供 — 它在 BeforeModelNode 完成前触发,
119
+ // 不会写入 checkpoint,原始数据得以保留。
120
+ const summaryLlm = createModel({ model, apiKey, baseUrl });
66
121
 
67
122
  // 后端配置:使用文件系统持久化
68
123
  const backend = enableMemory
69
124
  ? new FilesystemBackend({ rootDir: './.deepspider-agent' })
70
125
  : new StateBackend();
71
126
 
72
- // Checkpointer:保存对话状态,支持断点恢复
73
- const checkpointer = new MemorySaver();
127
+ const resolvedCheckpointer = checkpointer ?? SqliteSaver.fromConnString(':memory:');
74
128
 
75
129
  // 人机交互配置
76
130
  const interruptOn = enableInterrupt
@@ -84,7 +138,7 @@ export function createDeepSpiderAgent(options = {}) {
84
138
  const subagentDefaultMiddleware = [
85
139
  todoListMiddleware(),
86
140
  createFilesystemMiddleware({ backend }),
87
- summarizationMiddleware({ model: llm, trigger: { tokens: 170000 }, keep: { messages: 6 } }),
141
+ summarizationMiddleware({ model: summaryLlm, trigger: { tokens: 100000 }, keep: { messages: 6 } }),
88
142
  anthropicPromptCachingMiddleware({ unsupportedModelBehavior: 'ignore' }),
89
143
  createPatchToolCallsMiddleware(),
90
144
  ];
@@ -107,7 +161,10 @@ export function createDeepSpiderAgent(options = {}) {
107
161
  generalPurposeAgent: false,
108
162
  defaultInterruptOn: interruptOn,
109
163
  }),
110
- summarizationMiddleware({ model: llm, trigger: { tokens: 170000 }, keep: { messages: 6 } }),
164
+ // === 预警 + 拦截(在 summarization 之前)===
165
+ createMemoryFlushMiddleware(),
166
+ createToolAvailabilityMiddleware(),
167
+ summarizationMiddleware({ model: summaryLlm, trigger: { tokens: 100000 }, keep: { messages: 6 } }),
111
168
  anthropicPromptCachingMiddleware({ unsupportedModelBehavior: 'ignore' }),
112
169
  createPatchToolCallsMiddleware(),
113
170
  // === HITL(如果启用)===
@@ -122,15 +179,16 @@ export function createDeepSpiderAgent(options = {}) {
122
179
  },
123
180
  }),
124
181
  createToolGuardMiddleware(),
182
+ createToolCallLimitMiddleware(200),
125
183
  createFilterToolsMiddleware(),
126
184
  createValidationWorkflowMiddleware(),
127
- createReportMiddleware({ onReportReady }),
185
+ createReportMiddleware({ onReportReady, onFileSaved }),
128
186
  ],
129
- checkpointer,
187
+ checkpointer: resolvedCheckpointer,
130
188
  });
131
189
  }
132
190
 
133
- // 默认导出
191
+ // 默认导出(内存模式,兼容 MCP 等非 CLI 场景)
134
192
  export const agent = createDeepSpiderAgent();
135
193
 
136
194
  export default agent;
@@ -0,0 +1,48 @@
1
+ /**
2
+ * DeepSpider - Memory Flush 中间件
3
+ * 在 summarization 触发前(85k token),注入 SystemMessage 提醒 Agent 保存关键进度
4
+ */
5
+
6
import { createMiddleware, countTokensApproximately } from 'langchain';
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
8
+
9
+ const FLUSH_THRESHOLD = 85000;
10
+
11
+ const FLUSH_REMINDER = `⚠️ 上下文即将被压缩(当前接近 token 上限)。
12
+ 请立即使用 save_memo 工具保存以下关键信息,否则压缩后将丢失:
13
+ 1. 当前分析目标和已完成的步骤
14
+ 2. 已发现的关键参数、加密逻辑、请求链路
15
+ 3. 下一步计划
16
+
17
+ 保存后继续正常工作。`;
18
+
19
/**
 * Build the memory-flush middleware.
 *
 * Before each model call the approximate token count of `state.messages` is
 * compared with FLUSH_THRESHOLD. The first time the threshold is reached, a
 * reminder (FLUSH_REMINDER) is appended to the conversation; the reminder is
 * re-armed once the count drops below half the threshold.
 *
 * The reminder is sent as a human message rather than a system message:
 * the Anthropic chat adapter only accepts a system message in first position,
 * so a trailing SystemMessage would make the model call fail.
 *
 * @returns {object} Middleware created by `createMiddleware`.
 */
export function createMemoryFlushMiddleware() {
  // NOTE(review): this flag lives in the closure, so it is shared by every
  // conversation that runs through the same middleware instance.
  let flushed = false;

  return createMiddleware({
    name: 'memoryFlushMiddleware',

    beforeModel: async (state) => {
      const tokens = countTokensApproximately(state.messages);

      // Token count dropped sharply (history was compacted): re-arm the reminder.
      if (flushed && tokens < FLUSH_THRESHOLD * 0.5) {
        flushed = false;
      }

      // Below the threshold, or already reminded: leave the state untouched.
      if (flushed || tokens < FLUSH_THRESHOLD) {
        return state;
      }

      flushed = true;
      return {
        ...state,
        messages: [
          ...state.messages,
          new HumanMessage(FLUSH_REMINDER),
        ],
      };
    },
  });
}