deepspider 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/.env.example +3 -0
  2. package/README.md +13 -13
  3. package/package.json +6 -6
  4. package/src/agent/core/PanelBridge.js +28 -76
  5. package/src/agent/core/StreamHandler.js +139 -14
  6. package/src/agent/index.js +51 -12
  7. package/src/agent/logger.js +183 -8
  8. package/src/agent/middleware/report.js +41 -15
  9. package/src/agent/middleware/subagent.js +233 -0
  10. package/src/agent/middleware/toolGuard.js +77 -0
  11. package/src/agent/middleware/validationWorkflow.js +171 -0
  12. package/src/agent/prompts/system.js +181 -59
  13. package/src/agent/run.js +41 -6
  14. package/src/agent/skills/crawler/SKILL.md +64 -3
  15. package/src/agent/skills/crawler/evolved.md +9 -1
  16. package/src/agent/skills/dynamic-analysis/SKILL.md +74 -7
  17. package/src/agent/skills/env/SKILL.md +75 -0
  18. package/src/agent/skills/sandbox/SKILL.md +35 -0
  19. package/src/agent/skills/static-analysis/SKILL.md +98 -2
  20. package/src/agent/subagents/anti-detect.js +10 -20
  21. package/src/agent/subagents/captcha.js +7 -19
  22. package/src/agent/subagents/crawler.js +25 -37
  23. package/src/agent/subagents/factory.js +109 -9
  24. package/src/agent/subagents/index.js +4 -13
  25. package/src/agent/subagents/js2python.js +7 -19
  26. package/src/agent/subagents/reverse.js +180 -0
  27. package/src/agent/tools/analysis.js +84 -1
  28. package/src/agent/tools/anti-detect.js +5 -2
  29. package/src/agent/tools/browser.js +160 -0
  30. package/src/agent/tools/capture.js +24 -3
  31. package/src/agent/tools/correlate.js +129 -15
  32. package/src/agent/tools/crawler.js +2 -1
  33. package/src/agent/tools/crawlerGenerator.js +90 -0
  34. package/src/agent/tools/debug.js +43 -6
  35. package/src/agent/tools/evolve.js +5 -2
  36. package/src/agent/tools/extractor.js +5 -1
  37. package/src/agent/tools/file.js +14 -5
  38. package/src/agent/tools/generateHook.js +66 -0
  39. package/src/agent/tools/hookManager.js +19 -9
  40. package/src/agent/tools/index.js +33 -20
  41. package/src/agent/tools/nodejs.js +41 -6
  42. package/src/agent/tools/sandbox.js +21 -1
  43. package/src/agent/tools/scratchpad.js +70 -0
  44. package/src/agent/tools/tracing.js +26 -0
  45. package/src/agent/tools/verifyAlgorithm.js +117 -0
  46. package/src/browser/EnvBridge.js +27 -13
  47. package/src/browser/client.js +124 -18
  48. package/src/browser/collector.js +101 -22
  49. package/src/browser/defaultHooks.js +3 -1
  50. package/src/browser/hooks/index.js +5 -0
  51. package/src/browser/interceptors/AntiDebugInterceptor.js +132 -0
  52. package/src/browser/interceptors/NetworkInterceptor.js +76 -12
  53. package/src/browser/interceptors/ScriptInterceptor.js +32 -7
  54. package/src/browser/interceptors/index.js +1 -0
  55. package/src/browser/ui/analysisPanel.js +469 -464
  56. package/src/cli/commands/config.js +11 -3
  57. package/src/config/paths.js +9 -1
  58. package/src/config/settings.js +7 -1
  59. package/src/core/PatchGenerator.js +24 -4
  60. package/src/core/Sandbox.js +140 -3
  61. package/src/env/EnvCodeGenerator.js +60 -88
  62. package/src/env/modules/bom/history.js +6 -0
  63. package/src/env/modules/bom/location.js +6 -0
  64. package/src/env/modules/bom/navigator.js +13 -0
  65. package/src/env/modules/bom/screen.js +6 -0
  66. package/src/env/modules/bom/storage.js +7 -0
  67. package/src/env/modules/dom/document.js +14 -0
  68. package/src/env/modules/dom/event.js +4 -0
  69. package/src/env/modules/index.js +27 -10
  70. package/src/env/modules/webapi/fetch.js +4 -0
  71. package/src/env/modules/webapi/url.js +4 -0
  72. package/src/env/modules/webapi/xhr.js +8 -0
  73. package/src/store/DataStore.js +125 -42
  74. package/src/store/Store.js +2 -1
  75. package/src/agent/subagents/dynamic.js +0 -64
  76. package/src/agent/subagents/env-agent.js +0 -82
  77. package/src/agent/subagents/sandbox.js +0 -55
  78. package/src/agent/subagents/static.js +0 -66
@@ -5,6 +5,7 @@
5
5
 
6
6
  import { z } from 'zod';
7
7
  import { tool } from '@langchain/core/tools';
8
+ import { getDataStore } from '../../store/DataStore.js';
8
9
 
9
10
  /**
10
11
  * 分析请求-加密关联
@@ -74,22 +75,40 @@ export const analyzeCorrelation = tool(
74
75
 
75
76
  /**
76
77
  * 解析调用栈顶部
78
+ * 支持两种格式:
79
+ * 1. 字符串栈(来自 Error.stack)
80
+ * 2. callFrames 数组(来自 CDP initiator)
77
81
  */
78
82
  function parseStackTop(stack) {
79
83
  if (!stack) return null;
80
- const lines = stack.split('\n').slice(2, 5);
81
- return lines.map(line => {
82
- const match = line.match(/at\s+(.+?)\s+\((.+?):(\d+):(\d+)\)/) ||
83
- line.match(/at\s+(.+?):(\d+):(\d+)/);
84
- if (match) {
85
- return {
86
- func: match[1] || 'anonymous',
87
- file: match[2] || match[1],
88
- line: parseInt(match[3] || match[2])
89
- };
90
- }
91
- return { raw: line.trim() };
92
- });
84
+
85
+ // 处理 callFrames 数组格式(来自 CDP initiator)
86
+ if (Array.isArray(stack)) {
87
+ return stack.slice(0, 3).map(frame => ({
88
+ func: frame.functionName || frame.func || '(anonymous)',
89
+ file: frame.url || frame.file || '',
90
+ line: frame.lineNumber || frame.line || 0
91
+ }));
92
+ }
93
+
94
+ // 处理字符串栈格式(来自 Error.stack)
95
+ if (typeof stack === 'string') {
96
+ const lines = stack.split('\n').slice(2, 5);
97
+ return lines.map(line => {
98
+ const match = line.match(/at\s+(.+?)\s+\((.+?):(\d+):(\d+)\)/) ||
99
+ line.match(/at\s+(.+?):(\d+):(\d+)/);
100
+ if (match) {
101
+ return {
102
+ func: match[1] || 'anonymous',
103
+ file: match[2] || match[1],
104
+ line: parseInt(match[3] || match[2])
105
+ };
106
+ }
107
+ return { raw: line.trim() };
108
+ });
109
+ }
110
+
111
+ return null;
93
112
  }
94
113
 
95
114
  /**
@@ -183,11 +202,12 @@ export const analyzeCookieEncryption = tool(
183
202
  async ({ logs, cookieName }) => {
184
203
  const parsed = typeof logs === 'string' ? JSON.parse(logs) : logs;
185
204
 
186
- // 找到设置该 cookie 的日志
205
+ // 找到设置该 cookie 的日志(匹配 cookie 键名)
187
206
  const cookieLogs = parsed.filter(entry => {
188
207
  if (entry._type !== 'cookie') return false;
189
208
  if (entry.action !== 'write') return false;
190
- return entry.value?.includes(cookieName);
209
+ // cookie hook 日志的 value 格式为 "name=value",匹配键名部分
210
+ return entry.value?.startsWith(cookieName + '=') || entry.name === cookieName;
191
211
  });
192
212
 
193
213
  if (cookieLogs.length === 0) {
@@ -294,10 +314,104 @@ export const analyzeResponseDecryption = tool(
294
314
  }
295
315
  );
296
316
 
317
/**
 * Classify the textual shape of a parameter value.
 *
 * Pure-hex strings are labelled by length alone (32/40/64 characters map to
 * the md5/sha1/sha256 labels); everything else is tested against a base64
 * shape and then a JWT-like prefix.
 *
 * @param {string} value - Value to classify.
 * @returns {string} One of 'hash-md5', 'hash-sha1', 'hash-sha256', 'hex',
 *   'base64', 'jwt' or 'unknown'.
 */
function identifyPattern(value) {
  const hexOnly = /^[0-9a-fA-F]+$/;
  if (hexOnly.test(value)) {
    switch (value.length) {
      case 32:
        return 'hash-md5';
      case 40:
        return 'hash-sha1';
      case 64:
        return 'hash-sha256';
      default:
        return 'hex';
    }
  }

  const base64Like = /^[A-Za-z0-9+/]{20,}={0,2}$/;
  if (base64Like.test(value)) {
    return 'base64';
  }

  const jwtLike = /^ey[A-Za-z0-9_-]+\./;
  return jwtLike.test(value) ? 'jwt' : 'unknown';
}
331
+
332
/**
 * Decide whether a value has the shape of an encrypted or encoded result.
 *
 * A value qualifies when it is a hex string of at least 32 characters, a
 * base64-shaped string of at least 20 characters (plus optional padding),
 * or starts with a JWT-like `ey….` prefix.
 *
 * @param {string} value - Value to inspect.
 * @returns {boolean} True when any of the shapes matches.
 */
function looksEncrypted(value) {
  const shapes = [
    /^[0-9a-fA-F]{32,}$/,
    /^[A-Za-z0-9+/]{20,}={0,2}$/,
    /^ey[A-Za-z0-9_-]+\./,
  ];
  return shapes.some((shape) => shape.test(value));
}
341
+
342
/**
 * Parse a request body into a parameter object.
 *
 * JSON is tried first, then form-urlencoded. JSON that decodes to `null` or
 * to a primitive (for example the bodies `null`, `123` or `"text"`) is not a
 * parameter container, so it is reported as a raw preview instead of being
 * returned as-is; this guarantees callers always receive an object.
 *
 * @param {string|null|undefined} body - Raw request body.
 * @returns {object} Parsed parameters, `{}` for an empty body, or
 *   `{ _raw }` holding the first 200 characters when no structure is found.
 */
function parseBody(body) {
  if (!body) return {};

  // String() keeps the preview safe even when the body is not a string.
  const rawPreview = () => ({ _raw: String(body).slice(0, 200) });

  let parsed;
  let isJson = true;
  try {
    parsed = JSON.parse(body);
  } catch {
    isJson = false;
  }

  if (isJson) {
    return parsed !== null && typeof parsed === 'object' ? parsed : rawPreview();
  }

  // Not JSON: fall back to form-urlencoded.
  try {
    return Object.fromEntries(new URLSearchParams(body));
  } catch {
    return rawPreview();
  }
}
358
+
359
/**
 * Tool: analyse the parameter structure of a stored request (no hook logs
 * required).
 *
 * Loads the request via the data store, collects URL query parameters and
 * parsed body parameters, and flags values longer than 20 characters that
 * look encrypted. Body parameters override URL parameters of the same name
 * when the two sets are merged for scanning.
 *
 * Returns a JSON string containing `url`, `method`, `urlParams`,
 * `bodyParams`, `suspiciousParams` and `initiator`, or `{ error }` when the
 * request is not found.
 */
export const analyzeRequestParams = tool(
  async ({ site, id }) => {
    const store = getDataStore();
    const detail = await store.getResponse(site, id);
    if (!detail) return JSON.stringify({ error: '未找到该请求' });

    let urlParams = {};
    try {
      urlParams = Object.fromEntries(new URL(detail.url).searchParams);
    } catch { /* invalid URL */ }

    const parsedBody = parseBody(detail.requestBody);
    // typeof null === 'object', so null must be excluded before reading _raw.
    const bodyIsObject = parsedBody !== null && typeof parsedBody === 'object';
    const bodyIsStructured = bodyIsObject && !parsedBody._raw;

    // Scan URL and body parameters together for suspicious values.
    const suspiciousParams = [];
    const allParams = { ...urlParams, ...(bodyIsObject ? parsedBody : {}) };
    for (const [key, value] of Object.entries(allParams)) {
      const str = String(value);
      if (str.length > 20 && looksEncrypted(str)) {
        suspiciousParams.push({
          name: key,
          value: str.slice(0, 50) + (str.length > 50 ? '...' : ''),
          length: str.length,
          pattern: identifyPattern(str),
        });
      }
    }

    // Only strings are guaranteed to have slice(); coerce anything else.
    const rawBody = typeof detail.requestBody === 'string'
      ? detail.requestBody.slice(0, 200)
      : String(detail.requestBody ?? '').slice(0, 200);

    return JSON.stringify({
      url: detail.url,
      method: detail.method,
      urlParams,
      bodyParams: bodyIsStructured ? parsedBody : { _raw: rawBody },
      suspiciousParams,
      initiator: detail.initiator || null,
    }, null, 2);
  },
  {
    name: 'analyze_request_params',
    description: '分析请求的参数结构,自动识别可疑的加密参数(hex/base64/hash)。不依赖 Hook 日志,可直接使用。',
    schema: z.object({
      site: z.string().describe('站点 hostname'),
      id: z.string().describe('请求 ID'),
    }),
  }
);
409
+
297
410
  export const correlateTools = [
298
411
  analyzeCorrelation,
299
412
  locateCryptoSource,
300
413
  analyzeHeaderEncryption,
301
414
  analyzeCookieEncryption,
302
415
  analyzeResponseDecryption,
416
+ analyzeRequestParams,
303
417
  ];
@@ -113,4 +113,5 @@ export const e2eTest = tool(
113
113
  }
114
114
  );
115
115
 
116
- export const crawlerTools = [siteAnalyze, complexityAssess, e2eTest];
116
+ // e2eTest 暂未实现(stub),不导出到子代理工具列表,避免子代理浪费轮次调用空壳工具
117
+ export const crawlerTools = [siteAnalyze, complexityAssess];
@@ -0,0 +1,90 @@
1
+ /**
2
+ * DeepSpider - 爬虫代码生成工具
3
+ * 通过 LangGraph interrupt 机制实现面板交互式选择
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+ import { interrupt } from '@langchain/langgraph';
9
+
10
/**
 * Tool: ask the user which crawler framework to generate code for.
 *
 * Raises a LangGraph interrupt carrying a `choices` payload and returns a
 * JSON string with the value the interrupt resumes with.
 *
 * NOTE(review): SOURCE does not show whether the resume value is the
 * option `id` or its `label`, so the "skip" option is recognised by either.
 * `analysisSummary` is accepted by the schema but not used by this handler.
 */
export const generateCrawlerWithConfirm = tool(
  async ({ domain }) => {
    const userChoice = interrupt({
      type: 'choices',
      question: '分析完成!选择爬虫框架生成完整脚本:',
      options: [
        { id: 'requests', label: 'requests', description: '简单易用,适合快速原型' },
        { id: 'httpx', label: 'httpx', description: '异步高性能,适合大规模并发' },
        { id: 'scrapy', label: 'Scrapy', description: '企业级框架,适合复杂项目' },
        { id: 'skip', label: '不需要', description: '仅保存当前分析结果' },
      ],
    });

    // Match both the option id and its label so a skip is never reported as
    // a framework selection.
    const declined = userChoice === 'skip' || userChoice === '不需要';

    return JSON.stringify({
      success: true,
      framework: userChoice,
      domain,
      message: declined
        ? '用户选择不生成爬虫脚本'
        : `用户选择使用 ${userChoice} 框架生成爬虫`,
    });
  },
  {
    name: 'generate_crawler_code',
    description: `分析完成后,向用户展示可点击的框架选项(requests/httpx/Scrapy/不需要)。

用户点击后,工具返回用户选择的框架名称。根据返回值委托 crawler 子代理生成代码。`,
    schema: z.object({
      analysisSummary: z.string().describe('分析结果摘要'),
      domain: z.string().describe('目标网站域名'),
    }),
  }
);
47
+
48
/**
 * Tool: package the chosen framework, crawler config and domain into a
 * hand-off payload.
 *
 * The handler performs no generation itself; it echoes its arguments back
 * as a JSON string together with a message instructing the agent to call
 * the task tool and delegate to the crawler sub-agent.
 */
export const delegateCrawlerGeneration = tool(
  async ({ framework, config, domain }) => {
    const message = `准备使用 ${framework} 框架生成爬虫,请调用 task 工具委托 crawler 子代理`;
    const handoff = {
      success: true,
      ready: true,
      framework,
      config,
      domain,
      message,
    };
    return JSON.stringify(handoff);
  },
  {
    name: 'delegate_crawler_generation',
    description: '准备参数,委托 crawler 子代理生成特定框架的爬虫代码',
    schema: z.object({
      framework: z
        .enum(['requests', 'httpx', 'scrapy'])
        .describe('用户选择的爬虫框架'),
      config: z
        .object({
          url: z.string(),
          stages: z.array(
            z.object({
              name: z.string(),
              fields: z.array(
                z.object({
                  name: z.string(),
                  xpath: z.string(),
                  type: z.string(),
                })
              ),
              entry: z.any().nullable(),
              pagination: z.any().nullable(),
            })
          ),
        })
        .describe('爬虫配置'),
      domain: z.string().describe('目标网站域名'),
    }),
  }
);
84
+
85
+ export const crawlerGeneratorTools = [
86
+ generateCrawlerWithConfirm,
87
+ delegateCrawlerGeneration,
88
+ ];
89
+
90
+ export default crawlerGeneratorTools;
@@ -6,6 +6,7 @@ import { z } from 'zod';
6
6
  import { tool } from '@langchain/core/tools';
7
7
  import { getBrowser } from '../../browser/index.js';
8
8
  import { CDPSession } from '../../browser/cdp.js';
9
+ import { logStore } from '../logger.js';
9
10
 
10
11
  let cdpSession = null;
11
12
  let isPaused = false;
@@ -19,18 +20,26 @@ async function getSession() {
19
20
  const browser = await getBrowser();
20
21
  cdpSession = await CDPSession.fromBrowser(browser);
21
22
 
22
- // 监听暂停事件
23
+ // 过滤反调试 debugger 语句的噪音:只在命中我们设的断点时打日志
24
+ let lastPauseIsBreakpoint = false;
25
+
23
26
  cdpSession.on('Debugger.paused', (params) => {
24
- isPaused = true;
25
- currentCallFrames = params.callFrames || [];
26
- console.log('[debug] Debugger paused, callFrames:', currentCallFrames.length);
27
+ lastPauseIsBreakpoint = params.reason === 'breakpoint' || params.hitBreakpoints?.length > 0;
28
+ if (lastPauseIsBreakpoint) {
29
+ isPaused = true;
30
+ currentCallFrames = params.callFrames || [];
31
+ const top = currentCallFrames[0];
32
+ const func = top?.functionName || '(anonymous)';
33
+ const url = top?.url?.split('/').pop() || top?.url || '?';
34
+ const line = top?.location?.lineNumber ?? '?';
35
+ console.log(`[debug] Breakpoint hit: ${func} @ ${url}:${line}`);
36
+ }
27
37
  });
28
38
 
29
- // 监听恢复事件
30
39
  cdpSession.on('Debugger.resumed', () => {
31
40
  isPaused = false;
32
41
  currentCallFrames = [];
33
- console.log('[debug] Debugger resumed');
42
+ lastPauseIsBreakpoint = false;
34
43
  });
35
44
  }
36
45
  return cdpSession;
@@ -235,6 +244,33 @@ export const stepOver = tool(
235
244
  }
236
245
  );
237
246
 
247
/**
 * Tool: query the agent's execution logs.
 *
 * With `category === 'stats'` the log store's statistics are returned;
 * otherwise the store is queried with the given category, level, limit and
 * tool name. Either result is returned as pretty-printed JSON.
 */
export const getAgentLogs = tool(
  async ({ category, level, limit, toolName }) => {
    const payload = category === 'stats'
      ? logStore.getStats()
      : logStore.query({ category, level, limit, toolName });
    return JSON.stringify(payload, null, 2);
  },
  {
    name: 'get_agent_logs',
    description: '获取当前 Agent 会话的执行日志,包括 LLM 调用、工具调用、错误等。用于调试和分析 Agent 执行过程。category=stats 可获取统计概览。',
    schema: z.object({
      category: z
        .enum(['LLM', 'TOOL', 'CHAIN', 'AGENT', 'stats'])
        .optional()
        .describe('日志类别:LLM/TOOL/CHAIN/AGENT,或 stats 获取统计'),
      level: z
        .enum(['INFO', 'DEBUG', 'ERROR'])
        .optional()
        .describe('日志级别'),
      limit: z
        .number()
        .optional()
        .default(50)
        .describe('返回条数(默认50,最近的N条)'),
      toolName: z
        .string()
        .optional()
        .describe('按工具名过滤(仅 TOOL 类别有效)'),
    }),
  }
);
273
+
238
274
  export const debugTools = [
239
275
  setBreakpoint,
240
276
  setXHRBreakpoint,
@@ -243,4 +279,5 @@ export const debugTools = [
243
279
  evaluateAtBreakpoint,
244
280
  resumeExecution,
245
281
  stepOver,
282
+ getAgentLogs,
246
283
  ];
@@ -23,6 +23,9 @@ function getSkillPath(skillName) {
23
23
  'sandbox': SKILLS.sandbox,
24
24
  'env': SKILLS.env,
25
25
  'js2python': SKILLS.js2python,
26
+ 'crawler': SKILLS.crawler,
27
+ 'captcha': SKILLS.captcha,
28
+ 'anti-detect': SKILLS.antiDetect,
26
29
  'report': SKILLS.report,
27
30
  'general': SKILLS.general,
28
31
  };
@@ -83,7 +86,7 @@ export const evolveSkill = tool(
83
86
  if (!skillInfo) {
84
87
  return JSON.stringify({
85
88
  success: false,
86
- error: `未知的 skill: ${skill}。可用: static-analysis, dynamic-analysis, sandbox, env, js2python, report, general。或使用 new:<name> 创建新 skill。`
89
+ error: `未知的 skill: ${skill}。可用: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general。或使用 new:<name> 创建新 skill。`
87
90
  });
88
91
  }
89
92
 
@@ -152,7 +155,7 @@ export const evolveSkill = tool(
152
155
  name: 'evolve_skill',
153
156
  description: '记录分析过程中学到的经验。支持现有 skill 或 new:<name> 创建新 skill',
154
157
  schema: z.object({
155
- skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, report, general,或 new:<name> 创建新 skill'),
158
+ skill: z.string().describe('目标 skill: static-analysis, dynamic-analysis, sandbox, env, js2python, crawler, captcha, anti-detect, report, general,或 new:<name> 创建新 skill'),
156
159
  title: z.string().describe('经验标题,简短描述'),
157
160
  scenario: z.string().describe('具体场景/案例'),
158
161
  insight: z.string().describe('一句话总结经验'),
@@ -34,16 +34,20 @@ export const listFunctions = tool(
34
34
  */
35
35
  export const getFunctionCode = tool(
36
36
  async ({ code, funcName }) => {
37
+ // buildDependencyGraph 先调用,extractSlice 内部会复用 this.ast 缓存
38
+ const graph = astAnalyzer.buildDependencyGraph(code);
39
+ const deps = graph.get(funcName) || [];
37
40
  const slice = astAnalyzer.extractSlice(code, funcName);
38
41
  return JSON.stringify({
39
42
  funcName,
40
43
  found: !!slice,
41
44
  code: slice || '未找到该函数',
45
+ dependencies: deps,
42
46
  }, null, 2);
43
47
  },
44
48
  {
45
49
  name: 'get_function_code',
46
- description: '获取指定函数的代码片段',
50
+ description: '提取指定函数的完整代码(含递归依赖函数和全局变量)。返回可独立运行的代码片段 + 依赖函数列表',
47
51
  schema: z.object({
48
52
  code: z.string().describe('源代码'),
49
53
  funcName: z.string().describe('函数名'),
@@ -6,7 +6,7 @@
6
6
  import { z } from 'zod';
7
7
  import { tool } from '@langchain/core/tools';
8
8
  import { writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
9
- import { dirname, join, isAbsolute, relative } from 'path';
9
+ import { dirname, join, isAbsolute, relative, resolve } from 'path';
10
10
  import { PATHS, ensureDir, DEEPSPIDER_HOME } from '../../config/paths.js';
11
11
 
12
12
  const OUTPUT_DIR = PATHS.OUTPUT_DIR;
@@ -17,15 +17,24 @@ function ensureFileDir(filePath) {
17
17
  }
18
18
 
19
19
  function getSafePath(filePath) {
20
+ let resolved;
20
21
  if (isAbsolute(filePath)) {
21
22
  // 如果是 ~/.deepspider/ 目录下的路径,直接使用
22
23
  if (filePath.startsWith(DEEPSPIDER_HOME)) {
23
- return filePath;
24
+ resolved = filePath;
25
+ } else {
26
+ // 其他绝对路径:放到 OUTPUT_DIR 下
27
+ resolved = join(OUTPUT_DIR, filePath.replace(/^\/+/, ''));
24
28
  }
25
- // 其他绝对路径:放到 OUTPUT_DIR 下
26
- return join(OUTPUT_DIR, filePath.replace(/^\/+/, ''));
29
+ } else {
30
+ resolved = join(OUTPUT_DIR, filePath);
27
31
  }
28
- return join(OUTPUT_DIR, filePath);
32
+ // 防止 ../ 穿越到 DEEPSPIDER_HOME 之外
33
+ const normalized = resolve(resolved);
34
+ if (!normalized.startsWith(DEEPSPIDER_HOME)) {
35
+ throw new Error(`路径不允许超出 ${DEEPSPIDER_HOME}: ${filePath}`);
36
+ }
37
+ return normalized;
29
38
  }
30
39
 
31
40
  export const artifactSave = tool(
@@ -0,0 +1,66 @@
1
+ /**
2
+ * DeepSpider - 统一 Hook 代码生成工具
3
+ * 合并 hookTools + cryptoHookTools + asyncTools + antiDebugTools
4
+ */
5
+
6
+ import { z } from 'zod';
7
+ import { tool } from '@langchain/core/tools';
8
+ import { NetworkHook } from '../../env/NetworkHook.js';
9
+ import { CookieHook } from '../../env/CookieHook.js';
10
+ import { CryptoHook } from '../../env/CryptoHook.js';
11
+ import { AsyncHook } from '../../env/AsyncHook.js';
12
+ import { AntiAntiDebug } from '../../env/AntiAntiDebug.js';
13
+
14
+ const networkHook = new NetworkHook();
15
+ const cookieHook = new CookieHook();
16
+ const cryptoHook = new CryptoHook();
17
+ const asyncHook = new AsyncHook();
18
+ const antiDebug = new AntiAntiDebug();
19
+
20
+ const HOOK_TYPES = {
21
+ // 网络
22
+ xhr: { gen: () => networkHook.generateXHRHookCode({ captureBody: true, captureResponse: true }), usage: "getLogs('xhr')" },
23
+ fetch: { gen: () => networkHook.generateFetchHookCode({ captureBody: true, captureResponse: true }), usage: "getLogs('fetch')" },
24
+ cookie: { gen: () => cookieHook.generateCookieHookCode({ trackRead: true, trackWrite: true }), usage: "getLogs('cookie')" },
25
+ // 加密
26
+ cryptojs: { gen: () => cryptoHook.generateCryptoJSHookCode(), usage: "getLogs('crypto')" },
27
+ sm_crypto: { gen: () => cryptoHook.generateSMCryptoHookCode(), usage: "getLogs('crypto')" },
28
+ rsa: { gen: () => cryptoHook.generateRSAHookCode(), usage: "getLogs('crypto')" },
29
+ generic_crypto: { gen: () => cryptoHook.generateGenericCryptoHookCode(), usage: "getLogs('crypto')" },
30
+ // 异步
31
+ promise: { gen: () => asyncHook.generatePromiseHookCode(), usage: "getLogs('async')" },
32
+ timer: { gen: () => asyncHook.generateTimerHookCode(), usage: "getLogs('timer')" },
33
+ // 反反调试
34
+ anti_debugger: { gen: () => antiDebug.generateAntiDebuggerCode(), usage: '绕过无限 debugger' },
35
+ anti_console: { gen: () => antiDebug.generateAntiConsoleDetectCode(), usage: '绕过控制台检测' },
36
+ anti_cdp: { gen: () => antiDebug.generateAntiCDPDetectCode(), usage: '绕过 CDP 检测' },
37
+ anti_debug_full: { gen: () => antiDebug.generateFullAntiDebugCode(), usage: '完整反反调试(包含以上所有)' },
38
+ };
39
+
40
+ const typeEnum = /** @type {[string, ...string[]]} */ (Object.keys(HOOK_TYPES));
41
+
42
/**
 * Tool: generate hook code for the requested hook type.
 *
 * Looks the type up in HOOK_TYPES, runs its generator and returns a JSON
 * string with the generated code and a usage hint. Unknown types yield a
 * `{ success: false, error }` payload listing the valid types.
 */
export const generateHook = tool(
  async ({ type }) => {
    // Own-property lookup: inherited keys such as "constructor" or
    // "toString" would otherwise pass the guard and fail at entry.gen().
    const entry = Object.prototype.hasOwnProperty.call(HOOK_TYPES, type)
      ? HOOK_TYPES[type]
      : undefined;
    if (!entry) {
      return JSON.stringify({ success: false, error: `未知类型: ${type},可选: ${typeEnum.join(', ')}` });
    }
    const code = entry.gen();
    return JSON.stringify({ success: true, type, code, usage: entry.usage }, null, 2);
  },
  {
    name: 'generate_hook',
    description: `生成 Hook 代码。生成后需通过 inject_hook 注入浏览器。

类型:
- 网络: xhr, fetch, cookie
- 加密: cryptojs(CryptoJS), sm_crypto(国密), rsa(JSEncrypt/node-forge), generic_crypto(通用)
- 异步: promise, timer
- 反反调试: anti_debugger, anti_console, anti_cdp, anti_debug_full(完整)`,
    schema: z.object({
      type: z.enum(typeEnum).describe('Hook 类型'),
    }),
  }
);
65
+
66
+ export const generateHookTools = [generateHook];
@@ -8,16 +8,28 @@ import { tool } from '@langchain/core/tools';
8
8
  import { getBrowser } from '../../browser/index.js';
9
9
 
10
10
  /**
11
- * 通过 CDP 执行 JS
11
+ * 通过 CDP 执行 JS(带超时保护)
12
12
  */
13
- async function evaluateViaCDP(browser, expression) {
13
+ async function evaluateViaCDP(browser, expression, timeout = 5000) {
14
14
  const cdp = await browser.getCDPSession();
15
15
  if (!cdp) return null;
16
- const result = await cdp.send('Runtime.evaluate', {
16
+
17
+ const evaluatePromise = cdp.send('Runtime.evaluate', {
17
18
  expression,
18
19
  returnByValue: true,
19
20
  });
20
- return result.result?.value;
21
+
22
+ const timeoutPromise = new Promise((_, reject) =>
23
+ setTimeout(() => reject(new Error('CDP evaluate timeout')), timeout)
24
+ );
25
+
26
+ try {
27
+ const result = await Promise.race([evaluatePromise, timeoutPromise]);
28
+ return result.result?.value;
29
+ } catch (e) {
30
+ console.error('[evaluateViaCDP] 超时或错误:', e.message);
31
+ return null;
32
+ }
21
33
  }
22
34
 
23
35
  /**
@@ -115,14 +127,12 @@ export const injectHook = tool(
115
127
  if (!browser.getPage()) {
116
128
  return JSON.stringify({ success: false, error: '浏览器未就绪' });
117
129
  }
118
- const escapedCode = code
119
- .replace(/\\/g, '\\\\')
120
- .replace(/'/g, "\\'")
121
- .replace(/\n/g, '\\n');
130
+ // JSON.stringify 安全转义,避免手动转义遗漏特殊字符
131
+ const safeCode = JSON.stringify(code);
122
132
 
123
133
  const result = await evaluateViaCDP(
124
134
  browser,
125
- `JSON.stringify(window.__deepspider__?.injectHook?.('${escapedCode}'))`
135
+ `JSON.stringify(window.__deepspider__?.injectHook?.(${safeCode}))`
126
136
  );
127
137
  return result || JSON.stringify({ success: false, error: '注入失败' });
128
138
  } catch (e) {