deepspider 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/check.md +122 -0
- package/.claude/agents/debug.md +106 -0
- package/.claude/agents/dispatch.md +214 -0
- package/.claude/agents/implement.md +96 -0
- package/.claude/agents/plan.md +396 -0
- package/.claude/agents/research.md +120 -0
- package/.claude/commands/evolve/merge.md +80 -0
- package/.claude/commands/trellis/before-backend-dev.md +13 -0
- package/.claude/commands/trellis/before-frontend-dev.md +13 -0
- package/.claude/commands/trellis/break-loop.md +107 -0
- package/.claude/commands/trellis/check-backend.md +13 -0
- package/.claude/commands/trellis/check-cross-layer.md +153 -0
- package/.claude/commands/trellis/check-frontend.md +13 -0
- package/.claude/commands/trellis/create-command.md +154 -0
- package/.claude/commands/trellis/finish-work.md +129 -0
- package/.claude/commands/trellis/integrate-skill.md +219 -0
- package/.claude/commands/trellis/onboard.md +358 -0
- package/.claude/commands/trellis/parallel.md +193 -0
- package/.claude/commands/trellis/record-session.md +62 -0
- package/.claude/commands/trellis/start.md +280 -0
- package/.claude/commands/trellis/update-spec.md +213 -0
- package/.claude/hooks/inject-subagent-context.py +758 -0
- package/.claude/hooks/ralph-loop.py +374 -0
- package/.claude/hooks/session-start.py +126 -0
- package/.claude/settings.json +41 -0
- package/.claude/skills/deepagents-guide/SKILL.md +428 -0
- package/.cursor/commands/trellis-before-backend-dev.md +13 -0
- package/.cursor/commands/trellis-before-frontend-dev.md +13 -0
- package/.cursor/commands/trellis-break-loop.md +107 -0
- package/.cursor/commands/trellis-check-backend.md +13 -0
- package/.cursor/commands/trellis-check-cross-layer.md +153 -0
- package/.cursor/commands/trellis-check-frontend.md +13 -0
- package/.cursor/commands/trellis-create-command.md +154 -0
- package/.cursor/commands/trellis-finish-work.md +129 -0
- package/.cursor/commands/trellis-integrate-skill.md +219 -0
- package/.cursor/commands/trellis-onboard.md +358 -0
- package/.cursor/commands/trellis-record-session.md +62 -0
- package/.cursor/commands/trellis-start.md +156 -0
- package/.cursor/commands/trellis-update-spec.md +213 -0
- package/.env.example +11 -0
- package/.husky/pre-commit +1 -0
- package/.mcp.json +8 -0
- package/.trellis/.template-hashes.json +65 -0
- package/.trellis/.version +1 -0
- package/.trellis/scripts/add-session.sh +384 -0
- package/.trellis/scripts/common/developer.sh +129 -0
- package/.trellis/scripts/common/git-context.sh +263 -0
- package/.trellis/scripts/common/paths.sh +208 -0
- package/.trellis/scripts/common/phase.sh +150 -0
- package/.trellis/scripts/common/registry.sh +247 -0
- package/.trellis/scripts/common/task-queue.sh +142 -0
- package/.trellis/scripts/common/task-utils.sh +151 -0
- package/.trellis/scripts/common/worktree.sh +128 -0
- package/.trellis/scripts/create-bootstrap.sh +299 -0
- package/.trellis/scripts/get-context.sh +7 -0
- package/.trellis/scripts/get-developer.sh +15 -0
- package/.trellis/scripts/init-developer.sh +34 -0
- package/.trellis/scripts/multi-agent/cleanup.sh +396 -0
- package/.trellis/scripts/multi-agent/create-pr.sh +241 -0
- package/.trellis/scripts/multi-agent/plan.sh +207 -0
- package/.trellis/scripts/multi-agent/start.sh +310 -0
- package/.trellis/scripts/multi-agent/status.sh +828 -0
- package/.trellis/scripts/task.sh +1118 -0
- package/.trellis/spec/backend/deepagents-guide.md +337 -0
- package/.trellis/spec/backend/directory-structure.md +126 -0
- package/.trellis/spec/backend/examples/skills/deepagents-guide/README.md +11 -0
- package/.trellis/spec/backend/examples/skills/deepagents-guide/agent.js.template +20 -0
- package/.trellis/spec/backend/examples/skills/deepagents-guide/skills-config.js.template +13 -0
- package/.trellis/spec/backend/examples/skills/deepagents-guide/subagent.js.template +19 -0
- package/.trellis/spec/backend/hook-guidelines.md +178 -0
- package/.trellis/spec/backend/index.md +36 -0
- package/.trellis/spec/backend/quality-guidelines.md +201 -0
- package/.trellis/spec/backend/state-management.md +76 -0
- package/.trellis/spec/backend/tool-guidelines.md +144 -0
- package/.trellis/spec/backend/type-safety.md +71 -0
- package/.trellis/spec/guides/code-reuse-thinking-guide.md +92 -0
- package/.trellis/spec/guides/cross-layer-thinking-guide.md +94 -0
- package/.trellis/spec/guides/index.md +79 -0
- package/.trellis/tasks/archive/02-02-evolving-skills/prd.md +61 -0
- package/.trellis/tasks/archive/02-02-evolving-skills/task.json +29 -0
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/prd.md +86 -0
- package/.trellis/tasks/archive/2026-02/00-bootstrap-guidelines/task.json +27 -0
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/check.jsonl +3 -0
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/debug.jsonl +2 -0
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/implement.jsonl +5 -0
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/prd.md +33 -0
- package/.trellis/tasks/archive/2026-02/02-02-skills-system/task.json +41 -0
- package/.trellis/workflow.md +407 -0
- package/.trellis/workspace/index.md +123 -0
- package/.trellis/workspace/pony/index.md +40 -0
- package/.trellis/workspace/pony/journal-1.md +7 -0
- package/.trellis/worktree.yaml +47 -0
- package/AGENTS.md +18 -0
- package/CLAUDE.md +292 -0
- package/README.md +134 -0
- package/agents/deepspider.md +142 -0
- package/docs/DEBUG.md +42 -0
- package/docs/GUIDE.md +334 -0
- package/docs/PROMPT.md +60 -0
- package/docs/USAGE.md +226 -0
- package/eslint.config.js +51 -0
- package/package.json +78 -0
- package/requirements-crypto.txt +14 -0
- package/src/agent/index.js +97 -0
- package/src/agent/logger.js +164 -0
- package/src/agent/middleware/filterTools.js +64 -0
- package/src/agent/middleware/report.js +79 -0
- package/src/agent/prompts/system.js +315 -0
- package/src/agent/run.js +575 -0
- package/src/agent/skills/anti-detect/SKILL.md +28 -0
- package/src/agent/skills/anti-detect/evolved.md +12 -0
- package/src/agent/skills/captcha/SKILL.md +37 -0
- package/src/agent/skills/captcha/evolved.md +12 -0
- package/src/agent/skills/config.js +30 -0
- package/src/agent/skills/crawler/SKILL.md +9 -0
- package/src/agent/skills/crawler/evolved.md +16 -0
- package/src/agent/skills/dynamic-analysis/SKILL.md +91 -0
- package/src/agent/skills/dynamic-analysis/evolved.md +12 -0
- package/src/agent/skills/env/SKILL.md +72 -0
- package/src/agent/skills/env/evolved.md +12 -0
- package/src/agent/skills/evolve.js +79 -0
- package/src/agent/skills/general/SKILL.md +12 -0
- package/src/agent/skills/general/evolved.md +12 -0
- package/src/agent/skills/js2python/SKILL.md +30 -0
- package/src/agent/skills/js2python/evolved.md +13 -0
- package/src/agent/skills/report/SKILL.md +21 -0
- package/src/agent/skills/report/evolved.md +12 -0
- package/src/agent/skills/sandbox/SKILL.md +22 -0
- package/src/agent/skills/sandbox/evolved.md +16 -0
- package/src/agent/skills/static-analysis/SKILL.md +93 -0
- package/src/agent/skills/static-analysis/evolved.md +12 -0
- package/src/agent/skills/xpath/SKILL.md +119 -0
- package/src/agent/subagents/anti-detect.js +45 -0
- package/src/agent/subagents/captcha.js +51 -0
- package/src/agent/subagents/crawler.js +138 -0
- package/src/agent/subagents/dynamic.js +64 -0
- package/src/agent/subagents/env-agent.js +82 -0
- package/src/agent/subagents/index.js +37 -0
- package/src/agent/subagents/js2python.js +72 -0
- package/src/agent/subagents/sandbox.js +55 -0
- package/src/agent/subagents/static.js +66 -0
- package/src/agent/tools/analysis.js +135 -0
- package/src/agent/tools/analyzer.js +85 -0
- package/src/agent/tools/anti-detect.js +89 -0
- package/src/agent/tools/antidebug.js +64 -0
- package/src/agent/tools/async.js +43 -0
- package/src/agent/tools/browser.js +324 -0
- package/src/agent/tools/captcha.js +223 -0
- package/src/agent/tools/capture.js +179 -0
- package/src/agent/tools/correlate.js +303 -0
- package/src/agent/tools/crawler.js +116 -0
- package/src/agent/tools/cryptohook.js +80 -0
- package/src/agent/tools/debug.js +246 -0
- package/src/agent/tools/deobfuscator.js +90 -0
- package/src/agent/tools/env.js +83 -0
- package/src/agent/tools/envdump.js +92 -0
- package/src/agent/tools/evolve.js +164 -0
- package/src/agent/tools/extract.js +114 -0
- package/src/agent/tools/extractor.js +54 -0
- package/src/agent/tools/file.js +224 -0
- package/src/agent/tools/hook.js +84 -0
- package/src/agent/tools/hookManager.js +178 -0
- package/src/agent/tools/index.js +137 -0
- package/src/agent/tools/nodejs.js +101 -0
- package/src/agent/tools/patch.js +46 -0
- package/src/agent/tools/preprocess.js +71 -0
- package/src/agent/tools/profile.js +122 -0
- package/src/agent/tools/python.js +627 -0
- package/src/agent/tools/report.js +124 -0
- package/src/agent/tools/runtime.js +132 -0
- package/src/agent/tools/sandbox.js +79 -0
- package/src/agent/tools/store.js +73 -0
- package/src/agent/tools/trace.js +74 -0
- package/src/agent/tools/tracing.js +201 -0
- package/src/agent/tools/utils.js +51 -0
- package/src/agent/tools/verify.js +184 -0
- package/src/agent/tools/webcrack.js +109 -0
- package/src/analyzer/ASTAnalyzer.js +387 -0
- package/src/analyzer/CallStackAnalyzer.js +379 -0
- package/src/analyzer/Deobfuscator.js +289 -0
- package/src/analyzer/EncryptionAnalyzer.js +99 -0
- package/src/analyzer/index.js +22 -0
- package/src/browser/EnvBridge.js +186 -0
- package/src/browser/cdp.js +168 -0
- package/src/browser/client.js +197 -0
- package/src/browser/collector.js +444 -0
- package/src/browser/collectors/RequestCryptoLinker.js +109 -0
- package/src/browser/collectors/ResponseSearcher.js +107 -0
- package/src/browser/collectors/ScriptCollector.js +158 -0
- package/src/browser/collectors/index.js +26 -0
- package/src/browser/defaultHooks.js +932 -0
- package/src/browser/hooks/crypto.js +55 -0
- package/src/browser/hooks/index.js +64 -0
- package/src/browser/hooks/native.js +9 -0
- package/src/browser/hooks/network.js +33 -0
- package/src/browser/index.js +42 -0
- package/src/browser/interceptors/NetworkInterceptor.js +116 -0
- package/src/browser/interceptors/ScriptInterceptor.js +76 -0
- package/src/browser/interceptors/index.js +6 -0
- package/src/browser/ui/analysisPanel.js +1782 -0
- package/src/browser/ui/confirmDialog.js +158 -0
- package/src/browser/ui/panel.html +152 -0
- package/src/browser/ui/selector.js +170 -0
- package/src/config/index.js +5 -0
- package/src/config/paths.js +71 -0
- package/src/config/patterns/crypto.js +36 -0
- package/src/config/profiles/chrome.json +71 -0
- package/src/config/profiles/firefox.json +44 -0
- package/src/config/profiles/safari.json +38 -0
- package/src/core/EnvMonitor.js +200 -0
- package/src/core/PatchGenerator.js +278 -0
- package/src/core/Sandbox.js +181 -0
- package/src/env/AntiAntiDebug.js +111 -0
- package/src/env/AsyncHook.js +68 -0
- package/src/env/BrowserAPIList.js +265 -0
- package/src/env/CookieHook.js +48 -0
- package/src/env/CryptoHook.js +205 -0
- package/src/env/EnvCodeGenerator.js +157 -0
- package/src/env/EnvDumper.js +356 -0
- package/src/env/EnvExtractor.js +220 -0
- package/src/env/HookBase.js +618 -0
- package/src/env/NetworkHook.js +159 -0
- package/src/env/modules/bom/history.js +29 -0
- package/src/env/modules/bom/location.js +26 -0
- package/src/env/modules/bom/navigator.js +70 -0
- package/src/env/modules/bom/screen.js +26 -0
- package/src/env/modules/bom/storage.js +23 -0
- package/src/env/modules/dom/document.js +110 -0
- package/src/env/modules/dom/event.js +51 -0
- package/src/env/modules/index.js +34 -0
- package/src/env/modules/webapi/fetch.js +46 -0
- package/src/env/modules/webapi/url.js +47 -0
- package/src/env/modules/webapi/xhr.js +48 -0
- package/src/index.js +27 -0
- package/src/mcp/server.js +89 -0
- package/src/store/DataStore.js +708 -0
- package/src/store/Store.js +158 -0
- package/src/store/Validator.js +24 -0
- package/test/analyze.test.js +90 -0
- package/test/envdump.test.js +74 -0
- package/test/flow.test.js +90 -0
- package/test/hooks.test.js +138 -0
- package/test/plugin.test.js +35 -0
- package/test/refactor-full.test.js +30 -0
- package/test/refactor.test.js +21 -0
- package/test/samples/obfuscated.js +61 -0
- package/test/samples/original.js +66 -0
- package/test/samples/v10_eval_chain.js +52 -0
- package/test/samples/v11_bytecode_vm.js +81 -0
- package/test/samples/v12_polymorphic.js +69 -0
- package/test/samples/v1_ob_basic.js +98 -0
- package/test/samples/v2_ob_advanced.js +99 -0
- package/test/samples/v3_jjencode.js +77 -0
- package/test/samples/v4_aaencode.js +73 -0
- package/test/samples/v5_control_flow.js +86 -0
- package/test/samples/v6_string_encryption.js +71 -0
- package/test/samples/v7_jsvmp.js +83 -0
- package/test/samples/v8_anti_debug.js +79 -0
- package/test/samples/v9_proxy_trap.js +49 -0
- package/test/samples.test.js +96 -0
- package/test/webcrack.test.js +55 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 加密库 Hook
|
|
3
|
+
* 已废弃,请使用 src/env/CryptoHook.js
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Source text of a browser-side script that instruments common crypto libraries.
 *
 * The script is meant to be injected into a page. It does nothing unless
 * `window.__deepspider__` exists, and it uses two members of that object:
 * `log(type, data)` to report findings and `native(wrapper, original)` to wrap
 * each replacement function (presumably to disguise it as the native one —
 * confirm against hooks/native.js).
 *
 * Installed hooks:
 * - `Function.prototype.apply`: when the first element of the argument array looks
 *   like a CryptoJS cipher-params object (`ciphertext`, `key`, `algorithm`), logs the
 *   key, IV and mode name.
 * - `Function.prototype.call`: when the `this` argument looks like an RSA object
 *   (its prototype exposes `getPublicKey` and `encrypt`), wraps `encrypt` on the
 *   grand-parent prototype once so every encryption is logged.
 *
 * The saved natives are always invoked through a captured `Reflect.apply`. Calling
 * them as `_apply.call(...)` / `_call.call(...)` would look up `.call` on
 * `Function.prototype`, which is the hook itself once installed, and recurse until
 * the stack overflows.
 */
export const cryptoHook = `
(function() {
  const deepspider = window.__deepspider__;
  if (!deepspider) return;

  // Captured before any hook is installed; using it never performs a
  // ".call" / ".apply" property lookup, so the hooks cannot re-enter themselves.
  const reflectApply = Reflect.apply;

  // Hook Function.prototype.apply (CryptoJS)
  const _apply = Function.prototype.apply;
  const applyHook = function() {
    const result = reflectApply(_apply, this, arguments);
    try {
      if (arguments.length === 2 && arguments[1]?.[0]) {
        const cfg = arguments[1][0];
        if (cfg.ciphertext && cfg.key && cfg.algorithm) {
          deepspider.log('crypto', {
            algo: 'CryptoJS',
            key: cfg.key?.toString?.() || '',
            iv: cfg.iv?.toString?.() || '',
            mode: cfg.mode?.name || 'unknown'
          });
        }
      }
    } catch (e) {}
    return result;
  };
  Function.prototype.apply = deepspider.native(applyHook, _apply);

  // Hook RSA
  const _call = Function.prototype.call;
  const callHook = function() {
    const result = reflectApply(_call, this, arguments);
    try {
      const arg = arguments[0];
      if (arg?.__proto__?.getPublicKey && arg?.__proto__?.encrypt) {
        const proto = arg.__proto__.__proto__;
        if (proto?.encrypt && !proto.__hooked__) {
          proto.__hooked__ = true;
          const _enc = proto.encrypt;
          proto.encrypt = deepspider.native(function(data) {
            const enc = reflectApply(_enc, this, arguments);
            deepspider.log('crypto', { algo: 'RSA', data, encrypted: enc });
            return enc;
          }, _enc);
        }
      }
    } catch (e) {}
    return result;
  };
  Function.prototype.call = deepspider.native(callHook, _call);
})();
`;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - Hook 管理器
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { cryptoHook } from './crypto.js';
|
|
6
|
+
import { networkHook } from './network.js';
|
|
7
|
+
import { nativeProtect } from './native.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Builds the combined hook script, installs it on a page and collects the
 * console messages the hooks emit.
 */
export class HookManager {
  constructor() {
    // Captured console entries: { type, text, timestamp }.
    this.logs = [];
    // Optional callback invoked with { type, text } for every captured entry.
    this.onLog = null;
  }

  /**
   * Concatenate every hook script, separated by a blank line.
   * Order: native protection, crypto hooks, network hooks.
   * @returns {string} the script source to inject
   */
  getCombinedScript() {
    const parts = [nativeProtect, cryptoHook, networkHook];
    return parts.join('\n\n');
  }

  /**
   * Register the combined script as an init script on the page, then start
   * capturing console messages that carry the DeepSpider marker.
   * @param {object} page - page object exposing `addInitScript` and `on`
   *   (presumably a Playwright page — confirm against the caller)
   * @returns {Promise<void>}
   */
  async inject(page) {
    const combined = this.getCombinedScript();

    // Registered as an init script so it is in place before page scripts run.
    await page.addInitScript(combined);

    const captureConsole = (msg) => {
      const text = msg.text();
      // Only messages tagged by the hooks are of interest.
      if (!text.includes('[DeepSpider:')) return;

      const entry = {
        type: msg.type(),
        text,
        timestamp: Date.now(),
      };
      this.logs.push(entry);

      if (this.onLog) {
        this.onLog({ type: msg.type(), text });
      }
    };

    page.on('console', captureConsole);
  }

  /**
   * @returns {Array<object>} the live array of captured entries (not a copy)
   */
  getLogs() {
    return this.logs;
  }

  /**
   * Drop every captured entry by replacing the array with a fresh one.
   */
  clearLogs() {
    this.logs = [];
  }
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 网络请求 Hook
|
|
3
|
+
* 已废弃,请使用 src/env/NetworkHook.js
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Source text of a browser-side script that logs outgoing network requests.
 *
 * The script is meant to be injected into a page. It does nothing unless
 * `window.__deepspider__` exists, and it uses `log(type, data)` and
 * `native(wrapper, original)` from that object.
 *
 * Installed hooks:
 * - `window.fetch`: logs `{ url, body }` under the `fetch` type, then forwards the
 *   caller's original arguments untouched.
 * - `XMLHttpRequest.prototype.open`: remembers method and URL on the instance
 *   (`_method`, `_url`).
 * - `XMLHttpRequest.prototype.send`: logs `{ method, url, body }` under the `xhr` type.
 *
 * Logging is best-effort: a throwing logger, or a `null` init object passed to
 * `fetch`, must never make the page's own request fail. The originals are invoked
 * through a captured `Reflect.apply`, so this script does not depend on
 * `Function.prototype.call` / `apply`, which other hooks may replace.
 */
export const networkHook = `
(function() {
  const deepspider = window.__deepspider__;
  if (!deepspider) return;

  const reflectApply = Reflect.apply;

  // Instrumentation must never break the request it observes.
  const safeLog = function(type, data) {
    try {
      deepspider.log(type, data);
    } catch (e) {}
  };

  // Hook fetch
  const _fetch = window.fetch;
  window.fetch = deepspider.native(async function(url, options = {}) {
    // options can be null (fetch treats it as an empty init), hence "?.".
    safeLog('fetch', { url, body: options?.body });
    return reflectApply(_fetch, this, arguments);
  }, _fetch);

  // Hook XHR
  const _open = XMLHttpRequest.prototype.open;
  const _send = XMLHttpRequest.prototype.send;

  XMLHttpRequest.prototype.open = deepspider.native(function(method, url) {
    this._url = url;
    this._method = method;
    return reflectApply(_open, this, arguments);
  }, _open);

  XMLHttpRequest.prototype.send = deepspider.native(function(body) {
    safeLog('xhr', { method: this._method, url: this._url, body });
    return reflectApply(_send, this, arguments);
  }, _send);
})();
`;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - 浏览器基础设施
|
|
3
|
+
* 提供真实浏览器环境,作为动态分析的基础
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export { BrowserClient } from './client.js';
|
|
7
|
+
export { CDPSession } from './cdp.js';
|
|
8
|
+
export { HookManager } from './hooks/index.js';
|
|
9
|
+
export { EnvCollector } from './collector.js';
|
|
10
|
+
export { EnvBridge } from './EnvBridge.js';
|
|
11
|
+
|
|
12
|
+
// 单例实例
|
|
13
|
+
let browserInstance = null;
|
|
14
|
+
|
|
15
|
+
/**
 * Return the shared browser client, creating and launching it on first use.
 *
 * The client is published to the module-level singleton before `launch()` is
 * awaited (as before), but if the launch fails the singleton is cleared again so
 * the next call retries instead of handing out a client that never started.
 *
 * NOTE(review): concurrent first calls can still each launch their own client;
 * the last one to be assigned wins. Confirm whether callers ever race here.
 *
 * @param {object} [options={}] - forwarded unchanged to `BrowserClient#launch`
 * @returns {Promise<object>} the singleton client
 * @throws whatever `BrowserClient#launch` rejects with
 */
export async function getBrowser(options = {}) {
  if (!browserInstance) {
    // Loaded lazily so importing this module does not load the client module.
    const { BrowserClient } = await import('./client.js');
    const client = new BrowserClient();
    browserInstance = client;
    try {
      await client.launch(options);
    } catch (err) {
      // Only undo our own assignment; never clobber a client set by someone else.
      if (browserInstance === client) {
        browserInstance = null;
      }
      throw err;
    }
  }
  return browserInstance;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Close the shared browser client, if there is one, and clear the singleton.
 *
 * The singleton is cleared even when `close()` rejects, so a failed shutdown does
 * not leave later `getBrowser()` calls holding a half-closed client. The rejection
 * is still propagated to the caller.
 *
 * @returns {Promise<void>}
 * @throws whatever the client's `close()` rejects with
 */
export async function closeBrowser() {
  if (!browserInstance) return;

  const client = browserInstance;
  try {
    await client.close();
  } finally {
    // Do not clobber a new client that may have been created while closing.
    if (browserInstance === client) {
      browserInstance = null;
    }
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Peek at the shared browser client without creating one.
 *
 * @returns {object|null} the current singleton, or `null` when none has been
 *   created (or it has been closed)
 */
export function getBrowserClient() {
  return browserInstance;
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - CDP 网络拦截器
|
|
3
|
+
* 通过 CDP 捕获网络请求/响应,按站点存储到文件系统
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getDataStore } from '../../store/DataStore.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Captures XHR/Fetch traffic through a CDP session and hands each completed
 * exchange (request, status, headers, truncated body) to the data store.
 */
export class NetworkInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing `send(method, params)` and `on(event, handler)`
   * @param {object} page - page object whose `url()` gives the current page URL
   */
  constructor(cdpClient, page) {
    this.client = cdpClient;
    this.page = page;
    this.store = getDataStore();
    // requestId -> request details collected so far.
    this.pendingRequests = new Map();
  }

  /**
   * Current page URL, or an empty string when there is no page or `url()` throws.
   * @returns {string}
   */
  getPageUrl() {
    try {
      return this.page?.url() || '';
    } catch {
      return '';
    }
  }

  /**
   * Enable the CDP Network domain and subscribe to the request lifecycle events.
   * @returns {Promise<void>}
   */
  async start() {
    await this.client.send('Network.enable');

    this.client.on('Network.requestWillBeSent', (params) => {
      this.onRequest(params);
    });

    this.client.on('Network.responseReceived', (params) => {
      this.onResponse(params);
    });

    this.client.on('Network.loadingFinished', (params) => {
      this.onLoadingFinished(params);
    });

    // Without this, requests that fail or are cancelled would stay in
    // pendingRequests forever, because loadingFinished never fires for them.
    this.client.on('Network.loadingFailed', (params) => {
      this.onLoadingFailed(params);
    });

    console.log('[NetworkInterceptor] 已启动');
  }

  /**
   * Remember an outgoing request. Only `XHR` and `Fetch` resource types are tracked.
   * @param {object} params - `Network.requestWillBeSent` event payload
   */
  onRequest(params) {
    const { requestId, request, timestamp, wallTime, type } = params;

    if (type !== 'XHR' && type !== 'Fetch') return;

    // CDP's `timestamp` is a monotonic clock with an arbitrary origin; `wallTime`
    // is seconds since the Unix epoch. Prefer wallTime so the stored value is a
    // real epoch-milliseconds time, and fall back to the old behaviour if absent.
    const seconds = typeof wallTime === 'number' ? wallTime : timestamp;

    this.pendingRequests.set(requestId, {
      url: request.url,
      method: request.method,
      headers: request.headers,
      postData: request.postData,
      timestamp: seconds * 1000,
      // Page URL at request time; passed to the store with the saved record.
      pageUrl: this.getPageUrl()
    });
  }

  /**
   * Attach status and response headers to a tracked request.
   * @param {object} params - `Network.responseReceived` event payload
   */
  onResponse(params) {
    const { requestId, response } = params;
    const pending = this.pendingRequests.get(requestId);
    if (!pending) return;

    pending.status = response.status;
    pending.responseHeaders = response.headers;
  }

  /**
   * Forget a tracked request that failed or was cancelled.
   * @param {object} params - `Network.loadingFailed` event payload
   */
  onLoadingFailed(params) {
    this.pendingRequests.delete(params.requestId);
  }

  /**
   * Fetch the response body of a finished request, persist the exchange, and stop
   * tracking it. Failures to read the body are ignored (best-effort capture).
   * @param {object} params - `Network.loadingFinished` event payload
   * @returns {Promise<void>}
   */
  async onLoadingFinished(params) {
    const { requestId } = params;
    const pending = this.pendingRequests.get(requestId);
    if (!pending) return;

    try {
      const { body, base64Encoded } = await this.client.send(
        'Network.getResponseBody',
        { requestId }
      );

      const responseBody = base64Encoded
        ? Buffer.from(body, 'base64').toString('utf-8')
        : body;

      // Fire-and-forget: persistence errors are logged, not propagated.
      this.store.saveResponse({
        url: pending.url,
        method: pending.method,
        status: pending.status,
        requestHeaders: pending.headers,
        requestBody: pending.postData,
        // Stored body is capped at 50,000 characters.
        responseBody: responseBody.slice(0, 50000),
        timestamp: pending.timestamp,
        pageUrl: pending.pageUrl
      }).catch(e => {
        console.error('[NetworkInterceptor] 保存失败:', e.message);
      });

    } catch (e) {
      // Some responses have no retrievable body; skip them.
    }

    this.pendingRequests.delete(requestId);
  }
}
|
|
115
|
+
|
|
116
|
+
export default NetworkInterceptor;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeepSpider - CDP 脚本拦截器
|
|
3
|
+
* 通过 CDP 捕获 JS 脚本源码,按站点存储到文件系统
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getDataStore } from '../../store/DataStore.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Captures the source of scripts parsed by the page through a CDP session and
 * hands each one to the data store.
 */
export class ScriptInterceptor {
  /**
   * @param {object} cdpClient - CDP session exposing `send(method, params)` and `on(event, handler)`
   * @param {object} page - page object whose `url()` gives the current page URL
   */
  constructor(cdpClient, page) {
    this.client = cdpClient;
    this.page = page;
    this.store = getDataStore();
    // Script ids already handled, so each script is fetched at most once.
    this.scriptIds = new Set();
  }

  /**
   * Current page URL, or an empty string when there is no page or `url()` throws.
   * @returns {string}
   */
  getPageUrl() {
    let current;
    try {
      current = this.page?.url();
    } catch {
      return '';
    }
    return current || '';
  }

  /**
   * Enable the CDP Debugger domain and subscribe to parsed-script events.
   * @returns {Promise<void>}
   */
  async start() {
    await this.client.send('Debugger.enable');

    const handleParsed = (event) => {
      this.onScriptParsed(event);
    };
    this.client.on('Debugger.scriptParsed', handleParsed);

    console.log('[ScriptInterceptor] 已启动');
  }

  /**
   * React to a parsed script: skip scripts without a URL, extension scripts and
   * ones already seen; otherwise start fetching and saving the source.
   * @param {object} params - `Debugger.scriptParsed` event payload
   * @returns {Promise<void>}
   */
  async onScriptParsed(params) {
    const { scriptId, url } = params;

    const shouldSkip =
      !url ||
      url.startsWith('chrome-extension://') ||
      this.scriptIds.has(scriptId);
    if (shouldSkip) return;

    this.scriptIds.add(scriptId);

    // Not awaited on purpose; failures are ignored.
    this.fetchAndSave(scriptId, url).catch(() => {});
  }

  /**
   * Read a script's source over CDP and persist it. Any failure is swallowed.
   * @param {string} scriptId - CDP script identifier
   * @param {string} url - URL the script was loaded from
   * @returns {Promise<void>}
   */
  async fetchAndSave(scriptId, url) {
    try {
      const reply = await this.client.send('Debugger.getScriptSource', { scriptId });

      const record = {
        url,
        type: 'external',
        // Only the first 500,000 characters of very large scripts are kept.
        source: reply.scriptSource.slice(0, 500000),
        timestamp: Date.now(),
        pageUrl: this.getPageUrl()
      };

      await this.store.saveScript(record);
    } catch {
      // Source unavailable or save failed: skip this script.
    }
  }
}
|
|
75
|
+
|
|
76
|
+
export default ScriptInterceptor;
|