hyper-agent-browser 0.1.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +334 -92
- package/package.json +2 -2
- package/src/browser/manager.ts +61 -3
- package/src/cli.ts +151 -1
- package/src/commands/extract.ts +76 -0
- package/src/commands/network.ts +111 -0
- package/src/commands/wait.ts +226 -0
- package/src/daemon/browser-pool.ts +90 -17
- package/src/daemon/server.ts +66 -0
- package/src/extractors/form-extractor.ts +153 -0
- package/src/extractors/list-extractor.ts +213 -0
- package/src/extractors/meta-extractor.ts +139 -0
- package/src/extractors/table-extractor.ts +215 -0
- package/src/snapshot/dom-extractor.ts +28 -15
- package/src/utils/network-listener.ts +247 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import type { Page } from "patchright";
|
|
2
|
+
|
|
3
|
+
/**
 * Shape of the object returned by `MetaExtractor.extract`.
 *
 * `type`, `url` and `timestamp` are always present. Every other group is
 * optional and only appears when the corresponding category was extracted.
 */
export interface MetaData {
  /** Constant discriminator for this payload. */
  type: "metadata";
  /** URL of the page the metadata was read from. */
  url: string;
  /** Millisecond timestamp recorded for the extraction. */
  timestamp: number;
  /** Basic SEO tags: title, description, keywords, canonical link, robots. */
  seo?: {
    title?: string;
    description?: string;
    keywords?: string[];
    canonical?: string;
    robots?: string;
  };
  /** Open Graph values keyed by property name without the `og:` prefix. */
  og?: Record<string, string>;
  /** Twitter Card values keyed by name without the `twitter:` prefix. */
  twitter?: Record<string, string>;
  /** Parsed JSON-LD documents found on the page. */
  schema?: any[];
  /** Miscellaneous values such as charset, viewport, lang and author. */
  other?: Record<string, string>;
}
|
|
19
|
+
|
|
20
|
+
export class MetaExtractor {
  /**
   * Extract metadata from the current document of `page`.
   *
   * @param page - Page whose document is inspected.
   * @param options - `include` selects the categories to collect
   *   (`"seo"`, `"og"`, `"twitter"`, `"schema"`, `"other"`); all five are
   *   collected when it is omitted.
   * @returns The collected groups together with the page URL and a timestamp.
   */
  async extract(page: Page, options: { include?: string[] } = {}): Promise<MetaData> {
    const include = options.include ?? ["seo", "og", "twitter", "schema", "other"];

    const url = page.url();
    const timestamp = Date.now();

    const result = await page.evaluate((includeTypes: string[]) => {
      // Everything the callback needs is declared inside it, because the
      // callback is what gets evaluated against the page's `document`.
      const wants = (category: string): boolean => includeTypes.includes(category);

      // Content of the first <meta> matching `selector`, or undefined when
      // no such <meta> element exists.
      const metaContent = (selector: string): string | undefined => {
        const element = document.querySelector(selector);
        return element instanceof HTMLMetaElement ? element.content : undefined;
      };

      // Collect <meta> tags whose `attribute` starts with `prefix`, keyed by
      // the attribute value with the prefix stripped.
      const collectPrefixed = (
        attribute: "property" | "name",
        prefix: string,
      ): Record<string, string> => {
        const collected: Record<string, string> = {};
        const tags = document.querySelectorAll(`meta[${attribute}^="${prefix}"]`);
        for (const tag of Array.from(tags)) {
          if (!(tag instanceof HTMLMetaElement)) continue;
          const raw = tag.getAttribute(attribute);
          if (raw) {
            collected[raw.substring(prefix.length)] = tag.content;
          }
        }
        return collected;
      };

      const data: {
        seo?: {
          title?: string;
          description?: string;
          keywords?: string[];
          canonical?: string;
          robots?: string;
        };
        og?: Record<string, string>;
        twitter?: Record<string, string>;
        schema?: unknown[];
        other?: Record<string, string>;
      } = {};

      // Basic SEO metadata
      if (wants("seo")) {
        const seo: NonNullable<typeof data.seo> = { title: document.title };

        const description = metaContent('meta[name="description"]');
        if (description !== undefined) {
          seo.description = description;
        }

        const keywords = metaContent('meta[name="keywords"]');
        if (keywords !== undefined) {
          // Drop empty entries so "" or "a,,b," never yields "" keywords.
          seo.keywords = keywords
            .split(",")
            .map((keyword) => keyword.trim())
            .filter((keyword) => keyword.length > 0);
        }

        const canonical = document.querySelector('link[rel="canonical"]');
        if (canonical instanceof HTMLLinkElement) {
          seo.canonical = canonical.href;
        }

        const robots = metaContent('meta[name="robots"]');
        if (robots !== undefined) {
          seo.robots = robots;
        }

        data.seo = seo;
      }

      // Open Graph
      if (wants("og")) {
        data.og = collectPrefixed("property", "og:");
      }

      // Twitter Cards
      if (wants("twitter")) {
        data.twitter = collectPrefixed("name", "twitter:");
      }

      // Schema.org (JSON-LD)
      if (wants("schema")) {
        const documents: unknown[] = [];
        const scripts = document.querySelectorAll('script[type="application/ld+json"]');
        for (const script of Array.from(scripts)) {
          try {
            documents.push(JSON.parse(script.textContent || ""));
          } catch {
            // Best effort: a malformed JSON-LD block is skipped, not fatal.
          }
        }
        data.schema = documents;
      }

      // Other metadata
      if (wants("other")) {
        const other: Record<string, string> = {};

        const charset = document.characterSet;
        if (charset) {
          other.charset = charset;
        }

        const viewport = metaContent('meta[name="viewport"]');
        if (viewport !== undefined) {
          other.viewport = viewport;
        }

        const lang = document.documentElement.lang;
        if (lang) {
          other.lang = lang;
        }

        const author = metaContent('meta[name="author"]');
        if (author !== undefined) {
          other.author = author;
        }

        data.other = other;
      }

      return data;
    }, include);

    return {
      type: "metadata",
      url,
      timestamp,
      ...result,
    };
  }
}
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import type { Page } from "patchright";
|
|
2
|
+
|
|
3
|
+
/**
 * Shape of the object returned by `TableExtractor.extract`.
 */
export interface TableData {
  /** Constant discriminator for this payload. */
  type: "table";
  /** URL of the page the table was read from. */
  url: string;
  /** Selector that was requested, or `"table"` when none was given. */
  selector: string;
  /** Millisecond timestamp recorded for the extraction. */
  timestamp: number;
  /** Column names, in column order. */
  headers: string[];
  /** Number of entries in `data`. */
  rows: number;
  /** One record per extracted row, keyed by column name. */
  data: Record<string, string>[];
}
|
|
12
|
+
|
|
13
|
+
export class TableExtractor {
  /**
   * Extract tabular data from the page.
   *
   * Lookup order: the element matched by `selector` (or a `<table>` nested in
   * it); with no selector, the first `<table>` whose computed style is not
   * hidden; otherwise an ARIA table (`role="table"` / `role="grid"`, or the
   * selector's element).
   *
   * The extraction helpers are defined inside the `page.evaluate` callback.
   * Only the callback itself is evaluated in the page, so functions declared
   * at module level in this file are not reachable from it.
   *
   * @param page - Page to read from.
   * @param selector - Optional CSS selector of the table or a container of it.
   * @param options - `maxRows` caps the number of returned rows (unbounded by
   *   default). `includeHeaders` is accepted but not read by this method.
   * @returns Headers, rows and bookkeeping fields for the extracted table.
   * @throws If no HTML or ARIA table can be located ("No table found on page").
   */
  async extract(
    page: Page,
    selector?: string,
    options: { includeHeaders?: boolean; maxRows?: number } = {},
  ): Promise<TableData> {
    const maxRows = options.maxRows ?? Number.POSITIVE_INFINITY;

    const url = page.url();
    const timestamp = Date.now();

    const result = await page.evaluate(
      ({ selector, maxRows }: { selector: string; maxRows: number }) => {
        type Extracted = { headers: string[]; data: Record<string, string>[] };

        const textOf = (cell: Element): string => cell.textContent?.trim() || "";

        const isHidden = (element: Element): boolean => {
          const style = window.getComputedStyle(element);
          return style.display === "none" || style.visibility === "hidden";
        };

        // Header names, falling back to column_N for blank header cells.
        const namesOf = (cells: Element[]): string[] =>
          cells.map((cell, index) => textOf(cell) || `column_${index + 1}`);

        const hasContent = (row: Record<string, string>): boolean =>
          Object.values(row).some((value) => value !== "");

        // Read a native <table>.
        const fromHtmlTable = (table: HTMLTableElement): Extracted => {
          const thead = table.querySelector("thead");
          const headerRow = thead?.querySelector("tr") || table.querySelector("tr");
          const headers = headerRow
            ? namesOf(Array.from(headerRow.querySelectorAll("th, td")))
            : [];

          // No usable header cells: synthesize names from the first row.
          if (headers.length === 0) {
            const firstRow = table.querySelector("tr");
            if (firstRow) {
              Array.from(firstRow.querySelectorAll("td, th")).forEach((_, index) => {
                headers.push(`column_${index + 1}`);
              });
            }
          }

          const data: Record<string, string>[] = [];
          const body = table.querySelector("tbody") || table;

          for (const row of Array.from(body.querySelectorAll("tr"))) {
            if (data.length >= maxRows) break;
            // Skip the header row by identity, wherever it sits in the table.
            if (row === headerRow || isHidden(row)) continue;

            const rowData: Record<string, string> = {};
            let column = 0; // column cursor, advanced by each cell's colspan
            for (const cell of Array.from(row.querySelectorAll("td, th"))) {
              if (column >= headers.length) break;
              const parsed = Number.parseInt(cell.getAttribute("colspan") || "1", 10);
              const span = Number.isFinite(parsed) && parsed > 1 ? parsed : 1;
              const text = textOf(cell);
              // A spanning cell fills every column it covers.
              for (let k = 0; k < span && column < headers.length; k++, column++) {
                rowData[headers[column]] = text;
              }
            }

            if (hasContent(rowData)) {
              data.push(rowData);
            }
          }

          return { headers, data };
        };

        // Read an ARIA table/grid built from role="row" elements.
        const fromAriaTable = (table: Element): Extracted => {
          const headerRow = table.querySelector('[role="row"]:first-child');
          const headers = headerRow
            ? namesOf(
                Array.from(
                  headerRow.querySelectorAll('[role="columnheader"], [role="gridcell"]'),
                ),
              )
            : [];

          const data: Record<string, string>[] = [];
          for (const row of Array.from(table.querySelectorAll('[role="row"]'))) {
            if (data.length >= maxRows) break;
            if (row === headerRow || isHidden(row)) continue;

            const rowData: Record<string, string> = {};
            Array.from(row.querySelectorAll('[role="gridcell"], [role="cell"]')).forEach(
              (cell, index) => {
                if (index >= headers.length) return;
                rowData[headers[index]] = textOf(cell);
              },
            );

            if (hasContent(rowData)) {
              data.push(rowData);
            }
          }

          return { headers, data };
        };

        // Locate the table element.
        let table: HTMLTableElement | null = null;

        if (selector) {
          const element = document.querySelector(selector);
          if (element instanceof HTMLTableElement) {
            table = element;
          } else if (element) {
            // Look for a table nested inside the selected element.
            const nested = element.querySelector("table");
            if (nested instanceof HTMLTableElement) {
              table = nested;
            }
          }
        } else {
          // First table that is not hidden via display/visibility.
          const tables = Array.from(document.querySelectorAll("table"));
          table = tables.find((candidate) => !isHidden(candidate)) || null;
        }

        if (table) {
          return fromHtmlTable(table);
        }

        // Fall back to an ARIA table.
        const ariaTable = selector
          ? document.querySelector(selector)
          : document.querySelector('[role="table"], [role="grid"]');
        if (ariaTable) {
          return fromAriaTable(ariaTable);
        }

        throw new Error("No table found on page");
      },
      { selector: selector || "", maxRows },
    );

    return {
      type: "table",
      url,
      selector: selector || "table",
      timestamp,
      headers: result.headers,
      rows: result.data.length,
      data: result.data,
    };
  }
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Table-extraction helpers written against browser DOM globals (`window`, DOM elements).
|
|
86
|
+
*/
|
|
87
|
+
|
|
88
|
+
// Extract an HTML table
|
|
89
|
+
/**
 * Read a native `<table>` into header names and row records.
 *
 * Headers come from the first `<tr>` of `<thead>`, or the table's first `<tr>`
 * when there is no `<thead>`; blank header cells are named `column_N`.
 * Rows whose computed style is hidden, and rows with no non-empty value, are
 * omitted. Relies on the DOM globals `window.getComputedStyle`.
 *
 * @param table - Table element to read.
 * @param maxRows - Maximum number of data rows to return.
 * @returns `headers` in column order and one record per data row.
 */
function extractHtmlTable(
  table: HTMLTableElement,
  maxRows: number,
): { headers: string[]; data: Record<string, string>[] } {
  const textOf = (cell: Element): string => cell.textContent?.trim() || "";

  // Header row: prefer <thead>, otherwise treat the first row as the header.
  const thead = table.querySelector("thead");
  const headerRow = thead?.querySelector("tr") || table.querySelector("tr");

  const headers: string[] = headerRow
    ? Array.from(headerRow.querySelectorAll("th, td")).map(
        (cell, index) => textOf(cell) || `column_${index + 1}`,
      )
    : [];

  // No usable header cells: synthesize names from the first row's cell count.
  if (headers.length === 0) {
    const firstRow = table.querySelector("tr");
    if (firstRow) {
      Array.from(firstRow.querySelectorAll("td, th")).forEach((_, index) => {
        headers.push(`column_${index + 1}`);
      });
    }
  }

  const data: Record<string, string>[] = [];
  const body = table.querySelector("tbody") || table;

  for (const row of Array.from(body.querySelectorAll("tr"))) {
    if (data.length >= maxRows) break;

    // Skip the header row by identity. An index-based skip emits the header
    // as data when <thead> exists but the rows are read from the table itself.
    if (row === headerRow) continue;

    // Skip hidden rows.
    const style = window.getComputedStyle(row);
    if (style.display === "none" || style.visibility === "hidden") continue;

    const rowData: Record<string, string> = {};
    // Track the column position separately from the cell position so that a
    // cell following a colspan lands in the correct column.
    let column = 0;
    for (const cell of Array.from(row.querySelectorAll("td, th"))) {
      if (column >= headers.length) break;

      const parsed = Number.parseInt(cell.getAttribute("colspan") || "1", 10);
      const span = Number.isFinite(parsed) && parsed > 1 ? parsed : 1;
      const text = textOf(cell);

      // A merged cell contributes its text to every column it covers.
      for (let k = 0; k < span && column < headers.length; k++, column++) {
        rowData[headers[column]] = text;
      }
    }

    // Only keep rows with at least one non-empty value.
    if (Object.values(rowData).some((value) => value !== "")) {
      data.push(rowData);
    }
  }

  return { headers, data };
}
|
|
164
|
+
|
|
165
|
+
// Extract an ARIA table
|
|
166
|
+
/**
 * Read an ARIA table/grid (elements carrying `role="row"`) into header names
 * and row records.
 *
 * The header row is the first `[role="row"]:first-child` match; its
 * `columnheader` / `gridcell` cells name the columns (blank ones become
 * `column_N`). Data cells are read from `gridcell` / `cell` roles. Rows whose
 * computed style is hidden, and rows with no non-empty value, are omitted.
 *
 * @param table - Root element of the ARIA table.
 * @param maxRows - Maximum number of data rows to return.
 * @returns `headers` in column order and one record per data row.
 */
function extractAriaTable(
  table: HTMLElement,
  maxRows: number,
): { headers: string[]; data: Record<string, string>[] } {
  const textOf = (cell: Element): string => cell.textContent?.trim() || "";

  // Locate the header row and derive column names from it.
  const headerRow = table.querySelector('[role="row"]:first-child');
  const headers: string[] = headerRow
    ? Array.from(
        headerRow.querySelectorAll('[role="columnheader"], [role="gridcell"]'),
      ).map((cell, index) => textOf(cell) || `column_${index + 1}`)
    : [];

  const data: Record<string, string>[] = [];

  for (const row of Array.from(table.querySelectorAll('[role="row"]'))) {
    if (data.length >= maxRows) break;

    // Skip the header row by identity: the `:first-child` match is not
    // guaranteed to be the first role="row" element in document order, so an
    // index-based skip could drop a data row and keep the header instead.
    if (row === headerRow) continue;

    // Skip hidden rows.
    const style = window.getComputedStyle(row);
    if (style.display === "none" || style.visibility === "hidden") continue;

    const rowData: Record<string, string> = {};
    Array.from(row.querySelectorAll('[role="gridcell"], [role="cell"]')).forEach(
      (cell, index) => {
        if (index >= headers.length) return;
        rowData[headers[index]] = textOf(cell);
      },
    );

    // Only keep rows with at least one non-empty value.
    if (Object.values(rowData).some((value) => value !== "")) {
      data.push(rowData);
    }
  }

  return { headers, data };
}
|
|
@@ -101,30 +101,43 @@ export class DomSnapshotExtractor {
|
|
|
101
101
|
// Try ID first
|
|
102
102
|
if (el.id) return `#${el.id}`;
|
|
103
103
|
|
|
104
|
-
//
|
|
105
|
-
|
|
104
|
+
// Build path from element to root (or an element with ID)
|
|
105
|
+
const path: string[] = [];
|
|
106
106
|
let current: Element | null = el;
|
|
107
107
|
|
|
108
|
-
while (current
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
108
|
+
while (current && current !== document.body && path.length < 6) {
|
|
109
|
+
const parentEl: Element | null = current.parentElement;
|
|
110
|
+
if (!parentEl) break;
|
|
111
|
+
|
|
112
|
+
// Find index among siblings of same tag type
|
|
113
|
+
const currentTag = current.tagName;
|
|
114
|
+
const siblings: Element[] = [];
|
|
115
|
+
for (let i = 0; i < parentEl.children.length; i++) {
|
|
116
|
+
const child = parentEl.children[i];
|
|
117
|
+
if (child.tagName === currentTag) {
|
|
118
|
+
siblings.push(child);
|
|
119
|
+
}
|
|
115
120
|
}
|
|
121
|
+
const index = siblings.indexOf(current) + 1;
|
|
122
|
+
|
|
123
|
+
// Use nth-of-type for uniqueness among same-tag siblings
|
|
124
|
+
const segment =
|
|
125
|
+
siblings.length > 1
|
|
126
|
+
? `${current.tagName.toLowerCase()}:nth-of-type(${index})`
|
|
127
|
+
: current.tagName.toLowerCase();
|
|
128
|
+
|
|
129
|
+
path.unshift(segment);
|
|
116
130
|
|
|
117
|
-
|
|
118
|
-
if (
|
|
119
|
-
|
|
131
|
+
// Stop if parent has ID
|
|
132
|
+
if (parentEl.id) {
|
|
133
|
+
path.unshift(`#${parentEl.id}`);
|
|
120
134
|
break;
|
|
121
135
|
}
|
|
122
136
|
|
|
123
|
-
|
|
124
|
-
if (selector.split(">").length > 5) break;
|
|
137
|
+
current = parentEl;
|
|
125
138
|
}
|
|
126
139
|
|
|
127
|
-
return
|
|
140
|
+
return path.join(" > ");
|
|
128
141
|
}
|
|
129
142
|
|
|
130
143
|
// Traverse DOM
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import type { Page } from "patchright";
|
|
5
|
+
|
|
6
|
+
/**
 * Configuration and persisted state of one network listener.
 */
export interface NetworkListenerConfig {
  /** Listener identifier; also names the listener's directory on disk. */
  id: string;
  /** Session directory under which `network/<id>/` is created. */
  sessionDir: string;
  /** Criteria a request must meet to be recorded. */
  filter: {
    /** Resource types to record (e.g. `"xhr"`, `"fetch"`). */
    types: string[];
    /** Optional URL pattern where `*` and `?` act as wildcards. */
    urlPattern?: string;
    /** Optional list of HTTP methods; when empty or absent all are allowed. */
    methods?: string[];
  };
  /** Millisecond timestamp of when the listener was created. */
  startTime: number;
  /** Lifecycle state of the listener. */
  status: "active" | "stopped";
}
|
|
17
|
+
|
|
18
|
+
/**
 * One captured request, stored as a single line of `requests.jsonl`.
 */
export interface NetworkRequest {
  /** Sequential identifier of the form `req_<n>`. */
  id: string;
  /** Request URL. */
  url: string;
  /** HTTP method. */
  method: string;
  /** Outgoing request details. */
  request: {
    headers: Record<string, string>;
    postData?: any;
  };
  /** Response details; absent when no response was obtained. */
  response?: {
    status: number;
    statusText: string;
    headers: Record<string, string>;
    /** Parsed JSON or text body, when the content type allowed reading it. */
    body?: any;
  };
  /** Millisecond timestamps around the request. */
  timing: {
    startTime: number;
    endTime?: number;
    /** `endTime - startTime`, set together with `endTime`. */
    duration?: number;
  };
}
|
|
38
|
+
|
|
39
|
+
/**
 * Records a page's network requests that match a filter and persists them to
 * `<sessionDir>/network/<id>/requests.jsonl`, alongside a `meta.json` holding
 * the listener configuration.
 */
export class NetworkListener {
  /** Resource types that can be recorded; any other type is never captured. */
  private static readonly CAPTURABLE_TYPES: ReadonlySet<string> = new Set([
    "xhr",
    "fetch",
    "document",
    "script",
    "image",
    "font",
    "stylesheet",
  ]);

  /** A listener left running is stopped automatically after one hour. */
  private static readonly AUTO_STOP_MS = 60 * 60 * 1000;

  private config: NetworkListenerConfig;
  private metaFile: string;
  private requestsFile: string;
  private requestCount = 0;
  private cleanup?: () => void;
  /** Compiled form of the most recently used `filter.urlPattern`. */
  private urlMatcher?: { pattern: string; regex: RegExp };

  constructor(config: NetworkListenerConfig) {
    this.config = config;
    const networkDir = join(config.sessionDir, "network", config.id);
    this.metaFile = join(networkDir, "meta.json");
    this.requestsFile = join(networkDir, "requests.jsonl");
  }

  /**
   * Start recording requests issued by `page`.
   *
   * Creates the listener directory, writes `meta.json`, subscribes to the
   * page's `request` event and arms the auto-stop timer. Calling `start`
   * again first detaches the previous subscription and timer.
   */
  async start(page: Page): Promise<void> {
    // Never leave an earlier handler or timer behind when restarted.
    this.cleanup?.();
    this.cleanup = undefined;

    // Ensure the directory exists (a no-op when it already does).
    const networkDir = join(this.config.sessionDir, "network", this.config.id);
    await mkdir(networkDir, { recursive: true, mode: 0o700 });

    await this.saveMeta();

    // The event emitter ignores the handler's result, so failures are handled
    // here instead of surfacing as unhandled promise rejections.
    const requestHandler = (request: any): void => {
      void this.captureRequest(request).catch(() => {
        // Best effort: one request that cannot be recorded is dropped.
      });
    };

    page.on("request", requestHandler);

    const autoStopTimer = setTimeout(() => {
      if (this.config.status === "active") {
        void this.stop().catch(() => {
          // Nothing can observe a failure of the automatic stop.
        });
      }
    }, NetworkListener.AUTO_STOP_MS);

    // Tear down both the subscription and the pending timer.
    this.cleanup = () => {
      clearTimeout(autoStopTimer);
      page.off("request", requestHandler);
    };
  }

  /**
   * Stop recording: detach from the page, cancel the auto-stop timer and
   * persist the `stopped` status.
   */
  async stop(): Promise<void> {
    this.config.status = "stopped";

    // Detach before touching the disk so a failed metadata write cannot
    // leave the listener attached or the timer pending.
    this.cleanup?.();
    this.cleanup = undefined;

    await this.saveMeta();
  }

  /**
   * Read back every request recorded so far.
   *
   * @returns The parsed records, or an empty array when nothing was written.
   */
  async getRequests(): Promise<NetworkRequest[]> {
    if (!existsSync(this.requestsFile)) {
      return [];
    }

    const content = await readFile(this.requestsFile, "utf-8");
    return content
      .trim()
      .split("\n")
      .filter((line) => line)
      .map((line) => JSON.parse(line));
  }

  /**
   * Summarize the listener's identity, start time, status and filter.
   */
  getInfo(): {
    listenerId: string;
    startTime: number;
    status: string;
    filter: any;
  } {
    const { id, startTime, status, filter } = this.config;
    return { listenerId: id, startTime, status, filter };
  }

  /**
   * Build the record for one request, wait for its response, and append the
   * record to the requests file. Requests rejected by the filter are ignored.
   */
  private async captureRequest(request: any): Promise<void> {
    const resourceType = request.resourceType();
    const method = request.method();
    const url = request.url();

    if (!this.shouldCapture(resourceType, method, url)) {
      return;
    }

    const requestId = `req_${++this.requestCount}`;
    const requestData: NetworkRequest = {
      id: requestId,
      url,
      method,
      request: {
        headers: request.headers(),
        postData: request.postData(),
      },
      timing: {
        startTime: Date.now(),
      },
    };

    try {
      const response = await request.response();
      if (response) {
        requestData.response = {
          status: response.status(),
          statusText: response.statusText(),
          headers: response.headers(),
        };

        // Only JSON and text bodies are read; a body that cannot be read
        // simply leaves `body` unset.
        try {
          const contentType = response.headers()["content-type"] || "";
          if (contentType.includes("application/json")) {
            requestData.response.body = await response.json();
          } else if (contentType.includes("text/")) {
            requestData.response.body = await response.text();
          }
        } catch {
          // Ignore failures reading the response body.
        }

        requestData.timing.endTime = Date.now();
        requestData.timing.duration = requestData.timing.endTime - requestData.timing.startTime;
      }
    } catch {
      // The request failed; it is still recorded, without a response.
    }

    await this.appendRequest(requestData);
  }

  /**
   * Decide whether a request passes the resource-type, HTTP-method and URL
   * pattern filters.
   */
  private shouldCapture(resourceType: string, method: string, url: string): boolean {
    const { types, methods, urlPattern } = this.config.filter;

    if (!NetworkListener.CAPTURABLE_TYPES.has(resourceType) || !types.includes(resourceType)) {
      return false;
    }

    if (methods && methods.length > 0 && !methods.includes(method)) {
      return false;
    }

    if (urlPattern && !this.urlRegexFor(urlPattern).test(url)) {
      return false;
    }

    return true;
  }

  /**
   * Compile `pattern` (with `*` → `.*` and `?` → `.`) once and reuse it.
   *
   * Patterns that already compile keep their previous meaning. A pattern that
   * is not a valid regular expression (for example one containing a lone `[`)
   * used to throw on every request; it is now retried with its other regex
   * metacharacters escaped, i.e. matched literally.
   */
  private urlRegexFor(pattern: string): RegExp {
    if (this.urlMatcher?.pattern === pattern) {
      return this.urlMatcher.regex;
    }

    const expandWildcards = (glob: string): string =>
      glob.replace(/\*/g, ".*").replace(/\?/g, ".");

    let regex: RegExp;
    try {
      regex = new RegExp(expandWildcards(pattern));
    } catch {
      const literal = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
      regex = new RegExp(expandWildcards(literal));
    }

    this.urlMatcher = { pattern, regex };
    return regex;
  }

  /**
   * Write the current configuration to `meta.json`.
   */
  private async saveMeta(): Promise<void> {
    await writeFile(this.metaFile, JSON.stringify(this.config, null, 2));
  }

  /**
   * Append one record to `requests.jsonl` as a single JSON line.
   */
  private async appendRequest(request: NetworkRequest): Promise<void> {
    const line = `${JSON.stringify(request)}\n`;
    await appendFile(this.requestsFile, line);
  }
}
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* Generate a unique listener ID
|
|
242
|
+
*/
|
|
243
|
+
/**
 * Build a listener identifier of the form `listener_<time>_<random>`, where
 * `<time>` is the current millisecond timestamp in base 36 and `<random>` is
 * up to six base-36 characters taken from `Math.random()`.
 */
export function generateListenerId(): string {
  const parts = [
    "listener",
    Date.now().toString(36),
    Math.random().toString(36).substring(2, 8),
  ];
  return parts.join("_");
}
|