@skrillex1224/playwright-toolkit 2.0.58 → 2.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +205 -23
- package/dist/index.cjs.map +4 -4
- package/dist/index.js +205 -23
- package/dist/index.js.map +4 -4
- package/index.d.ts +48 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -84,7 +84,9 @@ function createLogger(moduleName) {
|
|
|
84
84
|
// src/errors.js
|
|
85
85
|
var errors_exports = {};
|
|
86
86
|
__export(errors_exports, {
|
|
87
|
-
CrawlerError: () => CrawlerError
|
|
87
|
+
CrawlerError: () => CrawlerError,
|
|
88
|
+
FirstDataTimeoutError: () => FirstDataTimeoutError,
|
|
89
|
+
OverallTimeoutError: () => OverallTimeoutError
|
|
88
90
|
});
|
|
89
91
|
import { serializeError } from "serialize-error";
|
|
90
92
|
var CrawlerError = class _CrawlerError extends Error {
|
|
@@ -137,6 +139,38 @@ var CrawlerError = class _CrawlerError extends Error {
|
|
|
137
139
|
return crawlerError;
|
|
138
140
|
}
|
|
139
141
|
};
|
|
142
|
+
var FirstDataTimeoutError = class _FirstDataTimeoutError extends Error {
  /**
   * Error raised when no response data arrives within the first-data timeout.
   *
   * Instances carry `name`, `timeout`, `context` and an ISO-8601 `timestamp`
   * taken at construction time.
   *
   * @param {number} timeout - Timeout that elapsed, in milliseconds.
   * @param {Object} [context={}] - Extra context attached to the error.
   *   A nullish value (including an explicit `null`) is normalised to `{}`
   *   so consumers can always read properties off `error.context`.
   */
  constructor(timeout, context = {}) {
    super(`\u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6 (${timeout}ms)`);
    this.name = "FirstDataTimeoutError";
    this.timeout = timeout;
    // The parameter default only covers `undefined`; guard `null` as well.
    this.context = context == null ? {} : context;
    this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    // V8-only API: drop the constructor frame from the captured stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _FirstDataTimeoutError);
    }
  }
};
|
|
158
|
+
var OverallTimeoutError = class _OverallTimeoutError extends Error {
  /**
   * Error raised when the whole request exceeds its overall timeout.
   *
   * Instances carry `name`, `timeout`, `context` and an ISO-8601 `timestamp`
   * taken at construction time.
   *
   * @param {number} timeout - Timeout that elapsed, in milliseconds.
   * @param {Object} [context={}] - Extra context attached to the error.
   *   A nullish value (including an explicit `null`) is normalised to `{}`
   *   so consumers can always read properties off `error.context`.
   */
  constructor(timeout, context = {}) {
    super(`\u6574\u4F53\u8BF7\u6C42\u8D85\u65F6 (${timeout}ms)`);
    this.name = "OverallTimeoutError";
    this.timeout = timeout;
    // The parameter default only covers `undefined`; guard `null` as well.
    this.context = context == null ? {} : context;
    this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    // V8-only API: drop the constructor frame from the captured stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _OverallTimeoutError);
    }
  }
};
|
|
140
174
|
|
|
141
175
|
// src/apify-kit.js
|
|
142
176
|
import { serializeError as serializeError2 } from "serialize-error";
|
|
@@ -243,28 +277,6 @@ var ApifyKit = {
|
|
|
243
277
|
import delay from "delay";
|
|
244
278
|
var logger2 = createLogger("Utils");
|
|
245
279
|
var Utils = {
|
|
246
|
-
/**
|
|
247
|
-
* 解析 SSE 流文本
|
|
248
|
-
* 支持 `data: {...}` 和 `data:{...}` 两种格式
|
|
249
|
-
*/
|
|
250
|
-
parseSseStream(sseStreamText) {
|
|
251
|
-
const events = [];
|
|
252
|
-
const lines = sseStreamText.split("\n");
|
|
253
|
-
for (const line of lines) {
|
|
254
|
-
if (line.startsWith("data:")) {
|
|
255
|
-
try {
|
|
256
|
-
const jsonContent = line.substring(5).trim();
|
|
257
|
-
if (jsonContent && jsonContent !== "[DONE]") {
|
|
258
|
-
events.push(JSON.parse(jsonContent));
|
|
259
|
-
}
|
|
260
|
-
} catch (e) {
|
|
261
|
-
logger2.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
logger2.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
|
|
266
|
-
return events;
|
|
267
|
-
},
|
|
268
280
|
/**
|
|
269
281
|
* 解析 Cookie 字符串为 Playwright 格式的 Cookie 数组
|
|
270
282
|
* @param {string} cookieString - Cookie 字符串
|
|
@@ -994,6 +1006,175 @@ var Captcha = {
|
|
|
994
1006
|
useCaptchaMonitor
|
|
995
1007
|
};
|
|
996
1008
|
|
|
1009
|
+
// src/sse.js
|
|
1010
|
+
import https from "https";
|
|
1011
|
+
import { URL } from "url";
|
|
1012
|
+
var logger8 = createLogger("Sse");
|
|
1013
|
+
var Sse = {
  /**
   * Parse the text of a Server-Sent Events stream into an array of JSON payloads.
   *
   * Only lines starting with `data:` are considered; both `data: {...}` and
   * `data:{...}` are accepted. Empty payloads and the `[DONE]` sentinel are
   * skipped. Payloads that are not valid JSON are logged at debug level and
   * skipped. A nullish input yields an empty array.
   *
   * @param {string} sseStreamText - Raw SSE text.
   * @returns {Array<Object>} Parsed `data:` payloads, in stream order.
   */
  parseSseStream(sseStreamText) {
    const events = [];
    const text = sseStreamText == null ? "" : String(sseStreamText);
    for (const line of text.split("\n")) {
      if (!line.startsWith("data:")) {
        continue;
      }
      // trim() also removes the trailing "\r" of CRLF-terminated lines.
      const jsonContent = line.substring(5).trim();
      // "[DONE]" is an end-of-stream marker, never JSON: skip it up front
      // instead of relying on JSON.parse throwing.
      if (!jsonContent || jsonContent === "[DONE]") {
        continue;
      }
      try {
        events.push(JSON.parse(jsonContent));
      } catch (e) {
        logger8.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
      }
    }
    logger8.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
    return events;
  },
  /**
   * Intercept matching requests and replay them through Node's `https`
   * module so the response can be observed chunk by chunk; the complete
   * body is handed back to the page once the upstream response ends.
   *
   * Settlement rules for the returned promise:
   * - `onData` / `onEnd` receive a `resolve` function and may call it at any time;
   * - when neither `onData` nor `onEnd` is given, it resolves with the full text
   *   when the upstream response ends;
   * - it rejects on first-data timeout, overall timeout, upstream
   *   request/response errors and request setup errors.
   * Once the promise settles both timers are cancelled, so `onTimeout` is never
   * invoked after a successful capture.
   * NOTE: as before, the timers are also cancelled when an upstream response
   * ends; if the callbacks never call `resolve`, the promise stays pending.
   *
   * @param {import('playwright').Page} page
   * @param {string|RegExp} urlPattern - URL pattern to intercept.
   * @param {object} options
   * @param {function(string, function, string): void} [options.onData] - (textChunk, resolve, accumulatedText) => void
   * @param {function(string, function): void} [options.onEnd] - (fullText, resolve) => void
   * @param {function(Error): void} [options.onTimeout] - (error) => void; error is a FirstDataTimeoutError or an OverallTimeoutError. Exceptions thrown by it are logged and do not prevent the rejection.
   * @param {number} [options.firstDataTimeout=90000] - Time allowed before the first chunk arrives (ms).
   * @param {number} [options.timeout=180000] - Overall time limit (ms); also used as the upstream socket idle timeout.
   * @returns {Promise<any>} Resolves with whatever value is passed to `resolve`.
   */
  async intercept(page, urlPattern, options = {}) {
    const {
      onData,
      onEnd,
      onTimeout,
      firstDataTimeout = 9e4,
      timeout = 18e4
    } = options;
    // Request headers that must not be forwarded: pseudo-headers (":authority", ...)
    // are rejected by https.request, "accept-encoding" would yield a compressed
    // body we cannot read as text, and "content-length" is recomputed by Node.
    const droppedHeaders = /* @__PURE__ */ new Set(["accept-encoding", "content-length"]);
    let settled = false;
    let hasReceivedFirstData = false;
    let firstDataTimer = null;
    let overallTimer = null;
    const clearAllTimers = () => {
      if (firstDataTimer) {
        clearTimeout(firstDataTimer);
        firstDataTimer = null;
      }
      if (overallTimer) {
        clearTimeout(overallTimer);
        overallTimer = null;
      }
    };
    let streamResolve;
    let streamReject;
    const capturePromise = new Promise((resolve, reject) => {
      // Both settle functions cancel the timers so that nothing fires (or keeps
      // the event loop alive) after the caller already has its answer.
      streamResolve = (value) => {
        if (settled) {
          return;
        }
        settled = true;
        clearAllTimers();
        resolve(value);
      };
      streamReject = (error) => {
        if (settled) {
          return;
        }
        settled = true;
        clearAllTimers();
        reject(error);
      };
    });
    const failWithTimeout = (error, logMessage) => {
      logger8.error(logMessage);
      clearAllTimers();
      if (onTimeout) {
        try {
          onTimeout(error);
        } catch (e) {
          // A faulty callback must not prevent the rejection below.
          logger8.error(`onTimeout callback error: ${e.message}`);
        }
      }
      streamReject(error);
    };
    firstDataTimer = setTimeout(() => {
      if (!hasReceivedFirstData) {
        failWithTimeout(new FirstDataTimeoutError(firstDataTimeout), `[Intercept] \u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6`);
      }
    }, firstDataTimeout);
    overallTimer = setTimeout(() => {
      failWithTimeout(new OverallTimeoutError(timeout), `[Intercept] \u6574\u4F53\u8BF7\u6C42\u8D85\u65F6`);
    }, timeout);
    const handleRoute = async (route) => {
      const request = route.request();
      const requestUrl = request.url();
      logger8.info(`[MITM] \u5DF2\u62E6\u622A\u8BF7\u6C42: ${requestUrl}`);
      // A route may be fulfilled/aborted/continued only once.
      let routeHandled = false;
      const abortRoute = () => {
        if (routeHandled) {
          return;
        }
        routeHandled = true;
        route.abort().catch((e) => logger8.warning(`Route abort failed: ${e.message}`));
      };
      try {
        const originalHeaders = await request.allHeaders();
        const headers = {};
        for (const name of Object.keys(originalHeaders)) {
          if (!name.startsWith(":") && !droppedHeaders.has(name)) {
            headers[name] = originalHeaders[name];
          }
        }
        const postData = request.postData();
        const urlObj = new URL(requestUrl);
        const reqOptions = {
          hostname: urlObj.hostname,
          // Honour an explicit port; `urlObj.port` is "" for the default one.
          port: urlObj.port || 443,
          path: urlObj.pathname + urlObj.search,
          method: request.method(),
          headers,
          timeout
        };
        const req = https.request(reqOptions, (res) => {
          const chunks = [];
          // Streaming decoder: a multi-byte character split across two chunks is
          // buffered instead of being turned into replacement characters.
          // ignoreBOM keeps a leading BOM in the text, like Buffer#toString does.
          const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
          let accumulatedText = "";
          res.on("data", (chunk) => {
            if (!hasReceivedFirstData) {
              hasReceivedFirstData = true;
              if (firstDataTimer) {
                clearTimeout(firstDataTimer);
                firstDataTimer = null;
              }
              logger8.debug("[Intercept] \u5DF2\u63A5\u6536\u9996\u6B21\u6570\u636E");
            }
            chunks.push(chunk);
            const textChunk = decoder.decode(chunk, { stream: true });
            accumulatedText += textChunk;
            logger8.debug(`[CHUNK] ${chunk.length} bytes`);
            // textChunk is empty only when the chunk held nothing but part of a
            // multi-byte character; it is delivered with the next chunk.
            if (onData && textChunk) {
              try {
                onData(textChunk, streamResolve, accumulatedText);
              } catch (e) {
                logger8.error(`onData callback error: ${e.message}`);
              }
            }
          });
          res.on("end", () => {
            logger8.info("[MITM] \u4E0A\u6E38\u54CD\u5E94\u7ED3\u675F (Stream End)");
            clearAllTimers();
            // Flush bytes still buffered by the decoder (truncated input).
            accumulatedText += decoder.decode();
            const fullBody = Buffer.concat(chunks);
            if (onEnd) {
              try {
                onEnd(accumulatedText, streamResolve);
              } catch (e) {
                logger8.error(`onEnd callback error: ${e.message}`);
              }
            } else if (!onData) {
              streamResolve(accumulatedText);
            }
            if (!routeHandled) {
              routeHandled = true;
              route.fulfill({
                status: res.statusCode,
                headers: res.headers,
                body: fullBody
              }).catch((e) => logger8.warning(`Route fulfill failed: ${e.message}`));
            }
          });
          // The connection dropped after the response started: "end" will never
          // fire, so release the page's request and report the failure.
          res.on("error", (e) => {
            logger8.error(`[MITM] Upstream response error: ${e.message}`);
            clearAllTimers();
            abortRoute();
            streamReject(e);
          });
        });
        req.on("error", (e) => {
          logger8.error(`[MITM] Upstream request error: ${e.message}`);
          clearAllTimers();
          abortRoute();
          streamReject(e);
        });
        // The `timeout` option only emits "timeout" when the socket has been
        // idle; the request has to be destroyed explicitly.
        req.on("timeout", () => {
          req.destroy(new Error(`Upstream socket idle for ${timeout}ms`));
        });
        if (postData) {
          req.write(postData);
        }
        req.end();
      } catch (e) {
        logger8.error(`[MITM] Setup error: ${e.message}`);
        clearAllTimers();
        if (!routeHandled) {
          routeHandled = true;
          route.continue().catch((err) => logger8.warning(`Route continue failed: ${err.message}`));
        }
        // With the timers cancelled nothing else would ever settle the promise.
        streamReject(e);
      }
    };
    try {
      await page.route(urlPattern, handleRoute);
    } catch (e) {
      // The capture promise is never handed out in this case; do not leave
      // timers behind that would later reject it with nobody listening.
      clearAllTimers();
      throw e;
    }
    return capturePromise;
  }
};
|
|
1177
|
+
|
|
997
1178
|
// index.js
|
|
998
1179
|
var usePlaywrightToolKit = () => {
|
|
999
1180
|
return {
|
|
@@ -1005,6 +1186,7 @@ var usePlaywrightToolKit = () => {
|
|
|
1005
1186
|
Constants: constants_exports,
|
|
1006
1187
|
Utils,
|
|
1007
1188
|
Captcha,
|
|
1189
|
+
Sse,
|
|
1008
1190
|
Errors: errors_exports
|
|
1009
1191
|
};
|
|
1010
1192
|
};
|