@skrillex1224/playwright-toolkit 2.0.58 → 2.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +205 -23
- package/dist/index.cjs.map +4 -4
- package/dist/index.js +205 -23
- package/dist/index.js.map +4 -4
- package/index.d.ts +48 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -113,7 +113,9 @@ function createLogger(moduleName) {
|
|
|
113
113
|
// src/errors.js
|
|
114
114
|
var errors_exports = {};
|
|
115
115
|
__export(errors_exports, {
|
|
116
|
-
CrawlerError: () => CrawlerError
|
|
116
|
+
CrawlerError: () => CrawlerError,
|
|
117
|
+
FirstDataTimeoutError: () => FirstDataTimeoutError,
|
|
118
|
+
OverallTimeoutError: () => OverallTimeoutError
|
|
117
119
|
});
|
|
118
120
|
var import_serialize_error = require("serialize-error");
|
|
119
121
|
var CrawlerError = class _CrawlerError extends Error {
|
|
@@ -166,6 +168,38 @@ var CrawlerError = class _CrawlerError extends Error {
|
|
|
166
168
|
return crawlerError;
|
|
167
169
|
}
|
|
168
170
|
};
|
|
171
|
+
var FirstDataTimeoutError = class _FirstDataTimeoutError extends Error {
|
|
172
|
+
/**
|
|
173
|
+
* Creates the error reported when the first data chunk does not arrive in time.
* Sets `name`, `timeout`, `context` and an ISO-8601 `timestamp` on the instance.
* @param {number} timeout - Timeout in milliseconds; also interpolated into the error message
|
|
174
|
+
* @param {Object} [context={}] - Context information, stored as-is on `this.context`
|
|
175
|
+
*/
|
|
176
|
+
constructor(timeout, context = {}) {
|
|
177
|
+
super(`\u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6 (${timeout}ms)`);
|
|
178
|
+
this.name = "FirstDataTimeoutError";
|
|
179
|
+
this.timeout = timeout;
|
|
180
|
+
this.context = context;
|
|
181
|
+
this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
182
|
+
if (Error.captureStackTrace) { // guarded: only called when the runtime provides captureStackTrace
|
|
183
|
+
Error.captureStackTrace(this, _FirstDataTimeoutError);
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
};
|
|
187
|
+
var OverallTimeoutError = class _OverallTimeoutError extends Error {
|
|
188
|
+
/**
|
|
189
|
+
* Creates the error reported when the whole request exceeds its time budget.
* Sets `name`, `timeout`, `context` and an ISO-8601 `timestamp` on the instance.
* @param {number} timeout - Timeout in milliseconds; also interpolated into the error message
|
|
190
|
+
* @param {Object} [context={}] - Context information, stored as-is on `this.context`
|
|
191
|
+
*/
|
|
192
|
+
constructor(timeout, context = {}) {
|
|
193
|
+
super(`\u6574\u4F53\u8BF7\u6C42\u8D85\u65F6 (${timeout}ms)`);
|
|
194
|
+
this.name = "OverallTimeoutError";
|
|
195
|
+
this.timeout = timeout;
|
|
196
|
+
this.context = context;
|
|
197
|
+
this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
198
|
+
if (Error.captureStackTrace) { // guarded: only called when the runtime provides captureStackTrace
|
|
199
|
+
Error.captureStackTrace(this, _OverallTimeoutError);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
};
|
|
169
203
|
|
|
170
204
|
// src/apify-kit.js
|
|
171
205
|
var import_serialize_error2 = require("serialize-error");
|
|
@@ -272,28 +306,6 @@ var ApifyKit = {
|
|
|
272
306
|
var import_delay = __toESM(require("delay"), 1);
|
|
273
307
|
var logger2 = createLogger("Utils");
|
|
274
308
|
var Utils = {
|
|
275
|
-
/**
|
|
276
|
-
* 解析 SSE 流文本
|
|
277
|
-
* 支持 `data: {...}` 和 `data:{...}` 两种格式
|
|
278
|
-
*/
|
|
279
|
-
parseSseStream(sseStreamText) {
|
|
280
|
-
const events = [];
|
|
281
|
-
const lines = sseStreamText.split("\n");
|
|
282
|
-
for (const line of lines) {
|
|
283
|
-
if (line.startsWith("data:")) {
|
|
284
|
-
try {
|
|
285
|
-
const jsonContent = line.substring(5).trim();
|
|
286
|
-
if (jsonContent && jsonContent !== "[DONE]") {
|
|
287
|
-
events.push(JSON.parse(jsonContent));
|
|
288
|
-
}
|
|
289
|
-
} catch (e) {
|
|
290
|
-
logger2.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
logger2.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
|
|
295
|
-
return events;
|
|
296
|
-
},
|
|
297
309
|
/**
|
|
298
310
|
* 解析 Cookie 字符串为 Playwright 格式的 Cookie 数组
|
|
299
311
|
* @param {string} cookieString - Cookie 字符串
|
|
@@ -1023,6 +1035,175 @@ var Captcha = {
|
|
|
1023
1035
|
useCaptchaMonitor
|
|
1024
1036
|
};
|
|
1025
1037
|
|
|
1038
|
+
// src/sse.js
|
|
1039
|
+
var import_https = __toESM(require("https"), 1);
|
|
1040
|
+
var import_url = require("url");
|
|
1041
|
+
var logger8 = createLogger("Sse");
|
|
1042
|
+
var Sse = {
  /**
   * Parse the text of a Server-Sent Events stream into JSON payloads.
   *
   * Every line starting with `data:` (with or without a following space) has
   * the rest of the line trimmed and JSON-parsed. Empty payloads and the
   * `[DONE]` sentinel are skipped; payloads that fail to parse are logged at
   * debug level and skipped. Lines without the `data:` prefix are ignored.
   *
   * @param {string} sseStreamText - Raw SSE text; `null`/`undefined` is treated as empty.
   * @returns {Array<Object>} Parsed payloads, in stream order.
   */
  parseSseStream(sseStreamText) {
    const events = [];
    const text = sseStreamText == null ? "" : String(sseStreamText);
    for (const line of text.split("\n")) {
      if (!line.startsWith("data:")) {
        continue;
      }
      // trim() also removes the "\r" left behind by CRLF line endings.
      const jsonContent = line.substring(5).trim();
      // "[DONE]" is a common end-of-stream marker, not JSON: skip it instead
      // of throwing inside JSON.parse and logging a misleading parse failure.
      if (!jsonContent || jsonContent === "[DONE]") {
        continue;
      }
      try {
        events.push(JSON.parse(jsonContent));
      } catch (e) {
        logger8.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
      }
    }
    logger8.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
    return events;
  },
  /**
   * Intercept matching requests and replay them through Node's https module
   * so the response can be observed chunk by chunk, then hand the complete
   * response back to the page with `route.fulfill`.
   *
   * Settlement rules for the returned promise:
   * - resolves with whatever `onData` / `onEnd` pass to their `resolve` argument;
   * - resolves with the full response text when neither callback is supplied;
   * - rejects with `FirstDataTimeoutError` / `OverallTimeoutError` on timeout;
   * - rejects with the underlying error when the upstream request or response
   *   fails, or when the request cannot be set up (in which case the original
   *   request is allowed to continue untouched).
   * Both timers start when this method is called and are cleared as soon as
   * the promise settles. If the stream ends and no callback has resolved, the
   * overall timer is left running so the promise still settles.
   *
   * Only HTTPS upstreams are replayed (the https module is always used).
   *
   * @param {import('playwright').Page} page
   * @param {string|RegExp} urlPattern - URL pattern passed to `page.route`.
   * @param {object} [options]
   * @param {function(string, function, string): void} [options.onData] - (textChunk, resolve, accumulatedText) => void
   * @param {function(string, function): void} [options.onEnd] - (fullText, resolve) => void
   * @param {function(Error): void} [options.onTimeout] - Called with the timeout error before the promise rejects.
   * @param {number} [options.firstDataTimeout=90000] - Max wait for the first chunk (ms).
   * @param {number} [options.timeout=180000] - Max overall wait (ms); also used as the socket timeout option.
   * @returns {Promise<any>} Settles according to the rules above.
   */
  async intercept(page, urlPattern, options = {}) {
    const {
      onData,
      onEnd,
      onTimeout,
      firstDataTimeout = 9e4,
      timeout = 18e4
    } = options;
    let firstDataTimer = null;
    let overallTimer = null;
    let hasReceivedFirstData = false;
    let resolveCapture;
    let rejectCapture;
    const capturePromise = new Promise((resolve, reject) => {
      resolveCapture = resolve;
      rejectCapture = reject;
    });
    const clearFirstDataTimer = () => {
      if (firstDataTimer) {
        clearTimeout(firstDataTimer);
        firstDataTimer = null;
      }
    };
    const clearAllTimers = () => {
      clearFirstDataTimer();
      if (overallTimer) {
        clearTimeout(overallTimer);
        overallTimer = null;
      }
    };
    // Settling always clears the timers, so onTimeout can never fire after
    // the capture has already succeeded or failed.
    const streamResolve = (value) => {
      clearAllTimers();
      resolveCapture(value);
    };
    const streamReject = (error) => {
      clearAllTimers();
      rejectCapture(error);
    };
    const failWithTimeout = (error, logMessage) => {
      logger8.error(logMessage);
      clearAllTimers();
      if (onTimeout) {
        // A throwing callback must not escape the timer or skip the rejection.
        try {
          onTimeout(error);
        } catch (e) {
          logger8.error(`onTimeout callback error: ${e.message}`);
        }
      }
      streamReject(error);
    };
    firstDataTimer = setTimeout(() => {
      if (!hasReceivedFirstData) {
        failWithTimeout(new FirstDataTimeoutError(firstDataTimeout), `[Intercept] \u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6`);
      }
    }, firstDataTimeout);
    overallTimer = setTimeout(() => {
      failWithTimeout(new OverallTimeoutError(timeout), `[Intercept] \u6574\u4F53\u8BF7\u6C42\u8D85\u65F6`);
    }, timeout);
    await page.route(urlPattern, async (route) => {
      const request = route.request();
      const requestUrl = request.url();
      logger8.info(`[MITM] \u5DF2\u62E6\u622A\u8BF7\u6C42: ${requestUrl}`);
      try {
        const browserHeaders = await request.allHeaders();
        const postData = request.postData();
        const urlObj = new import_url.URL(requestUrl);
        const headers = {};
        for (const [name, value] of Object.entries(browserHeaders)) {
          // accept-encoding: keep the upstream body uncompressed so chunks are readable text.
          // content-length: let Node frame the replayed body itself.
          // ":"-prefixed names: defensive; Node rejects them as invalid header names.
          if (name === "accept-encoding" || name === "content-length" || name.startsWith(":")) {
            continue;
          }
          headers[name] = value;
        }
        const reqOptions = {
          hostname: urlObj.hostname,
          // Honour an explicit port in the URL; fall back to the HTTPS default.
          port: urlObj.port || 443,
          path: urlObj.pathname + urlObj.search,
          method: request.method(),
          headers,
          timeout
        };
        const req = import_https.default.request(reqOptions, (res) => {
          const chunks = [];
          // Streaming decoder: a multi-byte character split across two chunks
          // is decoded once complete instead of becoming U+FFFD twice.
          // ignoreBOM keeps a leading BOM in the text, as Buffer#toString does.
          const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
          let accumulatedText = "";
          res.on("data", (chunk) => {
            if (!hasReceivedFirstData) {
              hasReceivedFirstData = true;
              clearFirstDataTimer();
              logger8.debug("[Intercept] \u5DF2\u63A5\u6536\u9996\u6B21\u6570\u636E");
            }
            chunks.push(chunk);
            const textChunk = decoder.decode(chunk, { stream: true });
            accumulatedText += textChunk;
            logger8.debug(`[CHUNK] ${chunk.length} bytes`);
            if (onData) {
              try {
                onData(textChunk, streamResolve, accumulatedText);
              } catch (e) {
                logger8.error(`onData callback error: ${e.message}`);
              }
            }
          });
          res.on("end", () => {
            logger8.info("[MITM] \u4E0A\u6E38\u54CD\u5E94\u7ED3\u675F (Stream End)");
            // The stream is over, so "first data" can no longer time out. The
            // overall timer stays armed until something settles the promise.
            clearFirstDataTimer();
            // Flush any bytes of an incomplete trailing sequence.
            accumulatedText += decoder.decode();
            const fullBody = Buffer.concat(chunks);
            if (onEnd) {
              try {
                onEnd(accumulatedText, streamResolve);
              } catch (e) {
                logger8.error(`onEnd callback error: ${e.message}`);
              }
            } else if (!onData) {
              streamResolve(accumulatedText);
            }
            route.fulfill({
              status: res.statusCode,
              headers: res.headers,
              body: fullBody
            }).catch((e) => logger8.warning(`Route fulfill failed: ${e.message}`));
          });
          res.on("error", (e) => {
            logger8.error(`[MITM] Upstream response error: ${e.message}`);
            route.abort().catch((abortError) => logger8.warning(`Route abort failed: ${abortError.message}`));
            streamReject(e);
          });
        });
        req.on("error", (e) => {
          logger8.error(`[MITM] Upstream request error: ${e.message}`);
          route.abort().catch((abortError) => logger8.warning(`Route abort failed: ${abortError.message}`));
          streamReject(e);
        });
        if (postData) {
          req.write(postData);
        }
        req.end();
      } catch (e) {
        logger8.error(`[MITM] Setup error: ${e.message}`);
        // Let the browser perform the request itself, and tell the caller the
        // capture failed instead of leaving the promise pending forever.
        route.continue().catch((continueError) => logger8.warning(`Route continue failed: ${continueError.message}`));
        streamReject(e);
      }
    });
    return capturePromise;
  }
};
|
|
1206
|
+
|
|
1026
1207
|
// index.js
|
|
1027
1208
|
var usePlaywrightToolKit = () => {
|
|
1028
1209
|
return {
|
|
@@ -1034,6 +1215,7 @@ var usePlaywrightToolKit = () => {
|
|
|
1034
1215
|
Constants: constants_exports,
|
|
1035
1216
|
Utils,
|
|
1036
1217
|
Captcha,
|
|
1218
|
+
Sse,
|
|
1037
1219
|
Errors: errors_exports
|
|
1038
1220
|
};
|
|
1039
1221
|
};
|