@skrillex1224/playwright-toolkit 2.0.58 → 2.0.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -113,7 +113,9 @@ function createLogger(moduleName) {
113
113
  // src/errors.js
114
114
  var errors_exports = {};
115
115
  __export(errors_exports, {
116
- CrawlerError: () => CrawlerError
116
+ CrawlerError: () => CrawlerError,
117
+ FirstDataTimeoutError: () => FirstDataTimeoutError,
118
+ OverallTimeoutError: () => OverallTimeoutError
117
119
  });
118
120
  var import_serialize_error = require("serialize-error");
119
121
  var CrawlerError = class _CrawlerError extends Error {
@@ -166,6 +168,38 @@ var CrawlerError = class _CrawlerError extends Error {
166
168
  return crawlerError;
167
169
  }
168
170
  };
171
/**
 * Error describing a "first data" timeout: no data was received before the
 * given deadline elapsed.
 *
 * Instances carry:
 *   - `name`      always "FirstDataTimeoutError"
 *   - `timeout`   the deadline that elapsed, in milliseconds
 *   - `context`   caller-supplied diagnostic object (never null)
 *   - `timestamp` ISO-8601 creation time
 */
var FirstDataTimeoutError = class _FirstDataTimeoutError extends Error {
  /**
   * @param {number} timeout - Deadline in milliseconds; interpolated into the message.
   * @param {Object} [context={}] - Extra diagnostic information. `null` and
   *   `undefined` are both stored as an empty object.
   */
  constructor(timeout, context = {}) {
    super(`\u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6 (${timeout}ms)`);
    this.name = "FirstDataTimeoutError";
    this.timeout = timeout;
    // The default parameter only covers `undefined`; an explicit `null` would
    // otherwise be stored as-is and break readers that access `error.context.x`.
    this.context = context == null ? {} : context;
    this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    // V8-only API: drops the constructor frame from the captured stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _FirstDataTimeoutError);
    }
  }
};
187
/**
 * Error describing an overall request timeout: the whole request did not
 * finish before the given deadline elapsed.
 *
 * Instances carry:
 *   - `name`      always "OverallTimeoutError"
 *   - `timeout`   the deadline that elapsed, in milliseconds
 *   - `context`   caller-supplied diagnostic object (never null)
 *   - `timestamp` ISO-8601 creation time
 */
var OverallTimeoutError = class _OverallTimeoutError extends Error {
  /**
   * @param {number} timeout - Deadline in milliseconds; interpolated into the message.
   * @param {Object} [context={}] - Extra diagnostic information. `null` and
   *   `undefined` are both stored as an empty object.
   */
  constructor(timeout, context = {}) {
    super(`\u6574\u4F53\u8BF7\u6C42\u8D85\u65F6 (${timeout}ms)`);
    this.name = "OverallTimeoutError";
    this.timeout = timeout;
    // The default parameter only covers `undefined`; an explicit `null` would
    // otherwise be stored as-is and break readers that access `error.context.x`.
    this.context = context == null ? {} : context;
    this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    // V8-only API: drops the constructor frame from the captured stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _OverallTimeoutError);
    }
  }
};
169
203
 
170
204
  // src/apify-kit.js
171
205
  var import_serialize_error2 = require("serialize-error");
@@ -272,28 +306,6 @@ var ApifyKit = {
272
306
  var import_delay = __toESM(require("delay"), 1);
273
307
  var logger2 = createLogger("Utils");
274
308
  var Utils = {
275
- /**
276
- * Parse SSE stream text: split on "\n" and JSON-decode the payload of every line that starts with "data:", returning the decoded events as an array.
277
- * Accepts both `data: {...}` and `data:{...}`; empty payloads and the "[DONE]" sentinel are skipped, and payloads that fail JSON.parse are logged at debug level and dropped.
278
- */
279
- parseSseStream(sseStreamText) {
280
- const events = [];
281
- const lines = sseStreamText.split("\n");
282
- for (const line of lines) {
283
- if (line.startsWith("data:")) {
284
- try {
285
- const jsonContent = line.substring(5).trim();
286
- if (jsonContent && jsonContent !== "[DONE]") {
287
- events.push(JSON.parse(jsonContent));
288
- }
289
- } catch (e) {
290
- logger2.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
291
- }
292
- }
293
- }
294
- logger2.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
295
- return events;
296
- },
297
309
  /**
298
310
  * 解析 Cookie 字符串为 Playwright 格式的 Cookie 数组
299
311
  * @param {string} cookieString - Cookie 字符串
@@ -1023,6 +1035,175 @@ var Captcha = {
1023
1035
  useCaptchaMonitor
1024
1036
  };
1025
1037
 
1038
+ // src/sse.js
1039
+ var import_https = __toESM(require("https"), 1);
1040
+ var import_url = require("url");
1041
+ var logger8 = createLogger("Sse");
1042
/**
 * Server-Sent-Events helpers: a text parser and a Playwright route
 * interceptor that replays the matched request through Node's `https` module
 * so the response can be observed chunk by chunk.
 */
var Sse = {
  /**
   * Parse SSE stream text into an array of JSON-decoded events.
   *
   * The text is split on "\n"; only lines starting with "data:" are
   * considered, so both `data: {...}` and `data:{...}` are accepted (the
   * payload is trimmed, which also removes a trailing "\r"). Empty payloads
   * and the "[DONE]" sentinel are skipped silently. Payloads that are not
   * valid JSON are logged at debug level and dropped.
   *
   * @param {string} sseStreamText - Raw SSE text; `null`/`undefined` is treated as empty.
   * @returns {Array<Object>} Decoded events in stream order.
   */
  parseSseStream(sseStreamText) {
    const events = [];
    const text = sseStreamText == null ? "" : String(sseStreamText);
    for (const line of text.split("\n")) {
      if (!line.startsWith("data:")) {
        continue;
      }
      const jsonContent = line.substring(5).trim();
      // "[DONE]" is an end-of-stream marker, not JSON: skip it instead of
      // letting JSON.parse fail and log a misleading parse error.
      if (!jsonContent || jsonContent === "[DONE]") {
        continue;
      }
      try {
        events.push(JSON.parse(jsonContent));
      } catch (e) {
        logger8.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
      }
    }
    logger8.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
    return events;
  },
  /**
   * Intercept requests matching `urlPattern` and replay them with Node's
   * `https` module so streamed response data can be captured as it arrives.
   * When the upstream response ends, the full body is handed back to the
   * page via `route.fulfill`.
   *
   * Settlement of the returned promise:
   *   - resolves with whatever `onData` / `onEnd` pass to their `resolve` argument;
   *   - resolves with the accumulated text at stream end when neither
   *     `onData` nor `onEnd` is supplied;
   *   - rejects with FirstDataTimeoutError / OverallTimeoutError on timeout;
   *   - rejects with the underlying error on upstream request/response errors
   *     or when the upstream request cannot be set up.
   * Caveat: if `onData` is supplied without `onEnd` and never calls `resolve`,
   * the promise is still pending after the stream ends.
   *
   * Both timers start as soon as this method is called, not when a request
   * is intercepted. The upstream request is always sent over https; the port
   * comes from the URL and defaults to 443.
   *
   * @param {import('playwright').Page} page
   * @param {string|RegExp} urlPattern - URL pattern handed to `page.route`.
   * @param {object} [options]
   * @param {function(string, function, string): void} [options.onData] - (textChunk, resolve, accumulatedText) => void
   * @param {function(string, function): void} [options.onEnd] - (fullText, resolve) => void
   * @param {function(Error): void} [options.onTimeout] - Called with the FirstDataTimeoutError or OverallTimeoutError before the promise rejects.
   * @param {number} [options.firstDataTimeout=90000] - Max wait for the first chunk (ms).
   * @param {number} [options.timeout=180000] - Max overall wait (ms); also passed as the socket timeout option of the upstream request.
   * @returns {Promise<any>} Promise settled as described above.
   */
  async intercept(page, urlPattern, options = {}) {
    const {
      onData,
      onEnd,
      onTimeout,
      firstDataTimeout = 9e4,
      timeout = 18e4
    } = options == null ? {} : options;
    let streamResolve, streamReject;
    const capturePromise = new Promise((resolve, reject) => {
      streamResolve = resolve;
      streamReject = reject;
    });
    let firstDataTimer = null;
    let overallTimer = null;
    let hasReceivedFirstData = false;
    // Upstream requests still in flight, so a timeout can tear them down.
    const activeRequests = new Set();
    const clearAllTimers = () => {
      if (firstDataTimer) {
        clearTimeout(firstDataTimer);
        firstDataTimer = null;
      }
      if (overallTimer) {
        clearTimeout(overallTimer);
        overallTimer = null;
      }
    };
    // Resolver handed to user callbacks: once the caller has its result the
    // timeouts must not fire anymore (otherwise onTimeout could run after success).
    const resolveCapture = (value) => {
      clearAllTimers();
      streamResolve(value);
    };
    const failWithTimeout = (error, logMessage) => {
      logger8.error(logMessage);
      clearAllTimers();
      if (onTimeout) {
        try {
          onTimeout(error);
        } catch (e) {
          // A throwing callback must not prevent the rejection below.
          logger8.error(`onTimeout callback error: ${e.message}`);
        }
      }
      streamReject(error);
      // Release sockets of requests that will never be consumed.
      for (const pending of activeRequests) {
        pending.destroy(error);
      }
    };
    firstDataTimer = setTimeout(() => {
      if (!hasReceivedFirstData) {
        failWithTimeout(
          new FirstDataTimeoutError(firstDataTimeout),
          `[Intercept] \u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6`
        );
      }
    }, firstDataTimeout);
    overallTimer = setTimeout(() => {
      failWithTimeout(
        new OverallTimeoutError(timeout),
        `[Intercept] \u6574\u4F53\u8BF7\u6C42\u8D85\u65F6`
      );
    }, timeout);
    await page.route(urlPattern, async (route) => {
      const request = route.request();
      const requestUrl = request.url();
      logger8.info(`[MITM] \u5DF2\u62E6\u622A\u8BF7\u6C42: ${requestUrl}`);
      // A route may be settled only once; later attempts are ignored and a
      // rejected settle call is logged instead of becoming an unhandled rejection.
      let routeSettled = false;
      const settleRoute = (label, action) => {
        if (routeSettled) {
          return;
        }
        routeSettled = true;
        action().catch((e) => logger8.warning(`Route ${label} failed: ${e.message}`));
      };
      try {
        const headers = await request.allHeaders();
        const postData = request.postData();
        const urlObj = new import_url.URL(requestUrl);
        // Ask upstream for an uncompressed body and let Node frame the request body itself.
        delete headers["accept-encoding"];
        delete headers["content-length"];
        // Names starting with ":" (HTTP/2 pseudo-headers, which may be present
        // when the browser used HTTP/2) are not valid HTTP/1.1 header names and
        // would make https.request throw.
        for (const headerName of Object.keys(headers)) {
          if (headerName.startsWith(":")) {
            delete headers[headerName];
          }
        }
        const reqOptions = {
          hostname: urlObj.hostname,
          // URL#port is "" when the URL uses the scheme's default port.
          port: urlObj.port ? Number(urlObj.port) : 443,
          path: urlObj.pathname + urlObj.search,
          method: request.method(),
          headers,
          timeout
        };
        const req = import_https.default.request(reqOptions, (res) => {
          const chunks = [];
          // Streaming decoder: a multi-byte character split across two chunks
          // is emitted once complete instead of as replacement characters.
          // ignoreBOM keeps a leading BOM in the text, as Buffer#toString did.
          const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
          let accumulatedText = "";
          res.on("data", (chunk) => {
            if (!hasReceivedFirstData) {
              hasReceivedFirstData = true;
              if (firstDataTimer) {
                clearTimeout(firstDataTimer);
                firstDataTimer = null;
              }
              logger8.debug("[Intercept] \u5DF2\u63A5\u6536\u9996\u6B21\u6570\u636E");
            }
            chunks.push(chunk);
            const textChunk = decoder.decode(chunk, { stream: true });
            accumulatedText += textChunk;
            logger8.debug(`[CHUNK] ${chunk.length} bytes`);
            if (onData) {
              try {
                onData(textChunk, resolveCapture, accumulatedText);
              } catch (e) {
                logger8.error(`onData callback error: ${e.message}`);
              }
            }
          });
          res.on("end", () => {
            logger8.info("[MITM] \u4E0A\u6E38\u54CD\u5E94\u7ED3\u675F (Stream End)");
            clearAllTimers();
            // Flush bytes of a trailing, incomplete character (if any).
            accumulatedText += decoder.decode();
            const fullBody = Buffer.concat(chunks);
            if (onEnd) {
              try {
                onEnd(accumulatedText, resolveCapture);
              } catch (e) {
                logger8.error(`onEnd callback error: ${e.message}`);
              }
            } else if (!onData) {
              resolveCapture(accumulatedText);
            }
            settleRoute("fulfill", () => route.fulfill({
              status: res.statusCode,
              headers: res.headers,
              body: fullBody
            }));
          });
          // Without a listener, a response that fails mid-stream would leave
          // the capture promise pending with no further signal.
          res.on("error", (e) => {
            logger8.error(`[MITM] Upstream response error: ${e.message}`);
            clearAllTimers();
            settleRoute("abort", () => route.abort());
            streamReject(e);
          });
        });
        activeRequests.add(req);
        req.on("close", () => activeRequests.delete(req));
        req.on("error", (e) => {
          logger8.error(`[MITM] Upstream request error: ${e.message}`);
          clearAllTimers();
          settleRoute("abort", () => route.abort());
          streamReject(e);
        });
        if (postData) {
          req.write(postData);
        }
        req.end();
      } catch (e) {
        logger8.error(`[MITM] Setup error: ${e.message}`);
        clearAllTimers();
        // Let the browser perform the request itself, and reject: with the
        // timers cleared nothing else would ever settle the capture promise.
        settleRoute("continue", () => route.continue());
        streamReject(e);
      }
    });
    return capturePromise;
  }
};
1206
+
1026
1207
  // index.js
1027
1208
  var usePlaywrightToolKit = () => {
1028
1209
  return {
@@ -1034,6 +1215,7 @@ var usePlaywrightToolKit = () => {
1034
1215
  Constants: constants_exports,
1035
1216
  Utils,
1036
1217
  Captcha,
1218
+ Sse,
1037
1219
  Errors: errors_exports
1038
1220
  };
1039
1221
  };