@skrillex1224/playwright-toolkit 2.0.58 → 2.0.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -84,7 +84,9 @@ function createLogger(moduleName) {
84
84
  // src/errors.js
85
85
  var errors_exports = {};
86
86
  __export(errors_exports, {
87
- CrawlerError: () => CrawlerError
87
+ CrawlerError: () => CrawlerError,
88
+ FirstDataTimeoutError: () => FirstDataTimeoutError,
89
+ OverallTimeoutError: () => OverallTimeoutError
88
90
  });
89
91
  import { serializeError } from "serialize-error";
90
92
  var CrawlerError = class _CrawlerError extends Error {
@@ -137,6 +139,38 @@ var CrawlerError = class _CrawlerError extends Error {
137
139
  return crawlerError;
138
140
  }
139
141
  };
142
var FirstDataTimeoutError = class _FirstDataTimeoutError extends Error {
  /**
   * Error describing a stream that produced no data before the first-data
   * deadline. The message embeds the elapsed timeout in milliseconds.
   *
   * @param {number} timeout - Timeout that elapsed, in milliseconds.
   * @param {Object} [context={}] - Extra diagnostic information kept on the error.
   */
  constructor(timeout, context = {}) {
    super(`\u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6 (${timeout}ms)`);
    this.name = "FirstDataTimeoutError";
    this.timeout = timeout;
    // The default parameter only replaces `undefined`; normalise an explicit
    // `null` as well so readers of `error.context` never dereference null.
    this.context = context === null ? {} : context;
    // ISO-8601 creation time of the error.
    this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    // V8-only API: drop the constructor frame from the captured stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _FirstDataTimeoutError);
    }
  }
};
158
var OverallTimeoutError = class _OverallTimeoutError extends Error {
  /**
   * Error describing a request that did not finish before the overall
   * deadline. The message embeds the elapsed timeout in milliseconds.
   *
   * @param {number} timeout - Timeout that elapsed, in milliseconds.
   * @param {Object} [context={}] - Extra diagnostic information kept on the error.
   */
  constructor(timeout, context = {}) {
    super(`\u6574\u4F53\u8BF7\u6C42\u8D85\u65F6 (${timeout}ms)`);
    this.name = "OverallTimeoutError";
    this.timeout = timeout;
    // The default parameter only replaces `undefined`; normalise an explicit
    // `null` as well so readers of `error.context` never dereference null.
    this.context = context === null ? {} : context;
    // ISO-8601 creation time of the error.
    this.timestamp = (/* @__PURE__ */ new Date()).toISOString();
    // V8-only API: drop the constructor frame from the captured stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _OverallTimeoutError);
    }
  }
};
140
174
 
141
175
  // src/apify-kit.js
142
176
  import { serializeError as serializeError2 } from "serialize-error";
@@ -243,28 +277,6 @@ var ApifyKit = {
243
277
  import delay from "delay";
244
278
  var logger2 = createLogger("Utils");
245
279
  var Utils = {
246
- /**
247
- * Parse the text of an SSE stream into an array of JSON events
248
- * Supports both the `data: {...}` and `data:{...}` line formats
249
- */
250
- parseSseStream(sseStreamText) {
251
- const events = [];
252
- const lines = sseStreamText.split("\n");
253
- for (const line of lines) {
254
- if (line.startsWith("data:")) {
255
- try {
256
- const jsonContent = line.substring(5).trim();
257
- if (jsonContent && jsonContent !== "[DONE]") {
258
- events.push(JSON.parse(jsonContent));
259
- }
260
- } catch (e) {
261
- logger2.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
262
- }
263
- }
264
- }
265
- logger2.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
266
- return events;
267
- },
268
280
  /**
269
281
  * 解析 Cookie 字符串为 Playwright 格式的 Cookie 数组
270
282
  * @param {string} cookieString - Cookie 字符串
@@ -994,6 +1006,175 @@ var Captcha = {
994
1006
  useCaptchaMonitor
995
1007
  };
996
1008
 
1009
// src/sse.js
import http from "http";
import https from "https";
import { StringDecoder } from "string_decoder";
import { URL } from "url";
var logger8 = createLogger("Sse");
var Sse = {
  /**
   * Parse the text of a Server-Sent Events stream into an array of JSON payloads.
   *
   * Only lines starting with `data:` are considered; both `data: {...}` and
   * `data:{...}` are accepted. Lines whose payload is empty, is the `[DONE]`
   * sentinel, or is not valid JSON are skipped (invalid JSON is logged at
   * debug level).
   *
   * @param {string} sseStreamText - Raw stream text; `null`/`undefined` is treated as empty.
   * @returns {Array<Object>} Parsed payloads in stream order.
   */
  parseSseStream(sseStreamText) {
    const events = [];
    const text = sseStreamText == null ? "" : String(sseStreamText);
    for (const line of text.split(/\r?\n/)) {
      if (!line.startsWith("data:")) {
        continue;
      }
      const jsonContent = line.substring(5).trim();
      // `[DONE]` is a stream terminator, not JSON: skip it quietly instead of
      // reporting a parse failure for every stream.
      if (!jsonContent || jsonContent === "[DONE]") {
        continue;
      }
      try {
        events.push(JSON.parse(jsonContent));
      } catch (e) {
        logger8.debug("parseSseStream", `JSON \u89E3\u6790\u5931\u8D25: ${e.message}, line: ${line.substring(0, 100)}...`);
      }
    }
    logger8.success("parseSseStream", `\u89E3\u6790\u5B8C\u6210, events \u6570\u91CF: ${events.length}`);
    return events;
  },
  /**
   * Intercept requests matching `urlPattern` and replay them with Node's own
   * http/https client so the response can be observed chunk by chunk. When the
   * upstream response ends, the buffered body is handed back to the page via
   * `route.fulfill`.
   *
   * Settlement of the returned promise:
   * - `onData` / `onEnd` receive a `resolve` function and may resolve at any time;
   * - without `onEnd`, the promise resolves with the full decoded text when the
   *   stream ends (a no-op if `onData` already resolved it);
   * - it rejects with `FirstDataTimeoutError` / `OverallTimeoutError` on timeout,
   *   and with the underlying error on upstream or setup failures.
   * On timeout or upstream failure the in-flight upstream request is destroyed
   * and the route is aborted.
   *
   * Note: because this is an async function returning the capture promise,
   * awaiting the call waits for the capture to settle, not merely for the
   * route to be registered.
   *
   * @param {import('playwright').Page} page
   * @param {string|RegExp} urlPattern - URL pattern to intercept.
   * @param {object} options
   * @param {function(string, function, string): void} [options.onData] - (textChunk, resolve, accumulatedText) => void
   * @param {function(string, function): void} [options.onEnd] - (fullText, resolve) => void
   * @param {function(Error): void} [options.onTimeout] - Called with the FirstDataTimeoutError or OverallTimeoutError before rejection.
   * @param {number} [options.firstDataTimeout=90000] - Deadline for the first chunk, in ms.
   * @param {number} [options.timeout=180000] - Deadline for the whole request, in ms.
   * @returns {Promise<any>} Settles as described above.
   */
  async intercept(page, urlPattern, options = {}) {
    const {
      onData,
      onEnd,
      onTimeout,
      firstDataTimeout = 9e4,
      timeout = 18e4
    } = options;
    let streamResolve, streamReject;
    const capturePromise = new Promise((resolve, reject) => {
      streamResolve = resolve;
      streamReject = reject;
    });
    let firstDataTimer = null;
    let overallTimer = null;
    let hasReceivedFirstData = false;
    // Cancel functions of upstream requests that are still running.
    const inFlight = new Set();
    const clearAllTimers = () => {
      if (firstDataTimer) {
        clearTimeout(firstDataTimer);
        firstDataTimer = null;
      }
      if (overallTimer) {
        clearTimeout(overallTimer);
        overallTimer = null;
      }
    };
    const failWithTimeout = (error, logMessage) => {
      logger8.error(logMessage);
      clearAllTimers();
      if (onTimeout) {
        // A throwing callback must not prevent the rejection below.
        try {
          onTimeout(error);
        } catch (e) {
          logger8.error(`onTimeout callback error: ${e.message}`);
        }
      }
      // Release sockets and pending routes instead of leaving them open.
      for (const cancel of [...inFlight]) {
        cancel(error);
      }
      streamReject(error);
    };
    firstDataTimer = setTimeout(() => {
      if (!hasReceivedFirstData) {
        failWithTimeout(new FirstDataTimeoutError(firstDataTimeout), `[Intercept] \u9996\u6B21\u6570\u636E\u63A5\u6536\u8D85\u65F6`);
      }
    }, firstDataTimeout);
    overallTimer = setTimeout(() => {
      failWithTimeout(new OverallTimeoutError(timeout), `[Intercept] \u6574\u4F53\u8BF7\u6C42\u8D85\u65F6`);
    }, timeout);
    const handleRoute = async (route) => {
      const request = route.request();
      const requestUrl = request.url();
      logger8.info(`[MITM] \u5DF2\u62E6\u622A\u8BF7\u6C42: ${requestUrl}`);
      let req = null;
      let finished = false;
      // Tear down this upstream exchange exactly once and reject the capture.
      const cancel = (reason) => {
        if (finished) {
          return;
        }
        finished = true;
        inFlight.delete(cancel);
        if (req) {
          req.destroy();
        }
        route.abort().catch((e) => logger8.warning(`Route abort failed: ${e.message}`));
        streamReject(reason);
      };
      const onUpstreamError = (e) => {
        if (finished) {
          return;
        }
        logger8.error(`[MITM] Upstream request error: ${e.message}`);
        clearAllTimers();
        cancel(e);
      };
      try {
        const urlObj = new URL(requestUrl);
        const isPlainHttp = urlObj.protocol === "http:";
        const transport = isPlainHttp ? http : https;
        const headers = {};
        for (const [name, value] of Object.entries(await request.allHeaders())) {
          // accept-encoding is dropped so the upstream replies uncompressed;
          // content-length is dropped so Node frames the body itself.
          // Names starting with ":" (HTTP/2 pseudo-headers, should the browser
          // report any) are not valid header names for Node's HTTP/1.1 client.
          if (name === "accept-encoding" || name === "content-length" || name.startsWith(":")) {
            continue;
          }
          headers[name] = value;
        }
        const postData = request.postData();
        const reqOptions = {
          hostname: urlObj.hostname,
          // Honour an explicit port; otherwise use the scheme's default.
          port: urlObj.port || (isPlainHttp ? 80 : 443),
          path: urlObj.pathname + urlObj.search,
          method: request.method(),
          headers,
          // Socket idle timeout; Node only emits "timeout" for it, so the
          // deadlines are enforced by the timers above.
          timeout
        };
        req = transport.request(reqOptions);
        inFlight.add(cancel);
        req.on("error", onUpstreamError);
        req.on("response", (res) => {
          const chunks = [];
          // Buffers incomplete multi-byte sequences between chunks so that
          // characters split across chunk boundaries decode correctly.
          const decoder = new StringDecoder("utf8");
          let accumulatedText = "";
          res.on("error", onUpstreamError);
          res.on("data", (chunk) => {
            if (!hasReceivedFirstData) {
              hasReceivedFirstData = true;
              if (firstDataTimer) {
                clearTimeout(firstDataTimer);
                firstDataTimer = null;
              }
              logger8.debug("[Intercept] \u5DF2\u63A5\u6536\u9996\u6B21\u6570\u636E");
            }
            chunks.push(chunk);
            const textChunk = decoder.write(chunk);
            accumulatedText += textChunk;
            logger8.debug(`[CHUNK] ${chunk.length} bytes`);
            if (onData) {
              try {
                onData(textChunk, streamResolve, accumulatedText);
              } catch (e) {
                logger8.error(`onData callback error: ${e.message}`);
              }
            }
          });
          res.on("end", () => {
            if (finished) {
              return;
            }
            finished = true;
            inFlight.delete(cancel);
            logger8.info("[MITM] \u4E0A\u6E38\u54CD\u5E94\u7ED3\u675F (Stream End)");
            clearAllTimers();
            // Flush bytes of a trailing incomplete sequence, if any.
            accumulatedText += decoder.end();
            const fullBody = Buffer.concat(chunks);
            if (onEnd) {
              try {
                onEnd(accumulatedText, streamResolve);
              } catch (e) {
                logger8.error(`onEnd callback error: ${e.message}`);
              }
            } else {
              // No onEnd: settle with the full text so a stream that ends
              // without onData resolving cannot leave the caller hanging.
              streamResolve(accumulatedText);
            }
            route.fulfill({
              status: res.statusCode,
              headers: res.headers,
              body: fullBody
            }).catch((e) => logger8.warning(`Route fulfill failed: ${e.message}`));
          });
        });
        if (postData) {
          req.write(postData);
        }
        req.end();
      } catch (e) {
        logger8.error(`[MITM] Setup error: ${e.message}`);
        clearAllTimers();
        finished = true;
        inFlight.delete(cancel);
        if (req) {
          req.destroy();
        }
        route.continue().catch((continueError) => logger8.warning(`Route continue failed: ${continueError.message}`));
        // The timers are cleared, so without this the capture would never settle.
        streamReject(e);
      }
    };
    try {
      await page.route(urlPattern, handleRoute);
    } catch (e) {
      // Registration failed: do not leave armed timers behind.
      clearAllTimers();
      throw e;
    }
    return capturePromise;
  }
};
1177
+
997
1178
  // index.js
998
1179
  var usePlaywrightToolKit = () => {
999
1180
  return {
@@ -1005,6 +1186,7 @@ var usePlaywrightToolKit = () => {
1005
1186
  Constants: constants_exports,
1006
1187
  Utils,
1007
1188
  Captcha,
1189
+ Sse,
1008
1190
  Errors: errors_exports
1009
1191
  };
1010
1192
  };