ppxc-leads-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -0
- package/dist/backend/config.js +13 -0
- package/dist/backend/ppxc-client.js +156 -0
- package/dist/backend/ppxc-login-window.js +168 -0
- package/dist/backend/token-store.js +65 -0
- package/dist/browser/comments.js +9 -0
- package/dist/browser/douyin-runner.js +15 -0
- package/dist/browser/kernel/electron-profile.js +32 -0
- package/dist/browser/kernel/logger.js +57 -0
- package/dist/browser/kernel/page-scripts/index.js +1422 -0
- package/dist/browser/kernel/runner-page-manager.js +145 -0
- package/dist/browser/kernel/runner-page-session.js +1465 -0
- package/dist/browser/kernel/runner-page-session.search-parser.js +187 -0
- package/dist/browser/kernel/runner-page-session.user-agent.js +32 -0
- package/dist/browser/platform-runner.js +312 -0
- package/dist/browser/platforms/detect-platform.js +33 -0
- package/dist/browser/platforms/douyin/adapter.js +162 -0
- package/dist/browser/platforms/douyin/comments.js +130 -0
- package/dist/browser/platforms/kuaishou/adapter.js +178 -0
- package/dist/browser/platforms/kuaishou/comments.js +170 -0
- package/dist/browser/platforms/registry.js +23 -0
- package/dist/browser/platforms/shared/cdp-json-waiter.js +75 -0
- package/dist/browser/platforms/types.js +3 -0
- package/dist/browser/platforms/xiaohongshu/adapter.js +233 -0
- package/dist/browser/platforms/xiaohongshu/comments.js +184 -0
- package/dist/browser/usage-throttle.js +72 -0
- package/dist/main.js +64 -0
- package/dist/mcp/battle-report.js +325 -0
- package/dist/mcp/content-insights.js +66 -0
- package/dist/mcp/diagnostics.js +79 -0
- package/dist/mcp/server.js +829 -0
- package/dist/version.js +19 -0
- package/package.json +43 -0
- package/scripts/launch-mcp.cjs +96 -0
- package/skills/ppxc-find-customers/SKILL.md +110 -0
|
@@ -0,0 +1,1422 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Mark this CommonJS module as a transpiled ES module for interop helpers.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every named export as undefined before the real assignments
// below. The order matches the effective (right-to-left) order of the
// equivalent chained assignment, so property insertion order is unchanged.
exports.DOUYIN_SEARCH_RESPONSE_HOOK_SCRIPT = void 0;
exports.DOUYIN_VERIFY_DETECTOR = void 0;
exports.DOUYIN_PROBE_SCRIPT = void 0;
exports.DOUYIN_USER_LIKE_SEARCH_SCRIPT = void 0;
exports.DOUYIN_COLLECT_SCRIPT = void 0;
exports.DOUYIN_PRESCROLL_SCRIPT = void 0;
|
|
4
|
+
exports.DOUYIN_SEARCH_RESPONSE_HOOK_SCRIPT = `(() => {
|
|
5
|
+
try {
|
|
6
|
+
const GLOBAL_KEY = '__OPC1_DOUYIN_SEARCH_RESPONSE_HOOK__';
|
|
7
|
+
const BRIDGE_NAME = '__OPC1_DOUYIN_SEARCH_CAPTURE__';
|
|
8
|
+
const MAX_BODY_CHARS = 4 * 1024 * 1024;
|
|
9
|
+
if (globalThis[GLOBAL_KEY]) return;
|
|
10
|
+
globalThis[GLOBAL_KEY] = true;
|
|
11
|
+
|
|
12
|
+
const staticExtRe = /\\.(?:js|mjs|css|png|jpe?g|webp|gif|svg|ico|woff2?|ttf|mp4|webm|m4a|mp3)(?:$|\\?)/i;
|
|
13
|
+
const isAllowedUrl = (rawUrl, contentType) => {
|
|
14
|
+
try {
|
|
15
|
+
const parsed = new URL(String(rawUrl || ''), location.href);
|
|
16
|
+
if (!/^https?:$/i.test(parsed.protocol)) return false;
|
|
17
|
+
if (!/(^|\\.)douyin\\.com$/i.test(parsed.hostname)) return false;
|
|
18
|
+
const lower = (parsed.pathname + parsed.search).toLowerCase();
|
|
19
|
+
if (staticExtRe.test(lower)) return false;
|
|
20
|
+
const lowerType = String(contentType || '').toLowerCase();
|
|
21
|
+
if (lowerType && !/(^|[/+])json\\b|text\\/plain|application\\/octet-stream/.test(lowerType)) return false;
|
|
22
|
+
if (parsed.pathname.indexOf('/aweme/v1/web/search/item/') >= 0) return true;
|
|
23
|
+
return (
|
|
24
|
+
lower.indexOf('search') >= 0 &&
|
|
25
|
+
lower.indexOf('aweme') >= 0 &&
|
|
26
|
+
(lower.indexOf('item') >= 0 || lower.indexOf('general') >= 0)
|
|
27
|
+
);
|
|
28
|
+
} catch (_) {
|
|
29
|
+
return false;
|
|
30
|
+
}
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
const getBridge = () => {
|
|
34
|
+
const bridge = globalThis[BRIDGE_NAME];
|
|
35
|
+
return typeof bridge === 'function' ? bridge : null;
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
const publish = (source, url, status, contentType, bodyText) => {
|
|
39
|
+
try {
|
|
40
|
+
if (!isAllowedUrl(url, contentType)) return;
|
|
41
|
+
if (typeof bodyText !== 'string' || bodyText.length === 0 || bodyText.length > MAX_BODY_CHARS) return;
|
|
42
|
+
const bridge = getBridge();
|
|
43
|
+
if (!bridge) return;
|
|
44
|
+
bridge(JSON.stringify({
|
|
45
|
+
v: 1,
|
|
46
|
+
source,
|
|
47
|
+
url: String(url || ''),
|
|
48
|
+
status: Number(status || 0),
|
|
49
|
+
contentType: String(contentType || ''),
|
|
50
|
+
body: bodyText,
|
|
51
|
+
}));
|
|
52
|
+
} catch (_) {
|
|
53
|
+
// 页面 hook 必须完全静默,不能影响抖音自身脚本。
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
const inspectFetchResponse = (response) => {
|
|
58
|
+
try {
|
|
59
|
+
if (!response || typeof response.clone !== 'function') return;
|
|
60
|
+
const url = String(response.url || '');
|
|
61
|
+
const contentType = String((response.headers && response.headers.get && response.headers.get('content-type')) || '');
|
|
62
|
+
if (!isAllowedUrl(url, contentType)) return;
|
|
63
|
+
const contentLength = Number((response.headers && response.headers.get && response.headers.get('content-length')) || 0);
|
|
64
|
+
if (contentLength > MAX_BODY_CHARS) return;
|
|
65
|
+
response.clone().text().then((text) => {
|
|
66
|
+
publish('fetch', url, response.status, contentType, text);
|
|
67
|
+
}).catch(() => {});
|
|
68
|
+
} catch (_) {
|
|
69
|
+
// noop
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
const nativeFetch = globalThis.fetch;
|
|
74
|
+
if (typeof nativeFetch === 'function' && !nativeFetch.__opc1SearchHooked) {
|
|
75
|
+
const hookedFetch = function(...args) {
|
|
76
|
+
try {
|
|
77
|
+
return nativeFetch.apply(this, args).then((response) => {
|
|
78
|
+
inspectFetchResponse(response);
|
|
79
|
+
return response;
|
|
80
|
+
});
|
|
81
|
+
} catch (err) {
|
|
82
|
+
return Promise.reject(err);
|
|
83
|
+
}
|
|
84
|
+
};
|
|
85
|
+
try {
|
|
86
|
+
Object.defineProperty(hookedFetch, '__opc1SearchHooked', { value: true });
|
|
87
|
+
Object.defineProperty(hookedFetch, 'name', { value: nativeFetch.name || 'fetch' });
|
|
88
|
+
} catch (_) {}
|
|
89
|
+
globalThis.fetch = hookedFetch;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const XHR = globalThis.XMLHttpRequest;
|
|
93
|
+
if (XHR && XHR.prototype && !XHR.prototype.__opc1SearchHooked) {
|
|
94
|
+
const nativeOpen = XHR.prototype.open;
|
|
95
|
+
const nativeSend = XHR.prototype.send;
|
|
96
|
+
XHR.prototype.open = function(method, url, ...rest) {
|
|
97
|
+
try {
|
|
98
|
+
this.__opc1SearchHookUrl = String(url || '');
|
|
99
|
+
} catch (_) {}
|
|
100
|
+
return nativeOpen.call(this, method, url, ...rest);
|
|
101
|
+
};
|
|
102
|
+
XHR.prototype.send = function(...args) {
|
|
103
|
+
try {
|
|
104
|
+
this.addEventListener('loadend', () => {
|
|
105
|
+
try {
|
|
106
|
+
const url = String(this.responseURL || this.__opc1SearchHookUrl || '');
|
|
107
|
+
const contentType = String(this.getResponseHeader && this.getResponseHeader('content-type') || '');
|
|
108
|
+
if (!isAllowedUrl(url, contentType)) return;
|
|
109
|
+
const responseType = String(this.responseType || '');
|
|
110
|
+
if (responseType && responseType !== 'text' && responseType !== 'json') return;
|
|
111
|
+
let text = '';
|
|
112
|
+
if (responseType === 'json') {
|
|
113
|
+
try { text = JSON.stringify(this.response); } catch (_) { text = ''; }
|
|
114
|
+
} else {
|
|
115
|
+
text = String(this.responseText || '');
|
|
116
|
+
}
|
|
117
|
+
publish('xhr', url, this.status, contentType, text);
|
|
118
|
+
} catch (_) {
|
|
119
|
+
// noop
|
|
120
|
+
}
|
|
121
|
+
});
|
|
122
|
+
} catch (_) {}
|
|
123
|
+
return nativeSend.apply(this, args);
|
|
124
|
+
};
|
|
125
|
+
try {
|
|
126
|
+
Object.defineProperty(XHR.prototype, '__opc1SearchHooked', { value: true });
|
|
127
|
+
} catch (_) {}
|
|
128
|
+
}
|
|
129
|
+
} catch (_) {
|
|
130
|
+
// 任何失败都不影响页面加载。
|
|
131
|
+
}
|
|
132
|
+
})();`;
|
|
133
|
+
exports.DOUYIN_VERIFY_DETECTOR = `
|
|
134
|
+
function __opc1DetectVerification() {
|
|
135
|
+
try {
|
|
136
|
+
var shellReasons = [];
|
|
137
|
+
var actionReasons = [];
|
|
138
|
+
var url = String(location.href || '');
|
|
139
|
+
var urlLower = url.toLowerCase();
|
|
140
|
+
var urlHints = [
|
|
141
|
+
'/captcha/',
|
|
142
|
+
'/verify/',
|
|
143
|
+
'/security/',
|
|
144
|
+
'/safe/',
|
|
145
|
+
'/risk/',
|
|
146
|
+
'captcha',
|
|
147
|
+
'verify-bar',
|
|
148
|
+
'verifydialog',
|
|
149
|
+
'secsdk',
|
|
150
|
+
'verifycenter',
|
|
151
|
+
'security_verify',
|
|
152
|
+
'abnormal',
|
|
153
|
+
'access_denied'
|
|
154
|
+
];
|
|
155
|
+
for (var i = 0; i < urlHints.length; i++) {
|
|
156
|
+
if (urlLower.indexOf(urlHints[i]) >= 0) {
|
|
157
|
+
shellReasons.push('url:' + urlHints[i]);
|
|
158
|
+
break;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
var title = String(document.title || '');
|
|
163
|
+
var titleLower = title.toLowerCase();
|
|
164
|
+
var titleHintsCn = ['验证码中间页', '验证码', '安全验证', '人机验证', '请完成验证'];
|
|
165
|
+
for (var j = 0; j < titleHintsCn.length; j++) {
|
|
166
|
+
if (title.indexOf(titleHintsCn[j]) >= 0) {
|
|
167
|
+
shellReasons.push('title:' + titleHintsCn[j]);
|
|
168
|
+
break;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
var titleHintsEn = ['captcha', 'verify', 'security check'];
|
|
172
|
+
for (var k = 0; k < titleHintsEn.length; k++) {
|
|
173
|
+
if (titleLower.indexOf(titleHintsEn[k]) >= 0) {
|
|
174
|
+
shellReasons.push('title-en:' + titleHintsEn[k]);
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// 0.4.16: 删除过宽通配 [class*="verify"] / [id*="verify"] / [class*="verify_"]。
|
|
180
|
+
// 抖音搜索页 / 首页 DOM 正常情况就会出现 class/id 里带 verify 字样的元素
|
|
181
|
+
// (认证用户标签、安全中心入口等),通配命中会误报 verification_required,
|
|
182
|
+
// 导致一开搜就被判"已暂停新搜索"且 task 不被派出去。
|
|
183
|
+
//
|
|
184
|
+
// 0.4.18: 给"外壳"分支的 DOM 容器命中也加上可见性过滤。
|
|
185
|
+
// 抖音正常搜索页 / 首页 DOM 里会预挂一个空的 #nocaptcha-container 作为
|
|
186
|
+
// 验证码占位框(NoCaptcha 无感验证组件的预留挂载点),平时 display:none
|
|
187
|
+
// 或 0 尺寸;要求"必须真在屏幕上、非零尺寸、未被 display:none / visibility:hidden
|
|
188
|
+
// 隐藏"才算命中。
|
|
189
|
+
//
|
|
190
|
+
// 0.4.20: 修 0.4.18 没修透的同类占位 #captcha_container。
|
|
191
|
+
// 0.4.18 把 #nocaptcha-container(display:none / 0×0)的误报治了,但
|
|
192
|
+
// 0.4.19 上线后用户搜"减肥减脂代餐"又被同一类机制误报:线上 DB 里
|
|
193
|
+
// verifyReasons=["dom:#captcha_container"],URL(/search/...?type=video)、
|
|
194
|
+
// title("发现更多精彩视频 - 抖音搜索")、body 文本三层都正常——只有一个
|
|
195
|
+
// 孤零零的 #captcha_container 命中。事后看 #captcha_container 是 NoCaptcha
|
|
196
|
+
// SDK 的另一个挂载点变种:本身可见(有宽高、CSS 没隐藏)但里面是空盒子
|
|
197
|
+
// (无子元素、无文字),抖音平时把它当"如有需要就在这渲染验证组件"的预留位置。
|
|
198
|
+
// 0.4.18 加的可见性过滤拦不住这种"看得见但里面空"的元素。
|
|
199
|
+
//
|
|
200
|
+
// 0.4.20 加第三层过滤:除了可见性,还要求元素 children.length > 0
|
|
201
|
+
// 或 innerText/textContent 非空。真验证时 NoCaptcha SDK 会往容器里
|
|
202
|
+
// 注入挑战组件(滑块、按钮、文字提示、iframe 等),届时 children/innerText
|
|
203
|
+
// 都不为空,仍能识别。
|
|
204
|
+
//
|
|
205
|
+
// 0.4.21 收紧 + 加诊断:0.4.20 上线后又踩同样的坑——线上 DB 显示用户装了
|
|
206
|
+
// 0.4.20 仍然 verifyReasons=["dom:#captcha_container"]、其它三层全正常。
|
|
207
|
+
// 说明 #captcha_container 在抖音搜索页"常驻"且里面也有点东西(NoCaptcha
|
|
208
|
+
// SDK 预初始化时挂的 div / 空白文本节点),0.4.20 的"非空"过滤拦不住。
|
|
209
|
+
//
|
|
210
|
+
// 0.4.21 的双重升级:
|
|
211
|
+
// (1) 收紧过滤:除"可见 + 有内容"外,元素必须含"验证特征"之一:
|
|
212
|
+
// - 子节点里有 <iframe> 或 <canvas>(真验证组件几乎一定挂这两类)
|
|
213
|
+
// - innerText/textContent 含验证关键字(拖动 / 滑块 / 验证 /
|
|
214
|
+
// 完成 / 请按 / 点击 / 拼图)
|
|
215
|
+
// 两条都不满足直接 continue(不算命中),避免被 SDK 预占位骗到。
|
|
216
|
+
// (2) 加诊断信息:命中时把元素的 children 数、前 5 个子节点 tagName、
|
|
217
|
+
// innerText 前 60 字一起拼进 reason,方便线上 DB 复盘下次再触发
|
|
218
|
+
// 时元素里到底有什么内容,不再瞎猜。
|
|
219
|
+
//
|
|
220
|
+
// 反向风险:真验证组件如果用了非 iframe/canvas 的渲染方式 + 没出现关键字,
|
|
221
|
+
// 会被这层过滤误放。但 URL/title/body 三层独立信号一条不动——真验证页
|
|
222
|
+
// 通常会同时改 URL(/captcha/ /verify/)、title("验证码中间页")、
|
|
223
|
+
// body 文本("请拖动滑块"),交叉验证仍能识别。
|
|
224
|
+
var domCandidates = document.querySelectorAll(
|
|
225
|
+
'[id^="captcha"], [id*="captcha"], [class*="captcha"],' +
|
|
226
|
+
'[class*="captcha_container"], [class*="captcha-button"], [class*="captcha_button"],' +
|
|
227
|
+
'[class*="verify-bar"], [class*="verifyContainer"],' +
|
|
228
|
+
'[class*="secsdk"], [id*="verify_center"], [class*="verify_center"]'
|
|
229
|
+
);
|
|
230
|
+
var visibleDomHit = null;
|
|
231
|
+
var visibleDomHitDiag = null;
|
|
232
|
+
var verifyKeywords = ['拖动', '滑块', '验证', '完成', '请按', '点击', '拼图'];
|
|
233
|
+
for (var d = 0; d < domCandidates.length; d++) {
|
|
234
|
+
var candidate = domCandidates[d];
|
|
235
|
+
if (!candidate || !candidate.getBoundingClientRect) continue;
|
|
236
|
+
try {
|
|
237
|
+
var candidateRect = candidate.getBoundingClientRect();
|
|
238
|
+
if (!candidateRect || candidateRect.width <= 0 || candidateRect.height <= 0) continue;
|
|
239
|
+
var candidateStyle = window.getComputedStyle ? window.getComputedStyle(candidate) : null;
|
|
240
|
+
if (candidateStyle && (candidateStyle.display === 'none' || candidateStyle.visibility === 'hidden')) continue;
|
|
241
|
+
var candidateChildren = candidate.children || [];
|
|
242
|
+
var candidateChildCount = candidateChildren.length;
|
|
243
|
+
var candidateText = '';
|
|
244
|
+
try {
|
|
245
|
+
candidateText = String(candidate.innerText || candidate.textContent || '').trim();
|
|
246
|
+
} catch (_) {}
|
|
247
|
+
if (candidateChildCount <= 0 && candidateText.length <= 0) continue;
|
|
248
|
+
// 0.4.21 新增:必须含验证特征(iframe/canvas 子节点 或 验证关键词)
|
|
249
|
+
var childTags = [];
|
|
250
|
+
var hasVerifyChild = false;
|
|
251
|
+
for (var cc = 0; cc < candidateChildCount && cc < 10; cc++) {
|
|
252
|
+
var childTag = String((candidateChildren[cc] && candidateChildren[cc].tagName) || '').toUpperCase();
|
|
253
|
+
if (childTags.length < 5) childTags.push(childTag);
|
|
254
|
+
if (childTag === 'IFRAME' || childTag === 'CANVAS') hasVerifyChild = true;
|
|
255
|
+
}
|
|
256
|
+
var hasVerifyText = false;
|
|
257
|
+
for (var kk = 0; kk < verifyKeywords.length; kk++) {
|
|
258
|
+
if (candidateText.indexOf(verifyKeywords[kk]) >= 0) {
|
|
259
|
+
hasVerifyText = true;
|
|
260
|
+
break;
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
if (!hasVerifyChild && !hasVerifyText) continue;
|
|
264
|
+
visibleDomHit = candidate;
|
|
265
|
+
visibleDomHitDiag = {
|
|
266
|
+
c: candidateChildCount,
|
|
267
|
+
tags: childTags.join(','),
|
|
268
|
+
text: candidateText.slice(0, 60),
|
|
269
|
+
};
|
|
270
|
+
break;
|
|
271
|
+
} catch (_) {
|
|
272
|
+
visibleDomHit = candidate;
|
|
273
|
+
break;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
if (visibleDomHit) {
|
|
277
|
+
try {
|
|
278
|
+
var domHitId = String(visibleDomHit.id || '').slice(0, 60);
|
|
279
|
+
var domHitClass = String((visibleDomHit.className && visibleDomHit.className.baseVal) || visibleDomHit.className || '').slice(0, 60);
|
|
280
|
+
var baseReason = 'dom:' + (domHitId ? '#' + domHitId : '.' + domHitClass);
|
|
281
|
+
if (visibleDomHitDiag) {
|
|
282
|
+
var safeText = String(visibleDomHitDiag.text || '').replace(/"/g, "'").replace(/[\\r\\n\\t]/g, ' ');
|
|
283
|
+
baseReason += '[c=' + visibleDomHitDiag.c
|
|
284
|
+
+ ',tags=' + (visibleDomHitDiag.tags || '')
|
|
285
|
+
+ ',text="' + safeText + '"]';
|
|
286
|
+
}
|
|
287
|
+
shellReasons.push(baseReason);
|
|
288
|
+
} catch (_) {
|
|
289
|
+
shellReasons.push('dom');
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
var actionDom = document.querySelector(
|
|
294
|
+
'button[class*="verify"], button[class*="captcha"],' +
|
|
295
|
+
'[class*="captcha-button"], [class*="captcha_button"],' +
|
|
296
|
+
'canvas[class*="captcha"], canvas[class*="verify"]'
|
|
297
|
+
);
|
|
298
|
+
if (actionDom) {
|
|
299
|
+
try {
|
|
300
|
+
var rect = actionDom.getBoundingClientRect && actionDom.getBoundingClientRect();
|
|
301
|
+
if (!rect || (rect.width > 0 && rect.height > 0)) actionReasons.push('action-dom');
|
|
302
|
+
} catch (_) {
|
|
303
|
+
actionReasons.push('action-dom');
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
// body 文本兜底:不读全文,截前 2000 字就够覆盖验证码文案
|
|
308
|
+
var bodyText = '';
|
|
309
|
+
try {
|
|
310
|
+
bodyText = String(
|
|
311
|
+
(document.body && (document.body.innerText || document.body.textContent)) || ''
|
|
312
|
+
).slice(0, 2000);
|
|
313
|
+
} catch (_) {
|
|
314
|
+
bodyText = '';
|
|
315
|
+
}
|
|
316
|
+
// 0.4.16: 删掉 '验证码' / '安全验证' / '人机验证' 三个泛词。
|
|
317
|
+
// 抖音页脚、设置页、安全中心入口、客服链接等正常 UI 里很容易出现
|
|
318
|
+
// 这些字样,扫到就误判成验证页。
|
|
319
|
+
// 只保留"真正的验证页 / 中转页 / 错误页才会出现"的精准文案。
|
|
320
|
+
var shellBodyHints = [
|
|
321
|
+
'验证码中间页',
|
|
322
|
+
'请完成验证',
|
|
323
|
+
'访问异常',
|
|
324
|
+
'环境异常',
|
|
325
|
+
'滑动验证',
|
|
326
|
+
'请拖动滑块',
|
|
327
|
+
];
|
|
328
|
+
for (var s = 0; s < shellBodyHints.length; s++) {
|
|
329
|
+
if (bodyText.indexOf(shellBodyHints[s]) >= 0) {
|
|
330
|
+
shellReasons.push('body-shell:' + shellBodyHints[s]);
|
|
331
|
+
break;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
var actionBodyHints = [
|
|
336
|
+
'拖动滑块',
|
|
337
|
+
'点击按钮进行验证',
|
|
338
|
+
'滑动完成验证',
|
|
339
|
+
'按住滑块',
|
|
340
|
+
'向右滑动',
|
|
341
|
+
'完成拼图',
|
|
342
|
+
'请拖动滑块',
|
|
343
|
+
'滑动验证',
|
|
344
|
+
];
|
|
345
|
+
for (var n = 0; n < actionBodyHints.length; n++) {
|
|
346
|
+
if (bodyText.indexOf(actionBodyHints[n]) >= 0) {
|
|
347
|
+
actionReasons.push('body:' + actionBodyHints[n]);
|
|
348
|
+
break;
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
return {
|
|
353
|
+
hit: actionReasons.length > 0 || shellReasons.length > 0,
|
|
354
|
+
reasons: actionReasons.concat(shellReasons),
|
|
355
|
+
shellHit: shellReasons.length > 0,
|
|
356
|
+
shellReasons: shellReasons,
|
|
357
|
+
};
|
|
358
|
+
} catch (err) {
|
|
359
|
+
return { hit: false, reasons: ['detector_throw'], shellHit: false, shellReasons: [] };
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
`;
|
|
363
|
+
exports.DOUYIN_PROBE_SCRIPT = `(async () => {
|
|
364
|
+
try {
|
|
365
|
+
${exports.DOUYIN_VERIFY_DETECTOR}
|
|
366
|
+
|
|
367
|
+
const url = location.href || '';
|
|
368
|
+
const title = (document.title || '').slice(0, 200);
|
|
369
|
+
// ---- 页面内 cookie 只做辅助手段,真实登录态由主进程读 partition cookies 判断
|
|
370
|
+
const cookieText = String(document.cookie || '');
|
|
371
|
+
const hasSessionId = /(?:^|;\\s*)(sessionid|sessionid_ss|sid_guard)=/.test(cookieText);
|
|
372
|
+
|
|
373
|
+
// ---- 验证码 / 安全验证页判定(优先级最高)
|
|
374
|
+
const verifyDetect = __opc1DetectVerification();
|
|
375
|
+
const hasVerify = Boolean(verifyDetect.hit);
|
|
376
|
+
|
|
377
|
+
// ---- 登录弹窗(覆盖在搜索页上方时也算 login_required)
|
|
378
|
+
const loginModal = document.querySelector(
|
|
379
|
+
'[data-e2e="login-modal"], [class*="login-mask"], [id="login-pannel"]'
|
|
380
|
+
);
|
|
381
|
+
|
|
382
|
+
// ---- 搜索结果页:是否能找到任何 /video/<id> 链接
|
|
383
|
+
const anyVideoLink = document.querySelector(
|
|
384
|
+
'a[href*="/video/"], a[href*="//www.douyin.com/video/"]'
|
|
385
|
+
);
|
|
386
|
+
|
|
387
|
+
// verify 优先级高于 login / search_ready:验证码页上也可能同时有登录弹窗 DOM 残留,
|
|
388
|
+
// 如果漏判成 login_required 会让用户去扫码,扫完还是会被拦下来,体验更差。
|
|
389
|
+
let status = 'unknown';
|
|
390
|
+
if (hasVerify) status = 'verification_required';
|
|
391
|
+
else if (loginModal) status = 'login_required';
|
|
392
|
+
else if (anyVideoLink) status = 'search_ready';
|
|
393
|
+
else if (hasSessionId) status = 'logged_in';
|
|
394
|
+
else status = 'unknown';
|
|
395
|
+
|
|
396
|
+
return {
|
|
397
|
+
ok: true,
|
|
398
|
+
status,
|
|
399
|
+
details: {
|
|
400
|
+
url,
|
|
401
|
+
title,
|
|
402
|
+
hasSessionId,
|
|
403
|
+
hasVerify,
|
|
404
|
+
hasVerifyShell: Boolean(verifyDetect.shellHit),
|
|
405
|
+
hasLoginModal: Boolean(loginModal),
|
|
406
|
+
hasResultLink: Boolean(anyVideoLink),
|
|
407
|
+
verifyReasons: verifyDetect.reasons || [],
|
|
408
|
+
verifyShellReasons: verifyDetect.shellReasons || [],
|
|
409
|
+
},
|
|
410
|
+
};
|
|
411
|
+
} catch (err) {
|
|
412
|
+
return {
|
|
413
|
+
ok: false,
|
|
414
|
+
code: 'page_throw',
|
|
415
|
+
message: (err && err.message) || String(err),
|
|
416
|
+
};
|
|
417
|
+
}
|
|
418
|
+
})();`;
|
|
419
|
+
exports.DOUYIN_USER_LIKE_SEARCH_SCRIPT = `(async () => {
|
|
420
|
+
try {
|
|
421
|
+
${exports.DOUYIN_VERIFY_DETECTOR}
|
|
422
|
+
|
|
423
|
+
const opts = (globalThis.__OPC1_USER_LIKE_SEARCH_OPTS || {});
|
|
424
|
+
const keyword = String(opts.keyword || '').trim();
|
|
425
|
+
const maxWaitMs = Math.max(3000, Math.min(20000, Number(opts.maxWaitMs) || 12000));
|
|
426
|
+
const pollIntervalMs = Math.max(150, Math.min(1000, Number(opts.pollIntervalMs) || 300));
|
|
427
|
+
const steps = [];
|
|
428
|
+
const startedAt = Date.now();
|
|
429
|
+
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
430
|
+
const addStep = (name, ok, detail) => {
|
|
431
|
+
steps.push({
|
|
432
|
+
name,
|
|
433
|
+
ok: !!ok,
|
|
434
|
+
atMs: Date.now() - startedAt,
|
|
435
|
+
detail: detail ? String(detail).slice(0, 200) : '',
|
|
436
|
+
});
|
|
437
|
+
};
|
|
438
|
+
const visible = (el) => {
|
|
439
|
+
try {
|
|
440
|
+
if (!el || !el.getBoundingClientRect) return false;
|
|
441
|
+
const rect = el.getBoundingClientRect();
|
|
442
|
+
const style = window.getComputedStyle(el);
|
|
443
|
+
return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
|
|
444
|
+
} catch (_) {
|
|
445
|
+
return true;
|
|
446
|
+
}
|
|
447
|
+
};
|
|
448
|
+
const fail = (code, message) => ({
|
|
449
|
+
ok: false,
|
|
450
|
+
code,
|
|
451
|
+
message,
|
|
452
|
+
steps,
|
|
453
|
+
stats: { waitedMs: Date.now() - startedAt },
|
|
454
|
+
});
|
|
455
|
+
const checkBlocked = () => {
|
|
456
|
+
const verify = __opc1DetectVerification();
|
|
457
|
+
if (verify.hit) {
|
|
458
|
+
return fail('verification_required', '抖音搜索页要求人机校验');
|
|
459
|
+
}
|
|
460
|
+
const loginModal = document.querySelector(
|
|
461
|
+
'[data-e2e="login-modal"], [class*="login-mask"], [id="login-pannel"]'
|
|
462
|
+
);
|
|
463
|
+
if (loginModal && visible(loginModal)) {
|
|
464
|
+
return fail('login_required', '抖音未登录或登录态失效');
|
|
465
|
+
}
|
|
466
|
+
return null;
|
|
467
|
+
};
|
|
468
|
+
|
|
469
|
+
if (!keyword) return fail('invalid_keyword', 'keyword is empty');
|
|
470
|
+
|
|
471
|
+
let blocked = checkBlocked();
|
|
472
|
+
if (blocked) {
|
|
473
|
+
addStep('precheck_blocked', false, blocked.code);
|
|
474
|
+
return blocked;
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
function findSearchInput() {
|
|
478
|
+
const selectors = [
|
|
479
|
+
'input[type="search"]',
|
|
480
|
+
'input[data-e2e*="search" i]',
|
|
481
|
+
'input[placeholder*="搜索"]',
|
|
482
|
+
'input[aria-label*="搜索"]',
|
|
483
|
+
'textarea[placeholder*="搜索"]',
|
|
484
|
+
'[contenteditable="true"][data-e2e*="search" i]',
|
|
485
|
+
'[contenteditable="true"][role="textbox"]',
|
|
486
|
+
'[contenteditable="true"]',
|
|
487
|
+
];
|
|
488
|
+
for (const selector of selectors) {
|
|
489
|
+
const found = Array.from(document.querySelectorAll(selector)).filter(visible);
|
|
490
|
+
if (found.length > 0) return found[0];
|
|
491
|
+
}
|
|
492
|
+
const clickables = Array.from(document.querySelectorAll('button, a, [role="button"], [tabindex]')).filter(visible);
|
|
493
|
+
for (const el of clickables) {
|
|
494
|
+
const text = String(
|
|
495
|
+
el.textContent || el.getAttribute('aria-label') || el.getAttribute('title') || ''
|
|
496
|
+
).replace(/\\s+/g, ' ').trim();
|
|
497
|
+
if (/搜索|search/i.test(text)) {
|
|
498
|
+
try { el.click(); } catch (_) { /* ignore */ }
|
|
499
|
+
break;
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
return Array.from(document.querySelectorAll(selectors.join(','))).filter(visible)[0] || null;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
const input = findSearchInput();
|
|
506
|
+
if (!input) {
|
|
507
|
+
addStep('focus_search_box', false, 'search input not found');
|
|
508
|
+
return fail('search_input_not_found', 'search input not found');
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
try {
|
|
512
|
+
input.scrollIntoView({ block: 'center', inline: 'center' });
|
|
513
|
+
input.dispatchEvent(new MouseEvent('mouseover', { bubbles: true, cancelable: true, view: window }));
|
|
514
|
+
input.dispatchEvent(new MouseEvent('mousedown', { bubbles: true, cancelable: true, view: window }));
|
|
515
|
+
input.dispatchEvent(new MouseEvent('mouseup', { bubbles: true, cancelable: true, view: window }));
|
|
516
|
+
input.click();
|
|
517
|
+
input.focus();
|
|
518
|
+
addStep('focus_search_box', document.activeElement === input || visible(input), '');
|
|
519
|
+
} catch (err) {
|
|
520
|
+
addStep('focus_search_box', false, (err && err.message) || String(err));
|
|
521
|
+
return fail('focus_failed', 'failed to focus search input');
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
try {
|
|
525
|
+
if (input.isContentEditable) {
|
|
526
|
+
input.textContent = '';
|
|
527
|
+
input.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'deleteContentBackward', data: null }));
|
|
528
|
+
input.textContent = keyword;
|
|
529
|
+
input.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: keyword }));
|
|
530
|
+
} else {
|
|
531
|
+
const proto = input.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
|
|
532
|
+
const setter = Object.getOwnPropertyDescriptor(proto, 'value') && Object.getOwnPropertyDescriptor(proto, 'value').set;
|
|
533
|
+
if (setter) setter.call(input, '');
|
|
534
|
+
else input.value = '';
|
|
535
|
+
input.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'deleteContentBackward', data: null }));
|
|
536
|
+
if (setter) setter.call(input, keyword);
|
|
537
|
+
else input.value = keyword;
|
|
538
|
+
input.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: keyword }));
|
|
539
|
+
input.dispatchEvent(new Event('change', { bubbles: true }));
|
|
540
|
+
}
|
|
541
|
+
addStep('type_keyword', true, 'len=' + keyword.length);
|
|
542
|
+
} catch (err) {
|
|
543
|
+
addStep('type_keyword', false, (err && err.message) || String(err));
|
|
544
|
+
return fail('type_failed', 'failed to type keyword');
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
try {
|
|
548
|
+
input.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, which: 13, bubbles: true, cancelable: true }));
|
|
549
|
+
input.dispatchEvent(new KeyboardEvent('keypress', { key: 'Enter', code: 'Enter', keyCode: 13, which: 13, bubbles: true, cancelable: true }));
|
|
550
|
+
input.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', keyCode: 13, which: 13, bubbles: true, cancelable: true }));
|
|
551
|
+
addStep('press_enter', true, '');
|
|
552
|
+
} catch (err) {
|
|
553
|
+
addStep('press_enter', false, (err && err.message) || String(err));
|
|
554
|
+
return fail('enter_failed', 'failed to press enter');
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
const waitUntil = async (predicate, timeoutMs) => {
|
|
558
|
+
const end = Date.now() + timeoutMs;
|
|
559
|
+
while (Date.now() < end) {
|
|
560
|
+
blocked = checkBlocked();
|
|
561
|
+
if (blocked) return { blocked };
|
|
562
|
+
if (predicate()) return { ok: true };
|
|
563
|
+
await sleep(pollIntervalMs);
|
|
564
|
+
}
|
|
565
|
+
return { ok: false };
|
|
566
|
+
};
|
|
567
|
+
|
|
568
|
+
let waited = await waitUntil(() => /\\/search\\//.test(location.pathname) || location.href.indexOf('/search?') >= 0, maxWaitMs);
|
|
569
|
+
if (waited.blocked) return waited.blocked;
|
|
570
|
+
if (!waited.ok) {
|
|
571
|
+
addStep('wait_search_navigation', false, location.href);
|
|
572
|
+
return fail('search_navigation_timeout', 'search navigation timeout');
|
|
573
|
+
}
|
|
574
|
+
addStep('wait_search_navigation', true, location.href);
|
|
575
|
+
|
|
576
|
+
try {
|
|
577
|
+
const tabCandidates = Array.from(document.querySelectorAll('a, button, [role="tab"], [role="button"], [tabindex]')).filter(visible);
|
|
578
|
+
let clickedVideoTab = false;
|
|
579
|
+
for (const el of tabCandidates) {
|
|
580
|
+
const text = String(
|
|
581
|
+
el.textContent || el.getAttribute('aria-label') || el.getAttribute('title') || ''
|
|
582
|
+
).replace(/\\s+/g, ' ').trim();
|
|
583
|
+
const href = String(el.getAttribute('href') || '');
|
|
584
|
+
if (text === '视频' || /^视频\\b/.test(text) || /type=video/.test(href)) {
|
|
585
|
+
el.click();
|
|
586
|
+
clickedVideoTab = true;
|
|
587
|
+
break;
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
if (!clickedVideoTab) {
|
|
591
|
+
const next = new URL(location.href);
|
|
592
|
+
next.searchParams.set('type', 'video');
|
|
593
|
+
history.pushState(null, '', next.toString());
|
|
594
|
+
}
|
|
595
|
+
addStep('switch_video_tab', true, clickedVideoTab ? 'clicked' : 'url_type_video');
|
|
596
|
+
} catch (err) {
|
|
597
|
+
addStep('switch_video_tab', false, (err && err.message) || String(err));
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
waited = await waitUntil(
|
|
601
|
+
() => location.href.indexOf('type=video') >= 0 || !!document.querySelector('a[href*="/video/"]'),
|
|
602
|
+
Math.min(maxWaitMs, 8000),
|
|
603
|
+
);
|
|
604
|
+
if (waited.blocked) return waited.blocked;
|
|
605
|
+
if (!waited.ok) {
|
|
606
|
+
addStep('wait_video_results', false, location.href);
|
|
607
|
+
} else {
|
|
608
|
+
addStep('wait_video_results', true, location.href);
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
return {
|
|
612
|
+
ok: true,
|
|
613
|
+
status: 'search_submitted',
|
|
614
|
+
finalUrl: location.href,
|
|
615
|
+
steps,
|
|
616
|
+
stats: { waitedMs: Date.now() - startedAt },
|
|
617
|
+
};
|
|
618
|
+
} catch (err) {
|
|
619
|
+
return {
|
|
620
|
+
ok: false,
|
|
621
|
+
code: 'page_throw',
|
|
622
|
+
message: (err && err.message) || String(err),
|
|
623
|
+
steps: [],
|
|
624
|
+
stats: { waitedMs: 0 },
|
|
625
|
+
};
|
|
626
|
+
}
|
|
627
|
+
})();`;
|
|
628
|
+
exports.DOUYIN_COLLECT_SCRIPT = `(async () => {
|
|
629
|
+
try {
|
|
630
|
+
${exports.DOUYIN_VERIFY_DETECTOR}
|
|
631
|
+
|
|
632
|
+
const opts = (globalThis.__OPC1_COLLECT_OPTS || {});
|
|
633
|
+
const maxWaitMs = Number(opts.maxWaitMs) > 0 ? Number(opts.maxWaitMs) : 12000;
|
|
634
|
+
const pollIntervalMs = Number(opts.pollIntervalMs) > 0 ? Number(opts.pollIntervalMs) : 400;
|
|
635
|
+
const maxItems = Number(opts.maxItems) > 0 ? Number(opts.maxItems) : 30;
|
|
636
|
+
const maxScrollRounds = Math.max(3, Math.min(8, Number(opts.maxScrollRounds) || 5));
|
|
637
|
+
const minScrollRounds = Math.max(1, Math.min(maxScrollRounds, Number(opts.minScrollRounds) || 3));
|
|
638
|
+
const renderWaitMs = Math.max(250, Math.min(2000, Number(opts.renderWaitMs) || 650));
|
|
639
|
+
|
|
640
|
+
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
641
|
+
|
|
642
|
+
// 反复探测:验证码 / 登录失效 / 终于看到视频链接,三选一立刻退出
|
|
643
|
+
const startedAt = Date.now();
|
|
644
|
+
let sawLogin = false;
|
|
645
|
+
let sawVerify = false;
|
|
646
|
+
let lastVerifyReasons = [];
|
|
647
|
+
|
|
648
|
+
function probeOnce() {
|
|
649
|
+
const verify = __opc1DetectVerification();
|
|
650
|
+
if (verify.hit) {
|
|
651
|
+
lastVerifyReasons = verify.reasons || [];
|
|
652
|
+
return 'verify';
|
|
653
|
+
}
|
|
654
|
+
const loginModal = !!document.querySelector(
|
|
655
|
+
'[data-e2e="login-modal"], [class*="login-mask"], [id="login-pannel"]'
|
|
656
|
+
);
|
|
657
|
+
if (loginModal) return 'login';
|
|
658
|
+
const anyLink = document.querySelector('a[href*="/video/"]');
|
|
659
|
+
return anyLink ? 'ready' : 'wait';
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
let state = 'wait';
|
|
663
|
+
while (Date.now() - startedAt < maxWaitMs) {
|
|
664
|
+
state = probeOnce();
|
|
665
|
+
if (state === 'verify') { sawVerify = true; break; }
|
|
666
|
+
if (state === 'login') { sawLogin = true; break; }
|
|
667
|
+
if (state === 'ready') break;
|
|
668
|
+
await sleep(pollIntervalMs);
|
|
669
|
+
}
|
|
670
|
+
const initialProbeState = state;
|
|
671
|
+
// 旧逻辑在首屏没有 /video/ 链接时会直接 no_results。DOM 兜底路径现在继续往下跑
|
|
672
|
+
// 多轮滚动采集,最终再按页面快照给明确子状态,避免首屏空白误判。
|
|
673
|
+
if (state !== 'verify' && state !== 'login') state = 'ready';
|
|
674
|
+
|
|
675
|
+
if (sawVerify) {
|
|
676
|
+
return {
|
|
677
|
+
ok: false,
|
|
678
|
+
code: 'verification_required',
|
|
679
|
+
message: '抖音搜索页要求人机校验',
|
|
680
|
+
stats: { waitedMs: Date.now() - startedAt, sawVerify: true, verifyReasons: lastVerifyReasons },
|
|
681
|
+
};
|
|
682
|
+
}
|
|
683
|
+
if (sawLogin) {
|
|
684
|
+
return {
|
|
685
|
+
ok: false,
|
|
686
|
+
code: 'login_required',
|
|
687
|
+
message: '抖音未登录或登录态失效',
|
|
688
|
+
stats: { waitedMs: Date.now() - startedAt, sawLogin: true },
|
|
689
|
+
};
|
|
690
|
+
}
|
|
691
|
+
if (state !== 'ready') {
|
|
692
|
+
// 超时兜底:结果容器没等到时,再做一次验证码检测,
|
|
693
|
+
// 因为有些中间页会"先显示 loading → 一小段后才把验证码 DOM 插进来",
|
|
694
|
+
// 若等这段插入的时间超过 maxWaitMs,就会漏判。
|
|
695
|
+
const finalVerify = __opc1DetectVerification();
|
|
696
|
+
if (finalVerify.hit) {
|
|
697
|
+
return {
|
|
698
|
+
ok: false,
|
|
699
|
+
code: 'verification_required',
|
|
700
|
+
message: '抖音搜索页要求人机校验(等待结果超时前检测命中)',
|
|
701
|
+
stats: {
|
|
702
|
+
waitedMs: Date.now() - startedAt,
|
|
703
|
+
sawVerify: true,
|
|
704
|
+
verifyReasons: finalVerify.reasons || [],
|
|
705
|
+
},
|
|
706
|
+
};
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
// ---- no_results 细分诊断:基于一份"结构化页面快照",把超时拆成
|
|
710
|
+
// blank_page / loading_timeout / results_not_rendered / empty_results
|
|
711
|
+
// 这四类之一。只采集**非敏感**的统计信息,不外发正文 / cookie / 请求头。
|
|
712
|
+
const diag = (function buildDiagnostics() {
|
|
713
|
+
function safeStr(v) {
|
|
714
|
+
try { return String(v == null ? '' : v); } catch (_) { return ''; }
|
|
715
|
+
}
|
|
716
|
+
function safeNumber(n) {
|
|
717
|
+
var x = Number(n);
|
|
718
|
+
return Number.isFinite(x) ? x : 0;
|
|
719
|
+
}
|
|
720
|
+
var diagUrl = safeStr(location.href).slice(0, 500);
|
|
721
|
+
var diagTitle = safeStr(document.title).slice(0, 200);
|
|
722
|
+
var readyState = safeStr(document.readyState);
|
|
723
|
+
|
|
724
|
+
var bodyTextRaw = '';
|
|
725
|
+
try {
|
|
726
|
+
bodyTextRaw = safeStr(
|
|
727
|
+
(document.body && (document.body.innerText || document.body.textContent)) || ''
|
|
728
|
+
);
|
|
729
|
+
} catch (_) {
|
|
730
|
+
bodyTextRaw = '';
|
|
731
|
+
}
|
|
732
|
+
var bodyTextLength = bodyTextRaw.length;
|
|
733
|
+
// bodyPreview 严格限制前 120 字,只用于让人判断"页面大概在显示啥";
|
|
734
|
+
// 不做任何额外清理(验证码 / 登录已经在前面处理过了)。
|
|
735
|
+
var bodyPreview = bodyTextRaw.replace(/\\s+/g, ' ').trim().slice(0, 120);
|
|
736
|
+
|
|
737
|
+
var videoLinkCount = 0;
|
|
738
|
+
var imageCount = 0;
|
|
739
|
+
var buttonCount = 0;
|
|
740
|
+
var inputCount = 0;
|
|
741
|
+
try {
|
|
742
|
+
videoLinkCount = document.querySelectorAll('a[href*="/video/"]').length;
|
|
743
|
+
imageCount = document.querySelectorAll('img').length;
|
|
744
|
+
buttonCount = document.querySelectorAll('button').length;
|
|
745
|
+
inputCount = document.querySelectorAll('input, textarea').length;
|
|
746
|
+
} catch (_) { /* ignore */ }
|
|
747
|
+
|
|
748
|
+
function hasSelector(sel) {
|
|
749
|
+
try { return !!document.querySelector(sel); } catch (_) { return false; }
|
|
750
|
+
}
|
|
751
|
+
var hasLoadingSignal =
|
|
752
|
+
hasSelector('[class*="skeleton" i], [class*="loading" i], [class*="spinner" i], [class*="placeholder" i]');
|
|
753
|
+
// 搜索输入框 / 视频 tab / 结果主容器 —— 只要看到任一就说明"搜索页架子在"
|
|
754
|
+
var hasSearchInput =
|
|
755
|
+
hasSelector('input[type="search"], input[data-e2e*="search" i], input[placeholder*="搜索"]');
|
|
756
|
+
var hasVideoTab = false;
|
|
757
|
+
try {
|
|
758
|
+
// 视频 tab 没有稳定的 selector,退一步:url 参数已经是 type=video,
|
|
759
|
+
// 所以如果页面里能找到"视频 / Video / Videos"这类可见 tab 文本就算
|
|
760
|
+
var tabCandidates = document.querySelectorAll(
|
|
761
|
+
'a[role="tab"], [class*="tab" i], [data-e2e*="tab" i]'
|
|
762
|
+
);
|
|
763
|
+
for (var ti = 0; ti < tabCandidates.length; ti++) {
|
|
764
|
+
var t = tabCandidates[ti];
|
|
765
|
+
var txt = safeStr(t && t.textContent).trim();
|
|
766
|
+
if (txt === '视频' || txt.toLowerCase() === 'video' || txt.toLowerCase() === 'videos') {
|
|
767
|
+
hasVideoTab = true;
|
|
768
|
+
break;
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
} catch (_) { /* ignore */ }
|
|
772
|
+
var hasResultContainer =
|
|
773
|
+
hasSelector('[class*="search-result" i], [class*="searchResult" i], [data-e2e*="search" i][data-e2e*="result" i], [class*="result-list" i], [class*="resultList" i]');
|
|
774
|
+
|
|
775
|
+
// 空结果文案匹配(抖音真的搜空时会有这类提示)
|
|
776
|
+
var emptyPhrases = [
|
|
777
|
+
'暂无相关内容',
|
|
778
|
+
'暂无相关结果',
|
|
779
|
+
'未找到相关结果',
|
|
780
|
+
'未找到相关内容',
|
|
781
|
+
'换个词试试',
|
|
782
|
+
'没有找到相关',
|
|
783
|
+
'没有搜到',
|
|
784
|
+
'No results',
|
|
785
|
+
'no results',
|
|
786
|
+
];
|
|
787
|
+
var emptyHit = '';
|
|
788
|
+
for (var ei = 0; ei < emptyPhrases.length; ei++) {
|
|
789
|
+
if (bodyTextRaw.indexOf(emptyPhrases[ei]) >= 0) {
|
|
790
|
+
emptyHit = emptyPhrases[ei];
|
|
791
|
+
break;
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
// title 像 URL 原文(浏览器未能解析页面 title)—— 资源异常的弱信号
|
|
796
|
+
var titleLooksLikeUrl =
|
|
797
|
+
diagTitle.indexOf('://') >= 0 ||
|
|
798
|
+
diagTitle.indexOf('douyin.com/') === 0 ||
|
|
799
|
+
diagTitle.indexOf('/search/') >= 0;
|
|
800
|
+
|
|
801
|
+
// ---- 判定子类型(优先级顺序见注释)----
|
|
802
|
+
// 1) empty_results:有明确"无结果"文案 —— 真空结果
|
|
803
|
+
// 2) loading_timeout:没有结果卡片,但明显在 loading / skeleton 状态
|
|
804
|
+
// 3) results_not_rendered:搜索页壳在(输入框/tab/容器任一命中),但没有视频链接
|
|
805
|
+
// 4) blank_page:页面文本极短 / 几乎没内容 / title 像 URL —— 页面根本没渲染开
|
|
806
|
+
var subtype;
|
|
807
|
+
if (emptyHit) {
|
|
808
|
+
subtype = 'empty_results';
|
|
809
|
+
} else if (videoLinkCount === 0 && hasLoadingSignal) {
|
|
810
|
+
subtype = 'loading_timeout';
|
|
811
|
+
} else if (
|
|
812
|
+
videoLinkCount === 0 &&
|
|
813
|
+
(hasSearchInput || hasVideoTab || hasResultContainer)
|
|
814
|
+
) {
|
|
815
|
+
subtype = 'results_not_rendered';
|
|
816
|
+
} else if (bodyTextLength < 200 || (bodyTextLength < 500 && titleLooksLikeUrl)) {
|
|
817
|
+
subtype = 'blank_page';
|
|
818
|
+
} else {
|
|
819
|
+
// 兜底:页面有正文、但没有上面任何特征 —— 按"结果没渲染"处理,
|
|
820
|
+
// 这比直接说 blank_page 更安全(至少 body 不空)
|
|
821
|
+
subtype = 'results_not_rendered';
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
return {
|
|
825
|
+
subtype: subtype,
|
|
826
|
+
url: diagUrl,
|
|
827
|
+
title: diagTitle,
|
|
828
|
+
readyState: readyState,
|
|
829
|
+
bodyTextLength: safeNumber(bodyTextLength),
|
|
830
|
+
bodyPreview: bodyPreview,
|
|
831
|
+
videoLinkCount: safeNumber(videoLinkCount),
|
|
832
|
+
imageCount: safeNumber(imageCount),
|
|
833
|
+
buttonCount: safeNumber(buttonCount),
|
|
834
|
+
inputCount: safeNumber(inputCount),
|
|
835
|
+
hasLoadingSignal: !!hasLoadingSignal,
|
|
836
|
+
hasSearchInput: !!hasSearchInput,
|
|
837
|
+
hasVideoTab: !!hasVideoTab,
|
|
838
|
+
hasResultContainer: !!hasResultContainer,
|
|
839
|
+
emptyResultHit: emptyHit || null,
|
|
840
|
+
titleLooksLikeUrl: !!titleLooksLikeUrl,
|
|
841
|
+
};
|
|
842
|
+
})();
|
|
843
|
+
|
|
844
|
+
var subtypeMsg = {
|
|
845
|
+
blank_page: '页面接近空白,疑似未真正渲染',
|
|
846
|
+
loading_timeout: '页面一直处于加载/骨架态,结果未进入就绪',
|
|
847
|
+
results_not_rendered: '搜索页架子在,但结果卡片未出现',
|
|
848
|
+
empty_results: '页面明确提示无相关结果(真空结果)',
|
|
849
|
+
}[diag.subtype] || '结果区域未在超时内出现';
|
|
850
|
+
|
|
851
|
+
return {
|
|
852
|
+
ok: false,
|
|
853
|
+
code: 'no_results',
|
|
854
|
+
subtype: diag.subtype,
|
|
855
|
+
message: '搜索结果区域未在超时内出现(子类型=' + diag.subtype + ':' + subtypeMsg + ')',
|
|
856
|
+
stats: {
|
|
857
|
+
waitedMs: Date.now() - startedAt,
|
|
858
|
+
sawVerify: false,
|
|
859
|
+
sawLogin: false,
|
|
860
|
+
noResultsDiagnostics: diag,
|
|
861
|
+
},
|
|
862
|
+
};
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
// ── 元数据提取辅助函数(老网页端规则筛依赖的四字段)──────────────────
|
|
866
|
+
// 注意:都是运行在抖音页面上下文里的浏览器 JS,不能依赖 Node / Electron API。
|
|
867
|
+
const nowSec = Math.floor(Date.now() / 1000);
|
|
868
|
+
|
|
869
|
+
/**
 * Parse a compact counter label such as "123", "1.2w", "1.2万", "1.5k" or
 * "1.5千" into an integer.
 *
 * Commas and whitespace are stripped before matching. The units w / 万
 * multiply by 10000 and k / 千 multiply by 1000.
 *
 * @param {*} raw Label text (anything String() can convert).
 * @returns {number|null} Rounded value, or null when raw is nullish, empty or
 *   not in a recognised format.
 */
function __opc1ParseCompactNumber(raw) {
  if (raw === null || raw === undefined) return null;

  const compact = String(raw).trim().replace(/[,\\s]+/g, '');
  if (compact === '') return null;

  const parts = /^(\\d+(?:\\.\\d+)?)(w|W|k|K|万|千)?$/.exec(compact);
  if (parts === null) return null;

  let multiplier = 1;
  switch ((parts[2] || '').toLowerCase()) {
    case 'w':
    case '万':
      multiplier = 10000;
      break;
    case 'k':
    case '千':
      multiplier = 1000;
      break;
    default:
      break;
  }

  const value = parseFloat(parts[1]) * multiplier;
  if (!Number.isFinite(value)) return null;
  return Math.round(value);
}
|
|
882
|
+
|
|
883
|
+
/**
 * Convert a short publish-time label into a Unix timestamp in seconds.
 *
 * Recognised forms, tried in this order:
 *   - "刚刚" / "刚才"                    -> nowSec
 *   - "昨天..." / "前天..."              -> nowSec minus 1 or 2 days
 *   - "N秒前", "N分钟前", "N小时前", "N天前", "N周前", "N个月前", "N年前"
 *     (a month counts as 30 days, a year as 365 days)
 *   - "YYYY-MM-DD" (also "/" or "." as separator), local midnight
 *   - "MM-DD" (whole string), local midnight of the most recent such date
 *
 * Calendar dates that do not exist (for example 02-31) yield null instead of
 * rolling over into the following month. A bare "MM-DD" that would land more
 * than one day after nowSec is taken to mean the previous year, because a
 * publish date cannot lie in the future.
 *
 * @param {*} raw Label text.
 * @param {number} nowSec Reference "now" in Unix seconds.
 * @returns {number|null} Unix seconds, or null when the label is empty, longer
 *   than 30 characters, or not recognised.
 */
function __opc1ParseRelativeTime(raw, nowSec) {
  if (!raw) return null;
  const t = String(raw).trim();
  if (!t || t.length > 30) return null;
  if (/刚刚|刚才/.test(t)) return nowSec;
  if (/^昨天/.test(t)) return nowSec - 86400;
  if (/^前天/.test(t)) return nowSec - 2 * 86400;

  // Local-midnight timestamp for a calendar date, or null when the date does
  // not exist (the Date constructor silently normalises overflowing days).
  const toLocalDaySec = (year, month, day) => {
    const dt = new Date(year, month - 1, day);
    if (dt.getFullYear() !== year || dt.getMonth() !== month - 1 || dt.getDate() !== day) {
      return null;
    }
    const ts = Math.floor(dt.getTime() / 1000);
    return Number.isFinite(ts) && ts > 0 ? ts : null;
  };

  // "N<unit>前": the alternation lists longer units before their shorter
  // prefixes so that e.g. 分钟 is preferred over 分.
  let m = t.match(/(\\d+)\\s*(秒|分钟|分|小时|时|天|周|个月|月|年)前/);
  if (m) {
    const n = parseInt(m[1], 10);
    const unit = m[2];
    const mult =
      unit === '秒' ? 1 :
      (unit === '分钟' || unit === '分') ? 60 :
      (unit === '小时' || unit === '时') ? 3600 :
      unit === '天' ? 86400 :
      unit === '周' ? 86400 * 7 :
      (unit === '个月' || unit === '月') ? 86400 * 30 :
      unit === '年' ? 86400 * 365 : 0;
    if (mult > 0 && Number.isFinite(n)) return nowSec - n * mult;
  }

  // Absolute date with a four-digit year. The date patterns below use plain
  // character classes, so they need no escaping inside the enclosing
  // template literal.
  m = t.match(/([0-9]{4})[-/.]([0-9]{1,2})[-/.]([0-9]{1,2})/);
  if (m) {
    const y = parseInt(m[1], 10);
    const mo = parseInt(m[2], 10);
    const d = parseInt(m[3], 10);
    if (y > 2000 && mo >= 1 && mo <= 12 && d >= 1 && d <= 31) {
      const ts = toLocalDaySec(y, mo, d);
      if (ts !== null) return ts;
    }
  }

  // Month-day only: take the year from nowSec, stepping back one year when
  // the result would otherwise be in the future.
  m = t.match(/^([0-9]{1,2})[-/.]([0-9]{1,2})$/);
  if (m) {
    const mo2 = parseInt(m[1], 10);
    const d2 = parseInt(m[2], 10);
    if (mo2 >= 1 && mo2 <= 12 && d2 >= 1 && d2 <= 31) {
      const haveNow = Number.isFinite(nowSec);
      const refYear = (haveNow ? new Date(nowSec * 1000) : new Date()).getFullYear();
      let ts2 = toLocalDaySec(refYear, mo2, d2);
      // One day of slack tolerates clock or time-zone skew.
      if (ts2 !== null && haveNow && ts2 - nowSec > 86400) {
        ts2 = toLocalDaySec(refYear - 1, mo2, d2);
      }
      if (ts2 !== null) return ts2;
    }
  }
  return null;
}
|
|
928
|
+
|
|
929
|
+
/**
 * Parse a "mm:ss" or "hh:mm:ss" duration label into seconds.
 *
 * @param {*} raw Label text.
 * @returns {number|null} Duration in seconds, or null when the text is empty,
 *   malformed, has a minute/second field of 60 or more, or falls outside
 *   the range 1..86400 (so a "00:00" placeholder is rejected).
 */
function __opc1ParseDurationText(raw) {
  if (!raw) return null;

  const parts = /^(\\d{1,2}):(\\d{2})(?::(\\d{2}))?$/.exec(String(raw).trim());
  if (parts === null) return null;

  const first = parseInt(parts[1], 10);
  const second = parseInt(parts[2], 10);
  // A third field means the label is hh:mm:ss rather than mm:ss.
  const hasThird = Boolean(parts[3]);
  const third = hasThird ? parseInt(parts[3], 10) : 0;

  if (second >= 60 || third >= 60) return null;

  const total = hasThird
    ? first * 3600 + second * 60 + third
    : first * 60 + second;

  if (!Number.isFinite(total) || total <= 0 || total > 86400) return null;
  return total;
}
|
|
949
|
+
|
|
950
|
+
/**
 * Pull the identifier that follows "/user/" out of a profile link.
 *
 * The pattern is written with character classes only, so it contains no
 * escape sequences and reads the same inside and outside the template
 * literal that carries this script.
 *
 * @param {*} href Link target (relative or absolute).
 * @returns {string|null} The run of letters, digits, "_" and "-" after
 *   "/user/", truncated to 128 characters, or null when absent.
 */
function __opc1ExtractSecUid(href) {
  if (!href) return null;
  const m = String(href).match(/[/]user[/]([A-Za-z0-9_-]+)/);
  return m ? m[1].slice(0, 128) : null;
}
|
|
955
|
+
|
|
956
|
+
// UTF-8-safe base64 encoding. A sec_uid is normally plain ASCII, but the
// input is encoded to UTF-8 bytes first so that any non-ASCII text (for
// example Chinese characters) cannot make btoa throw.
/**
 * @param {string} s Text to encode.
 * @returns {string|null} Base64 of the UTF-8 bytes of s, or null on failure.
 */
function __opc1Utf8Btoa(s) {
  try {
    // btoa expects one character per byte, so map every byte to its char.
    const byteString = Array.from(
      new TextEncoder().encode(s),
      (byte) => String.fromCharCode(byte)
    ).join('');
    return btoa(byteString);
  } catch (_) {
    return null;
  }
}
|
|
968
|
+
|
|
969
|
+
// 真正的采集:多来源找视频链接/卡片,去重后取最近的卡片元素抓 title / author / cover + metadata
|
|
970
|
+
const RE_AWEMEID = /\\/video\\/(\\d{6,30})/;
|
|
971
|
+
const seen = new Set();
|
|
972
|
+
const items = [];
|
|
973
|
+
let totalCandidateLinks = 0;
|
|
974
|
+
let lastCandidateLinks = 0;
|
|
975
|
+
let scanRounds = 0;
|
|
976
|
+
let scrollRounds = 0;
|
|
977
|
+
let lastHeightGrew = false;
|
|
978
|
+
|
|
979
|
+
const metaStats = {
|
|
980
|
+
withCommentCount: 0,
|
|
981
|
+
withCreateTime: 0,
|
|
982
|
+
withAuthorSecUid: 0,
|
|
983
|
+
withDurationSec: 0,
|
|
984
|
+
};
|
|
985
|
+
|
|
986
|
+
/**
 * Report whether an element currently occupies space on the page.
 *
 * An element counts as visible when its bounding box has positive width and
 * height and its computed style is neither visibility:hidden nor
 * display:none. If measuring throws, the element is treated as visible.
 *
 * @param {*} el Element to test; falsy values and objects without
 *   getBoundingClientRect are reported as not visible.
 * @returns {boolean}
 */
function __opc1IsVisible(el) {
  try {
    if (!el || !el.getBoundingClientRect) return false;

    const box = el.getBoundingClientRect();
    const computed = window.getComputedStyle(el);

    if (!(box.width > 0)) return false;
    if (!(box.height > 0)) return false;
    if (computed.visibility === 'hidden') return false;
    return computed.display !== 'none';
  } catch (_) {
    // Best effort: a measurement failure must not hide a real candidate.
    return true;
  }
}
|
|
996
|
+
|
|
997
|
+
/**
 * Percent-decode a value without ever throwing.
 *
 * @param {*} raw Value to decode; falsy values become the empty string.
 * @returns {string} The decoded text, or the undecoded text when it is not
 *   a valid percent-encoded sequence.
 */
function __opc1SafeDecode(raw) {
  const text = String(raw || '');
  let decoded;
  try {
    decoded = decodeURIComponent(text);
  } catch (_) {
    decoded = text;
  }
  return decoded;
}
|
|
1001
|
+
|
|
1002
|
+
/**
 * Find a video URL carried by an element.
 *
 * Values are examined in this order: the href attribute, the href property,
 * the data-href, data-url and to attributes, then the values of the first 24
 * attributes. Each is percent-decoded and the first one matching RE_AWEMEID
 * is returned in decoded form.
 *
 * @param {*} el Element to inspect.
 * @returns {string} The matching decoded value, or '' when there is none.
 */
function __opc1FindVideoHref(el) {
  if (!el) return '';

  const rawValues = [];
  try {
    const readAttr = (name) => el.getAttribute && el.getAttribute(name);
    rawValues.push(readAttr('href'));
    rawValues.push(el.href);
    rawValues.push(readAttr('data-href'));
    rawValues.push(readAttr('data-url'));
    rawValues.push(readAttr('to'));
    if (el.attributes) {
      const cap = Math.min(el.attributes.length, 24);
      for (let idx = 0; idx < cap; idx += 1) {
        rawValues.push(el.attributes[idx].value);
      }
    }
  } catch (_) {
    // Keep whatever was gathered before the failure.
  }

  for (const rawValue of rawValues) {
    if (!rawValue) continue;
    const decoded = __opc1SafeDecode(rawValue);
    if (RE_AWEMEID.test(decoded)) return decoded;
  }
  return '';
}
|
|
1024
|
+
|
|
1025
|
+
/**
 * Gather every element on the page that carries a video URL and is visible.
 *
 * Two passes are made: first over all anchors, then over up to 500
 * card-like containers and up to 30 link-like descendants of each. An
 * element qualifies when __opc1FindVideoHref finds a URL on it and either the
 * element itself or its nearest card-like ancestor is visible.
 *
 * @returns {Array} Qualifying elements, without duplicates, in discovery order.
 */
function __opc1CandidateElements() {
  // A Set keeps insertion order, so it serves as both the de-duplication
  // index and the ordered result.
  const accepted = new Set();

  function consider(node) {
    if (!node || accepted.has(node)) return;
    if (!__opc1FindVideoHref(node)) return;
    // A link node can itself be tiny; fall back to the visibility of the
    // surrounding card.
    if (!__opc1IsVisible(node)) {
      const wrapper = node.closest && node.closest('li, [data-e2e], div[class*="search"], div[class*="result"], div[class*="card"], div[class*="item"]');
      if (!__opc1IsVisible(wrapper)) return;
    }
    accepted.add(node);
  }

  const anchors = Array.from(document.querySelectorAll('a[href*="/video/"], a[href*="douyin.com/video/"], a[href]'));
  for (const anchor of anchors) {
    consider(anchor);
  }

  const wrappers = Array.from(document.querySelectorAll(
    'li, [data-e2e*="video" i], [data-e2e*="search" i], [class*="video" i], [class*="aweme" i], [class*="card" i], [class*="item" i], [class*="result" i]',
  )).slice(0, 500);
  for (const wrapper of wrappers) {
    consider(wrapper);
    const nested = wrapper.querySelectorAll
      ? wrapper.querySelectorAll('a[href], [data-href], [data-url], [to]')
      : [];
    const cap = Math.min(nested.length, 30);
    for (let k = 0; k < cap; k += 1) {
      consider(nested[k]);
    }
  }

  return Array.from(accepted);
}
|
|
1055
|
+
|
|
1056
|
+
/**
 * Build a structured, non-content snapshot of the current page and classify
 * why no results are available.
 *
 * Subtype priority:
 *   1. empty_results        - the page text contains an explicit "no results" phrase
 *   2. loading_timeout      - no video links, and a skeleton/loading/spinner/placeholder node exists
 *   3. results_not_rendered - no video links, but a search input or result container exists
 *   4. blank_page           - under 200 characters of text, or under 500 with a URL-like title
 *   5. results_not_rendered - fallback when none of the above applies
 *
 * @returns {object} The subtype plus url, title, readyState, element counts,
 *   the boolean signals used above and a 120-character text preview.
 *   hasVideoTab is always reported as false by this function.
 */
function __opc1BuildDiagnostics() {
  const asText = (value) => {
    try { return String(value == null ? '' : value); } catch (_) { return ''; }
  };
  const exists = (selector) => {
    try { return !!document.querySelector(selector); } catch (_) { return false; }
  };

  const pageText = asText(
    (document.body && (document.body.innerText || document.body.textContent)) || ''
  );
  const preview = pageText.replace(/\\s+/g, ' ').trim().slice(0, 120);

  // Phrases the site shows when a search genuinely has no results.
  const emptyMarkers = [
    '暂无相关内容', '暂无相关结果', '未找到相关结果', '未找到相关内容',
    '换个词试试', '没有找到相关', '没有搜到', 'No results', 'no results',
  ];
  const matchedMarker = emptyMarkers.find((phrase) => pageText.indexOf(phrase) >= 0) || '';

  // A title that looks like a raw URL is a weak sign the page never rendered.
  const pageTitle = asText(document.title).slice(0, 200);
  const urlLikeTitle =
    pageTitle.indexOf('://') >= 0 ||
    pageTitle.indexOf('douyin.com/') === 0 ||
    pageTitle.indexOf('/search/') >= 0;

  let videoAnchors = 0;
  try { videoAnchors = document.querySelectorAll('a[href*="/video/"]').length; } catch (_) { /* ignore */ }

  const loading =
    exists('[class*="skeleton" i], [class*="loading" i], [class*="spinner" i], [class*="placeholder" i]');
  const searchBox =
    exists('input[type="search"], input[data-e2e*="search" i], input[placeholder*="搜索"]');
  const resultBox =
    exists('[class*="search-result" i], [class*="searchResult" i], [data-e2e*="search" i][data-e2e*="result" i], [class*="result-list" i], [class*="resultList" i]');

  const noVideos = videoAnchors === 0;
  let kind = 'results_not_rendered';
  if (matchedMarker) {
    kind = 'empty_results';
  } else if (noVideos && loading) {
    kind = 'loading_timeout';
  } else if (noVideos && (searchBox || resultBox)) {
    kind = 'results_not_rendered';
  } else if (pageText.length < 200 || (pageText.length < 500 && urlLikeTitle)) {
    kind = 'blank_page';
  }

  return {
    subtype: kind,
    url: asText(location.href).slice(0, 500),
    title: pageTitle,
    readyState: asText(document.readyState),
    bodyTextLength: pageText.length,
    bodyPreview: preview,
    videoLinkCount: videoAnchors,
    imageCount: document.querySelectorAll('img').length,
    buttonCount: document.querySelectorAll('button').length,
    inputCount: document.querySelectorAll('input, textarea').length,
    hasLoadingSignal: !!loading,
    hasSearchInput: !!searchBox,
    hasVideoTab: false,
    hasResultContainer: !!resultBox,
    emptyResultHit: matchedMarker || null,
    titleLooksLikeUrl: !!urlLikeTitle,
  };
}
|
|
1113
|
+
|
|
1114
|
+
/**
 * Map a no_results subtype to its human-readable explanation.
 *
 * @param {string} subtype One of blank_page, loading_timeout,
 *   results_not_rendered or empty_results.
 * @returns {string} The explanation, or a generic timeout message for any
 *   other value.
 */
function __opc1SubtypeMessage(subtype) {
  const explanations = {
    blank_page: '页面接近空白,疑似未真正渲染',
    loading_timeout: '页面一直处于加载/骨架态,结果未进入就绪',
    results_not_rendered: '搜索页架子在,但结果卡片未出现',
    empty_results: '页面明确提示无相关结果(真空结果)',
  };
  const explanation = explanations[subtype];
  if (explanation) {
    return explanation;
  }
  return '结果区域未在超时内出现';
}
|
|
1122
|
+
|
|
1123
|
+
/**
 * Scroll down in two steps, pausing renderWaitMs after each, and report
 * whether the document grew taller.
 *
 * The first step is one viewport height (at least 700 px, with 800 px assumed
 * when the viewport height is unavailable); the second is 85% of that.
 *
 * Side effects: increments the enclosing scrollRounds counter and stores the
 * result in the enclosing lastHeightGrew variable.
 *
 * @returns {Promise<boolean>} true when the page height increased.
 */
async function __opc1ScrollAndWait() {
  const pageHeight = () => Math.max(
    document.documentElement.scrollHeight || 0,
    document.body ? document.body.scrollHeight || 0 : 0
  );
  // Re-read on every call: the viewport can change between the two steps.
  const viewportStep = () => Math.max(window.innerHeight || 800, 700);

  const heightBefore = pageHeight();

  window.scrollBy({ top: viewportStep(), behavior: 'instant' });
  await sleep(renderWaitMs);

  window.scrollBy({ top: Math.floor(viewportStep() * 0.85), behavior: 'instant' });
  await sleep(renderWaitMs);

  const heightAfter = pageHeight();
  scrollRounds++;
  lastHeightGrew = heightAfter > heightBefore;
  return lastHeightGrew;
}
|
|
1134
|
+
|
|
1135
|
+
/**
 * Turn one scan's candidate elements into result items.
 *
 * For every candidate whose video id has not been seen before, the nearest
 * card-like ancestor is located and title, author, cover and four optional
 * metadata fields are read from it. Optional fields that cannot be read stay
 * null; nothing is guessed. Metadata that was found is serialised as JSON,
 * base64-encoded and appended to the canonical video URL as the fragment
 * "#opc1md=...".
 *
 * Side effects on enclosing state: bumps scanRounds, lastCandidateLinks and
 * totalCandidateLinks; adds ids to seen; appends to items (never beyond
 * maxItems); increments the metaStats counters.
 *
 * @param {Array} links Candidate elements for this scan.
 * @returns {number} How many new items were appended during this call.
 */
function __opc1CollectCandidateElements(links) {
  scanRounds++;
  lastCandidateLinks = links.length;
  totalCandidateLinks += links.length;

  const clean = (value) => String(value || '').trim();

  // Walk at most cap elements matching selector under root. For each element
  // without child elements whose trimmed text is non-empty and no longer than
  // maxLen, run probe on the text and return the first result that accept
  // approves; null when nothing qualifies.
  const scanLeaves = (root, selector, cap, maxLen, probe, accept) => {
    const nodes = root.querySelectorAll(selector);
    const stop = Math.min(nodes.length, cap);
    for (let i = 0; i < stop; i++) {
      if (nodes[i].children.length > 0) continue;
      const text = String(nodes[i].textContent || '').trim();
      if (!text || text.length > maxLen) continue;
      const value = probe(text);
      if (accept(value)) return value;
    }
    return null;
  };

  // Title preference: the link's own title attribute, then an inner image's
  // alt text, then the first aria-label in the card, then the first 80
  // characters of the card text.
  const readTitle = (anchor, card) => {
    const own = clean(anchor.getAttribute('title'));
    if (own) return own;
    const image = anchor.querySelector('img');
    const alt = image ? clean(image.getAttribute('alt')) : '';
    if (alt) return alt;
    const labelled = card.querySelector('[aria-label]');
    const label = labelled ? clean(labelled.getAttribute('aria-label')) : '';
    if (label) return label;
    return String(card.textContent || '').replace(/\\s+/g, ' ').trim().slice(0, 80);
  };

  // Publish time: nodes whose class or tag hints at a time come first (a
  // datetime attribute wins over text), then any short leaf text.
  const readCreateTime = (card) => {
    let stamp = null;
    const hinted = card.querySelectorAll(
      '[class*="time" i], [class*="date" i], [class*="publish" i], time, [datetime]',
    );
    for (let i = 0; i < hinted.length; i++) {
      const node = hinted[i];
      if (node.children.length > 0) continue;
      const iso = node.getAttribute && node.getAttribute('datetime');
      if (iso) {
        const ms = Date.parse(iso);
        if (Number.isFinite(ms) && ms > 0) {
          stamp = Math.floor(ms / 1000);
          break;
        }
      }
      const fromText = __opc1ParseRelativeTime(node.textContent || '', nowSec);
      if (fromText) { stamp = fromText; break; }
    }
    if (!stamp) {
      // Bounded fallback so large cards do not slow the scan down.
      const fallback = scanLeaves(
        card, 'span, div, p, em, i', 100, 20,
        (text) => __opc1ParseRelativeTime(text, nowSec),
        Boolean,
      );
      if (fallback) stamp = fallback;
    }
    return stamp;
  };

  // Comment count: often absent on the search page. Try a dedicated node
  // first, then short leaf texts that place a number next to the word for
  // "comments".
  const readCommentCount = (card) => {
    let count = null;
    const badge = card.querySelector(
      '[data-e2e*="comment" i], [class*="comment-count" i], [aria-label*="评论"]',
    );
    if (badge) {
      const text = String(badge.textContent || badge.getAttribute('aria-label') || '').trim();
      const hit = text.match(/(\\d[\\d.,kwKW万千]*)/);
      if (hit) {
        const n = __opc1ParseCompactNumber(hit[1]);
        if (n != null) count = n;
      }
    }
    if (count == null) {
      const fromLeaf = scanLeaves(
        card, 'span, div, p', 80, 20,
        (text) => {
          const hit = text.match(/评论\\s*(\\d[\\d.,kwKW万千]*)/) || text.match(/^(\\d[\\d.,kwKW万千]*)\\s*评论/);
          return hit ? __opc1ParseCompactNumber(hit[1]) : null;
        },
        (value) => value != null,
      );
      if (fromLeaf != null) count = fromLeaf;
    }
    return count;
  };

  let added = 0;
  for (const anchor of links) {
    if (items.length >= maxItems) break;

    const idMatch = __opc1FindVideoHref(anchor).match(RE_AWEMEID);
    if (!idMatch) continue;
    const awemeId = idMatch[1];
    if (seen.has(awemeId)) continue;
    seen.add(awemeId);

    // The nearest list item or card-like container delimits the whole card.
    const card = anchor.closest('li, [data-e2e], div[class*="search"], div[class*="result"], div[class*="card"]') || anchor;

    const title = readTitle(anchor, card);

    // Author: prefer a profile link, then nodes that look like a user name.
    const authorLinkEl = card.querySelector('a[href*="/user/"]');
    const authorEl =
      authorLinkEl ||
      card.querySelector('[data-e2e*="user-name"]') ||
      card.querySelector('[class*="author"], [class*="user-name"]');
    const authorName = authorEl
      ? (clean(authorEl.textContent).slice(0, 80) || undefined)
      : undefined;

    // Author id: from the first profile link, else from any profile link.
    let authorSecUid = authorLinkEl
      ? __opc1ExtractSecUid(authorLinkEl.getAttribute('href') || '')
      : null;
    if (!authorSecUid) {
      const profileLinks = card.querySelectorAll('a[href*="/user/"]');
      for (let k = 0; k < profileLinks.length; k++) {
        const sec = __opc1ExtractSecUid(profileLinks[k].getAttribute('href') || '');
        if (sec) { authorSecUid = sec; break; }
      }
    }

    const createTime = readCreateTime(card);

    // Duration: a mm:ss style text somewhere in the card.
    const durationSec = scanLeaves(card, 'span, div', 80, Infinity, __opc1ParseDurationText, Boolean);

    const commentCount = readCommentCount(card);

    // Cover image; protocol-relative URLs are given an https scheme.
    let coverUrl;
    const coverImg = card.querySelector('img');
    if (coverImg) {
      coverUrl = coverImg.getAttribute('src') || coverImg.getAttribute('data-src') || undefined;
      if (coverUrl && coverUrl.startsWith('//')) coverUrl = 'https:' + coverUrl;
    }

    // Canonical URL, with whatever metadata was found tucked into the fragment.
    let url = 'https://www.douyin.com/video/' + awemeId;
    const meta = {};
    if (commentCount != null) { meta.cc = commentCount; metaStats.withCommentCount++; }
    if (createTime != null) { meta.ct = createTime; metaStats.withCreateTime++; }
    if (authorSecUid) { meta.sec = authorSecUid; metaStats.withAuthorSecUid++; }
    if (durationSec != null) { meta.dur = durationSec; metaStats.withDurationSec++; }
    if (Object.keys(meta).length > 0) {
      const encoded = __opc1Utf8Btoa(JSON.stringify(meta));
      if (encoded) url = url + '#opc1md=' + encoded;
    }

    items.push({ awemeId, title, authorName, url, coverUrl });
    added++;
  }
  return added;
}
|
|
1307
|
+
|
|
1308
|
+
for (let round = 0; round <= maxScrollRounds; round++) {
|
|
1309
|
+
const verify = __opc1DetectVerification();
|
|
1310
|
+
if (verify.hit) {
|
|
1311
|
+
return {
|
|
1312
|
+
ok: false,
|
|
1313
|
+
code: 'verification_required',
|
|
1314
|
+
message: '抖音搜索页要求人机校验',
|
|
1315
|
+
stats: {
|
|
1316
|
+
waitedMs: Date.now() - startedAt,
|
|
1317
|
+
sawVerify: true,
|
|
1318
|
+
verifyReasons: verify.reasons || [],
|
|
1319
|
+
collectedCount: items.length,
|
|
1320
|
+
scrollRounds,
|
|
1321
|
+
uniqueVideoCount: seen.size,
|
|
1322
|
+
},
|
|
1323
|
+
};
|
|
1324
|
+
}
|
|
1325
|
+
const loginModal = !!document.querySelector(
|
|
1326
|
+
'[data-e2e="login-modal"], [class*="login-mask"], [id="login-pannel"]'
|
|
1327
|
+
);
|
|
1328
|
+
if (loginModal) {
|
|
1329
|
+
return {
|
|
1330
|
+
ok: false,
|
|
1331
|
+
code: 'login_required',
|
|
1332
|
+
message: '抖音未登录或登录态失效',
|
|
1333
|
+
stats: {
|
|
1334
|
+
waitedMs: Date.now() - startedAt,
|
|
1335
|
+
sawLogin: true,
|
|
1336
|
+
collectedCount: items.length,
|
|
1337
|
+
scrollRounds,
|
|
1338
|
+
uniqueVideoCount: seen.size,
|
|
1339
|
+
},
|
|
1340
|
+
};
|
|
1341
|
+
}
|
|
1342
|
+
|
|
1343
|
+
const candidates = __opc1CandidateElements();
|
|
1344
|
+
const freshCount = __opc1CollectCandidateElements(candidates);
|
|
1345
|
+
if (items.length >= maxItems) break;
|
|
1346
|
+
if (round >= maxScrollRounds) break;
|
|
1347
|
+
const diag = __opc1BuildDiagnostics();
|
|
1348
|
+
const shouldContinue =
|
|
1349
|
+
round < minScrollRounds ||
|
|
1350
|
+
freshCount > 0 ||
|
|
1351
|
+
lastHeightGrew ||
|
|
1352
|
+
diag.hasLoadingSignal === true;
|
|
1353
|
+
if (!shouldContinue) break;
|
|
1354
|
+
await __opc1ScrollAndWait();
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
if (items.length === 0) {
|
|
1358
|
+
const diag = __opc1BuildDiagnostics();
|
|
1359
|
+
return {
|
|
1360
|
+
ok: false,
|
|
1361
|
+
code: 'no_results',
|
|
1362
|
+
subtype: diag.subtype,
|
|
1363
|
+
message: '搜索结果区域未在多轮滚动后出现(子类型=' + diag.subtype + ':' + __opc1SubtypeMessage(diag.subtype) + ')',
|
|
1364
|
+
stats: {
|
|
1365
|
+
totalLinks: totalCandidateLinks,
|
|
1366
|
+
dedupItems: 0,
|
|
1367
|
+
collectedCount: 0,
|
|
1368
|
+
scrollRounds,
|
|
1369
|
+
scanRounds,
|
|
1370
|
+
uniqueVideoCount: 0,
|
|
1371
|
+
waitedMs: Date.now() - startedAt,
|
|
1372
|
+
sawVerify: false,
|
|
1373
|
+
sawLogin: false,
|
|
1374
|
+
lastCandidateLinks,
|
|
1375
|
+
lastHeightGrew,
|
|
1376
|
+
initialProbeState,
|
|
1377
|
+
noResultsDiagnostics: diag,
|
|
1378
|
+
metadataCollected: metaStats,
|
|
1379
|
+
},
|
|
1380
|
+
};
|
|
1381
|
+
}
|
|
1382
|
+
|
|
1383
|
+
return {
|
|
1384
|
+
ok: true,
|
|
1385
|
+
items,
|
|
1386
|
+
stats: {
|
|
1387
|
+
totalLinks: totalCandidateLinks,
|
|
1388
|
+
dedupItems: items.length,
|
|
1389
|
+
collectedCount: items.length,
|
|
1390
|
+
scrollRounds,
|
|
1391
|
+
scanRounds,
|
|
1392
|
+
uniqueVideoCount: seen.size,
|
|
1393
|
+
lastCandidateLinks,
|
|
1394
|
+
lastHeightGrew,
|
|
1395
|
+
initialProbeState,
|
|
1396
|
+
waitedMs: Date.now() - startedAt,
|
|
1397
|
+
sawVerify,
|
|
1398
|
+
sawLogin,
|
|
1399
|
+
metadataCollected: metaStats,
|
|
1400
|
+
},
|
|
1401
|
+
};
|
|
1402
|
+
} catch (err) {
|
|
1403
|
+
return {
|
|
1404
|
+
ok: false,
|
|
1405
|
+
code: 'page_throw',
|
|
1406
|
+
message: (err && err.message) || String(err),
|
|
1407
|
+
};
|
|
1408
|
+
}
|
|
1409
|
+
})();`;
|
|
1410
|
+
exports.DOUYIN_PRESCROLL_SCRIPT = `(async () => {
|
|
1411
|
+
try {
|
|
1412
|
+
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
1413
|
+
for (let i = 0; i < 3; i++) {
|
|
1414
|
+
window.scrollBy({ top: window.innerHeight, behavior: 'instant' });
|
|
1415
|
+
await sleep(400);
|
|
1416
|
+
}
|
|
1417
|
+
return { ok: true };
|
|
1418
|
+
} catch (err) {
|
|
1419
|
+
return { ok: false, message: (err && err.message) || String(err) };
|
|
1420
|
+
}
|
|
1421
|
+
})();`;
|
|
1422
|
+
//# sourceMappingURL=index.js.map
|