@web-auto/webauto 0.1.17 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -53
- package/apps/desktop-console/dist/main/index.mjs +229 -14
- package/apps/desktop-console/dist/renderer/index.js +237 -8
- package/apps/desktop-console/entry/ui-cli.mjs +290 -21
- package/apps/desktop-console/entry/ui-console.mjs +46 -15
- package/apps/webauto/entry/account.mjs +126 -27
- package/apps/webauto/entry/lib/account-detect.mjs +399 -9
- package/apps/webauto/entry/lib/account-store.mjs +201 -109
- package/apps/webauto/entry/lib/iflow-reply.mjs +194 -0
- package/apps/webauto/entry/lib/profile-policy.mjs +48 -0
- package/apps/webauto/entry/lib/profilepool.mjs +12 -0
- package/apps/webauto/entry/lib/schedule-store.mjs +29 -2
- package/apps/webauto/entry/lib/session-init.mjs +227 -0
- package/apps/webauto/entry/lib/upgrade-check.mjs +269 -0
- package/apps/webauto/entry/lib/xhs-unified-blocks.mjs +160 -0
- package/apps/webauto/entry/lib/xhs-unified-output-blocks.mjs +83 -0
- package/apps/webauto/entry/lib/xhs-unified-plan-blocks.mjs +55 -0
- package/apps/webauto/entry/lib/xhs-unified-profile-blocks.mjs +542 -0
- package/apps/webauto/entry/lib/xhs-unified-runtime-blocks.mjs +436 -0
- package/apps/webauto/entry/profilepool.mjs +56 -9
- package/apps/webauto/entry/smart-reply-cli.mjs +267 -0
- package/apps/webauto/entry/weibo-unified.mjs +84 -11
- package/apps/webauto/entry/xhs-orchestrate.mjs +43 -1
- package/apps/webauto/entry/xhs-unified.mjs +92 -997
- package/bin/webauto.mjs +22 -4
- package/dist/modules/camo-backend/src/index.js +33 -0
- package/dist/modules/camo-backend/src/internal/BrowserSession.js +232 -49
- package/dist/modules/camo-backend/src/internal/engine-manager.js +14 -13
- package/dist/modules/camo-backend/src/internal/ws-server.js +16 -19
- package/dist/modules/camo-runtime/src/utils/browser-service.mjs +38 -6
- package/dist/modules/workflow/blocks/EnsureSession.js +0 -8
- package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +78 -6
- package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +266 -192
- package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +2 -0
- package/dist/modules/workflow/src/runner.js +2 -0
- package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +150 -37
- package/dist/modules/xiaohongshu/app/src/blocks/SmartReplyBlock.js +491 -0
- package/modules/camo-backend/src/index.ts +31 -0
- package/modules/camo-backend/src/internal/BrowserSession.ts +224 -53
- package/modules/camo-backend/src/internal/engine-manager.ts +14 -15
- package/modules/camo-backend/src/internal/ws-server.ts +17 -17
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/common.mjs +12 -2
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/persistence.mjs +57 -0
- package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +2475 -243
- package/modules/camo-runtime/src/autoscript/runtime.mjs +35 -30
- package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +80 -443
- package/modules/camo-runtime/src/container/runtime-core/checkpoint.mjs +39 -6
- package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +206 -39
- package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +0 -79
- package/modules/camo-runtime/src/container/runtime-core/operations/viewport.mjs +46 -0
- package/modules/camo-runtime/src/utils/browser-service.mjs +41 -6
- package/modules/camo-runtime/src/utils/js-policy.mjs +28 -0
- package/modules/workflow/blocks/EnsureSession.ts +0 -4
- package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +81 -6
- package/modules/workflow/blocks/WeiboCollectSearchLinksBlock.ts +316 -0
- package/modules/workflow/definitions/weibo-search-workflow-v1.ts +2 -0
- package/modules/workflow/src/runner.ts +2 -0
- package/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.ts +198 -53
- package/modules/xiaohongshu/app/src/blocks/SmartReplyBlock.ts +706 -0
- package/package.json +2 -2
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +0 -498
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/detail.mjs +0 -181
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +0 -691
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +0 -388
- package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +0 -135
|
@@ -17,6 +17,8 @@ export interface WeiboCollectFromLinksInput {
|
|
|
17
17
|
targetCount: number;
|
|
18
18
|
maxComments?: number;
|
|
19
19
|
collectComments?: boolean;
|
|
20
|
+
tabCount?: number;
|
|
21
|
+
tabOpenDelayMs?: number;
|
|
20
22
|
serviceUrl?: string;
|
|
21
23
|
}
|
|
22
24
|
|
|
@@ -30,6 +32,7 @@ export interface WeiboCollectFromLinksOutput {
|
|
|
30
32
|
postsProcessed: number;
|
|
31
33
|
totalComments: number;
|
|
32
34
|
errors: number;
|
|
35
|
+
tabsUsed: number;
|
|
33
36
|
};
|
|
34
37
|
error?: string;
|
|
35
38
|
}
|
|
@@ -140,11 +143,15 @@ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboC
|
|
|
140
143
|
targetCount,
|
|
141
144
|
maxComments = 0,
|
|
142
145
|
collectComments: enableComments = false, // 默认不采集评论,加快速度
|
|
146
|
+
tabCount: requestedTabCount = 1,
|
|
147
|
+
tabOpenDelayMs: requestedTabOpenDelayMs = 800,
|
|
143
148
|
serviceUrl = 'http://127.0.0.1:7704',
|
|
144
149
|
} = input;
|
|
145
150
|
|
|
146
151
|
const profile = sessionId;
|
|
147
152
|
const controllerUrl = `${serviceUrl}/command`;
|
|
153
|
+
const tabCount = Math.max(1, Math.min(8, Number(requestedTabCount || 1) || 1));
|
|
154
|
+
const tabOpenDelayMs = Math.max(0, Number(requestedTabOpenDelayMs || 0) || 0);
|
|
148
155
|
|
|
149
156
|
const keywordDir = path.join(resolveDownloadRoot(), 'weibo', env, sanitizeFilenamePart(keyword));
|
|
150
157
|
const linksPath = path.join(keywordDir, 'phase2-links.jsonl');
|
|
@@ -157,7 +164,7 @@ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboC
|
|
|
157
164
|
linksPath,
|
|
158
165
|
processedCount: 0,
|
|
159
166
|
persistedCount: 0,
|
|
160
|
-
stats: { postsProcessed: 0, totalComments: 0, errors: 0 },
|
|
167
|
+
stats: { postsProcessed: 0, totalComments: 0, errors: 0, tabsUsed: 0 },
|
|
161
168
|
error: 'No links found in phase2-links.jsonl',
|
|
162
169
|
};
|
|
163
170
|
}
|
|
@@ -197,6 +204,65 @@ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboC
|
|
|
197
204
|
await controllerAction('goto', { url });
|
|
198
205
|
await new Promise(r => setTimeout(r, 500)); // 减少间隔 // 减少等待时间
|
|
199
206
|
}
|
|
207
|
+
|
|
208
|
+
/**
 * Query the controller for the pages of the current profile and normalise them.
 *
 * The unwrapped response may either be an object carrying a `pages` array or
 * the array itself; anything else is treated as "no pages".
 *
 * @returns One entry per reported page whose `index` converts to a finite
 *          number. `url` falls back to an empty string and `active` is true
 *          only when the reported flag is strictly `true`.
 */
async function listPagesDetailed(): Promise<Array<{ index: number; url: string; active: boolean }>> {
  const response = await controllerAction('browser:page:list', { profileId: profile });
  const payload = unwrapResult(response);

  let reported: any[] = [];
  if (Array.isArray(payload?.pages)) {
    reported = payload.pages;
  } else if (Array.isArray(payload)) {
    reported = payload;
  }

  const normalised: Array<{ index: number; url: string; active: boolean }> = [];
  for (const entry of reported) {
    const index = Number(entry?.index);
    const url = String(entry?.url || '');
    const active = entry?.active === true;
    // Entries without a usable numeric index cannot be switched to, so drop them.
    if (Number.isFinite(index)) {
      normalised.push({ index, url, active });
    }
  }
  return normalised;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Ask the controller to switch the current profile to the page at `index`,
 * then pause briefly before returning.
 *
 * @param index Page index as reported by the controller's page list.
 */
async function switchToPage(index: number): Promise<void> {
  // Fixed pause applied after every switch request.
  const settleMs = 260;
  await controllerAction('browser:page:switch', { profileId: profile, index });
  await new Promise<void>((resolve) => {
    setTimeout(resolve, settleMs);
  });
}
|
|
225
|
+
|
|
226
|
+
/**
 * Open a new browser tab via the `new-tab` shortcut and work out which page
 * index it received.
 *
 * After the optional `tabOpenDelayMs` pause the page list is read again. The
 * active page is preferred when its index is not in `existingIndexes`;
 * otherwise the first listed page with an unknown index is used.
 *
 * @param existingIndexes Page indexes that must not be reported as the new tab.
 * @returns The index of the newly detected page, or `null` when every listed
 *          page is already in `existingIndexes`.
 */
async function openNewTabAndResolveIndex(existingIndexes: Set<number>): Promise<number | null> {
  await controllerAction('system:shortcut', { app: 'camoufox', shortcut: 'new-tab' });
  if (tabOpenDelayMs > 0) {
    await new Promise((r) => setTimeout(r, tabOpenDelayMs));
  }

  const after = await listPagesDetailed();

  const active = after.find((item) => item.active);
  if (active && !existingIndexes.has(active.index)) return active.index;

  // No further fallback is needed: if no listed page is missing from
  // `existingIndexes`, there is nothing else that could be the new tab.
  const added = after.find((item) => !existingIndexes.has(item.index));
  return added ? added.index : null;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Build a pool of page indexes to rotate through, opening new tabs as needed.
 *
 * The pool starts with the active page (or the first listed page, or index 0
 * when the page list is empty or cannot be read). New tabs are then opened
 * until the pool holds `count` entries or a new tab can no longer be
 * identified. Finally the browser is switched back to the first pool entry;
 * a failure of that switch is ignored.
 *
 * @param count Desired pool size.
 * @returns Page indexes in the pool; always at least one entry.
 */
async function ensureTabPool(count: number): Promise<number[]> {
  const pages = await listPagesDetailed().catch(() => [] as Array<{ index: number; url: string; active: boolean }>);
  const active = pages.find((item) => item.active);

  let firstIndex = 0;
  if (active && Number.isFinite(active.index)) {
    firstIndex = active.index;
  } else if (pages.length > 0) {
    firstIndex = pages[0].index;
  }
  const pool: number[] = [firstIndex];

  // Track every page that already exists, not just the pool members, so that a
  // tab that was open before this call is never mistaken for a new one.
  const known = new Set<number>(pages.map((item) => item.index));
  known.add(firstIndex);

  while (pool.length < count) {
    const idx = await openNewTabAndResolveIndex(new Set(known));
    // `null` means "no new tab found". It must be tested explicitly because
    // Number(null) is 0, which would otherwise be accepted as page index 0.
    if (idx === null || !Number.isFinite(idx)) break;
    if (known.has(idx) || pool.includes(idx)) break;
    known.add(idx);
    pool.push(idx);
  }

  await switchToPage(pool[0]).catch((): null => null);
  return pool;
}
|
|
200
266
|
|
|
201
267
|
async function extractPostContent(): Promise<string> {
|
|
202
268
|
const script = `
|
|
@@ -272,15 +338,23 @@ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboC
|
|
|
272
338
|
let persistedCount = 0;
|
|
273
339
|
let totalComments = 0;
|
|
274
340
|
let errors = 0;
|
|
341
|
+
let tabsUsed = 1;
|
|
275
342
|
|
|
276
343
|
try {
|
|
277
|
-
const targetLinks = links.slice(0, targetCount);
|
|
278
|
-
|
|
279
|
-
|
|
344
|
+
const targetLinks = links.slice(0, Math.max(1, Number(targetCount || 0) || 1));
|
|
345
|
+
const tabPool = await ensureTabPool(tabCount).catch(() => [0]);
|
|
346
|
+
const roundRobinTabs = tabPool.length > 0 ? tabPool : [0];
|
|
347
|
+
tabsUsed = roundRobinTabs.length;
|
|
348
|
+
console.log(`[WeiboCollectFromLinks] tab pool ready: [${roundRobinTabs.join(', ')}]`);
|
|
349
|
+
|
|
350
|
+
for (let idx = 0; idx < targetLinks.length; idx += 1) {
|
|
351
|
+
const link = targetLinks[idx];
|
|
280
352
|
processedCount++;
|
|
281
|
-
|
|
353
|
+
const tabIndex = roundRobinTabs[idx % roundRobinTabs.length];
|
|
354
|
+
console.log(`[WeiboCollectFromLinks] Processing: ${link.statusId} (tab=${tabIndex})`);
|
|
282
355
|
|
|
283
356
|
try {
|
|
357
|
+
await switchToPage(tabIndex);
|
|
284
358
|
await gotoUrl(link.safeUrl);
|
|
285
359
|
let currentUrl = await getCurrentUrl();
|
|
286
360
|
if (!currentUrl) {
|
|
@@ -322,6 +396,7 @@ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboC
|
|
|
322
396
|
postsProcessed: processedCount,
|
|
323
397
|
totalComments,
|
|
324
398
|
errors,
|
|
399
|
+
tabsUsed,
|
|
325
400
|
},
|
|
326
401
|
};
|
|
327
402
|
} catch (error: any) {
|
|
@@ -331,7 +406,7 @@ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboC
|
|
|
331
406
|
linksPath,
|
|
332
407
|
processedCount,
|
|
333
408
|
persistedCount,
|
|
334
|
-
stats: { postsProcessed: processedCount, totalComments, errors },
|
|
409
|
+
stats: { postsProcessed: processedCount, totalComments, errors, tabsUsed },
|
|
335
410
|
error: error.message,
|
|
336
411
|
};
|
|
337
412
|
}
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Block: WeiboCollectSearchLinksBlock
|
|
3
|
+
*
|
|
4
|
+
* 阶段职责:
|
|
5
|
+
* 1) 仅从搜索结果页采集帖子链接(不做详情内容提取)
|
|
6
|
+
* 2) 持久化到 phase2-links.jsonl,供后续 WeiboCollectFromLinksBlock 消费
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import os from 'node:os';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { promises as fs } from 'node:fs';
|
|
12
|
+
|
|
13
|
+
/** Input accepted by the `execute` function of WeiboCollectSearchLinksBlock. */
export interface WeiboCollectSearchLinksInput {
  /** Session identifier; `execute` sends it to the controller as `profileId`. */
  sessionId: string;
  /** Search keyword; its sanitized form names the output directory. */
  keyword: string;
  /** Environment segment of the output path; `execute` defaults it to 'debug'. */
  env?: string;
  /** Number of unique links to collect; `execute` treats values below 1 as 1. */
  targetCount: number;
  /** Maximum number of result pages to read; `execute` defaults it to 10 and enforces a minimum of 1. */
  maxPages?: number;
  /** Base URL of the controller service; `execute` defaults it to 'http://127.0.0.1:7704'. */
  serviceUrl?: string;
}
|
|
21
|
+
|
|
22
|
+
/** Result returned by the `execute` function of WeiboCollectSearchLinksBlock. */
export interface WeiboCollectSearchLinksOutput {
  /** False when `execute` caught an error while collecting. */
  success: boolean;
  /** Directory for this keyword: `<download root>/weibo/<env>/<sanitized keyword>`. */
  keywordDir: string;
  /** Path of `phase2-links.jsonl` inside `keywordDir`. */
  linksPath: string;
  /** Number of links passed to the links-file writer. */
  collectedCount: number;
  /** Number of loop iterations in which the search results were read. */
  pagesVisited: number;
  /** Failure description; only set when `success` is false. */
  error?: string;
}
|
|
30
|
+
|
|
31
|
+
/** One record of `phase2-links.jsonl`, describing a post found on a search page. */
interface CollectedLink {
  /** Status id derived from `safeUrl`; also used as the de-duplication key. */
  statusId: string;
  /** Numeric user id derived from the author link or `safeUrl`; empty when none is found. */
  userId: string;
  /** Absolute URL of the post as read from the result card. */
  safeUrl: string;
  /** URL of the search page the link was read from. */
  searchUrl: string;
  /** Author display name; omitted when the card yields none. */
  authorName?: string;
  /** Leading part of the post text (the page script caps it at 180 characters); omitted when empty. */
  contentPreview?: string;
  /** ISO-8601 timestamp taken when the record was created. */
  ts: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Resolve the directory under which collected data is stored.
 *
 * `WEBAUTO_DOWNLOAD_ROOT` takes precedence over `WEBAUTO_DOWNLOAD_DIR`. The
 * chosen value is returned unchanged (it is trimmed only to test whether it
 * is blank). Without a usable override the result is
 * `<home>/.webauto/download`, where home comes from `HOME`, `USERPROFILE` or
 * `os.homedir()`, in that order.
 */
function resolveDownloadRoot(): string {
  const { WEBAUTO_DOWNLOAD_ROOT, WEBAUTO_DOWNLOAD_DIR, HOME, USERPROFILE } = process.env;

  const override = WEBAUTO_DOWNLOAD_ROOT || WEBAUTO_DOWNLOAD_DIR;
  if (override && override.trim().length > 0) {
    return override;
  }

  const homeDir = HOME || USERPROFILE || os.homedir();
  return path.join(homeDir, '.webauto', 'download');
}
|
|
47
|
+
|
|
48
|
+
/**
 * Turn arbitrary text into a single path segment.
 *
 * The value is trimmed, each run of `\ / : " * ? < > |` becomes one
 * underscore, each run of whitespace becomes one underscore, and the result
 * is cut to at most 80 characters. Falsy input yields an empty string.
 */
function sanitizeFilenamePart(value: string): string {
  const trimmed = String(value || '').trim();
  const withoutReserved = trimmed.replace(/[\\/:"*?<>|]+/g, '_');
  const withoutWhitespace = withoutReserved.replace(/\s+/g, '_');
  return withoutWhitespace.slice(0, 80);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Derive a status id from a post URL.
 *
 * The path segment following `/status/` is preferred. Failing that, the last
 * path segment is used, provided the path has at least two segments.
 *
 * @param url Absolute URL of the post.
 * @returns The status id, or an empty string when the URL is blank, cannot be
 *          parsed, or has no suitable path segment.
 */
function extractStatusId(url: string): string {
  const candidate = String(url || '').trim();
  if (candidate.length === 0) return '';

  let pathname: string;
  try {
    pathname = String(new URL(candidate).pathname || '');
  } catch {
    // Not an absolute URL: there is nothing to extract.
    return '';
  }

  const explicit = /\/status\/([^/?#]+)/i.exec(pathname);
  if (explicit?.[1]) return explicit[1];

  const segments = pathname.split('/').filter((segment) => segment.length > 0);
  if (segments.length < 2) return '';
  return String(segments[segments.length - 1] || '').trim();
}
|
|
73
|
+
|
|
74
|
+
/**
 * Derive a numeric user id, trying the author link first and the post URL second.
 *
 * For each URL the digits following `/u/` are preferred; otherwise the first
 * path segment is used when it consists of digits only.
 *
 * @param authorUrl URL of the author's profile link (may be empty).
 * @param safeUrl   URL of the post (may be empty).
 * @returns The user id, or an empty string when neither URL yields one.
 */
function extractUserId(authorUrl: string, safeUrl: string): string {
  const parseUserId = (raw: string): string => {
    let pathname: string;
    try {
      pathname = String(new URL(raw).pathname || '');
    } catch {
      // Blank or relative input cannot be parsed; treat it as "no id".
      return '';
    }

    const explicit = /\/u\/([0-9]+)/i.exec(pathname);
    if (explicit?.[1]) return explicit[1];

    const [leading] = pathname.split('/').filter(Boolean);
    return leading !== undefined && /^[0-9]+$/.test(leading) ? leading : '';
  };

  const sources = [String(authorUrl || '').trim(), String(safeUrl || '').trim()];
  for (const source of sources) {
    const userId = parseUserId(source);
    if (userId) return userId;
  }
  return '';
}
|
|
91
|
+
|
|
92
|
+
export async function execute(input: WeiboCollectSearchLinksInput): Promise<WeiboCollectSearchLinksOutput> {
|
|
93
|
+
const {
|
|
94
|
+
sessionId,
|
|
95
|
+
keyword,
|
|
96
|
+
env = 'debug',
|
|
97
|
+
targetCount,
|
|
98
|
+
maxPages = 10,
|
|
99
|
+
serviceUrl = 'http://127.0.0.1:7704',
|
|
100
|
+
} = input;
|
|
101
|
+
|
|
102
|
+
const profile = sessionId;
|
|
103
|
+
const controllerUrl = `${serviceUrl}/command`;
|
|
104
|
+
const keywordDir = path.join(resolveDownloadRoot(), 'weibo', env, sanitizeFilenamePart(keyword));
|
|
105
|
+
const linksPath = path.join(keywordDir, 'phase2-links.jsonl');
|
|
106
|
+
const maxTarget = Math.max(1, Number(targetCount || 0) || 1);
|
|
107
|
+
const maxPageCount = Math.max(1, Number(maxPages || 0) || 1);
|
|
108
|
+
|
|
109
|
+
/**
 * POST one action to the controller endpoint and return its decoded reply.
 *
 * `profileId` is filled in from the current profile unless `args` overrides
 * it. A 30 second abort signal is attached when `AbortSignal.timeout` exists.
 *
 * @param action Controller action name.
 * @param args   Action arguments, merged over the default `profileId`.
 * @returns The parsed JSON body, `{}` for an empty body, or `{ raw }` holding
 *          the body text when it is not valid JSON.
 * @throws Error `HTTP <status>: <body>` when the response status is not OK.
 */
async function controllerAction(action: string, args: any = {}): Promise<any> {
  const abortApi = AbortSignal as any;
  const payload = JSON.stringify({ action, args: { profileId: profile, ...args } });

  const response = await fetch(controllerUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
    signal: abortApi.timeout ? abortApi.timeout(30000) : undefined,
  });

  const text = await response.text();
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${text}`);
  }
  if (!text) {
    return {};
  }

  try {
    return JSON.parse(text);
  } catch {
    // Non-JSON replies are handed back as text instead of failing the call.
    return { raw: text };
  }
}
|
|
124
|
+
|
|
125
|
+
/**
 * Strip the envelope from a controller reply.
 *
 * Lookup order for object replies: `response.result`, then
 * `response.data.result`, then `response.data`. Non-object replies and
 * objects with neither key are returned unchanged.
 */
function unwrapResult(response: any): any {
  if (!response || typeof response !== 'object') return response;
  if ('result' in response) return response.result;

  const inner = response.data;
  if (inner && typeof inner === 'object' && 'result' in inner) {
    return inner.result;
  }
  return 'data' in response ? inner : response;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Read the result cards of the page currently shown in the browser.
 *
 * A script is evaluated in the page; for every `.card-wrap` element that
 * contains a link with `/status/` in its href it reports the absolute post
 * URL, the author link and name, and up to 180 characters of post text.
 *
 * @returns The reported rows (empty when the reply carries none) and the
 *          page URL (empty string when missing).
 */
async function readSearchRows(): Promise<{ rows: any[]; searchUrl: string }> {
  // Page-side script; its text is kept exactly as is because it is sent to the browser.
  const pageScript = `
(() => {
const clean = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
const abs = (value) => {
try {
return new URL(String(value || ''), location.href).toString();
} catch {
return '';
}
};
const rows = [];
const cards = Array.from(document.querySelectorAll('.card-wrap'));
for (const card of cards) {
const statusAnchor = card.querySelector('a[href*="/status/"]');
const safeUrl = abs(statusAnchor?.getAttribute?.('href') || statusAnchor?.href || '');
if (!safeUrl) continue;
const authorAnchor =
card.querySelector('a[href*="/u/"]') ||
card.querySelector('a[title][href*="weibo.com"]') ||
card.querySelector('.name a');
const authorUrl = abs(authorAnchor?.getAttribute?.('href') || authorAnchor?.href || '');
const authorName = clean(
authorAnchor?.getAttribute?.('title') ||
authorAnchor?.textContent ||
'',
);
const contentEl =
card.querySelector('.txt') ||
card.querySelector('[node-type="feed_list_content"]') ||
card.querySelector('.detail_wbtext') ||
card.querySelector('.wbtext');
const contentPreview = clean(contentEl?.textContent || '').slice(0, 180);
rows.push({
safeUrl,
authorUrl,
authorName,
contentPreview,
});
}
return { rows, searchUrl: String(location.href || '') };
})()
`;

  const reply = await controllerAction('evaluate', { script: pageScript });
  const payload = unwrapResult(reply);

  let rows: any[] = [];
  if (Array.isArray(payload?.rows)) {
    rows = payload.rows;
  }
  const searchUrl = String(payload?.searchUrl || '');
  return { rows, searchUrl };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Locate the "next page" control on the current page.
 *
 * A script is evaluated in the page; it looks at `a`, `button` and `span`
 * elements whose text contains 下一页 or 下页, keeps those that are visible
 * inside the viewport and not marked disabled, and reports the centre of the
 * first one.
 *
 * @returns `{ ok: true, x, y }` with viewport coordinates of the control's
 *          centre, or `{ ok: false }` when no control was found or the reply
 *          carries no usable coordinates.
 */
async function findNextPageCenter(): Promise<{ ok: boolean; x?: number; y?: number }> {
  // Page-side script; its text is kept exactly as is because it is sent to the browser.
  const script = `
(() => {
const clean = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
const isVisible = (node) => {
if (!(node instanceof HTMLElement)) return false;
const rect = node.getBoundingClientRect();
if (rect.width <= 0 || rect.height <= 0) return false;
if (rect.bottom <= 0 || rect.top >= window.innerHeight) return false;
const style = window.getComputedStyle(node);
if (!style) return false;
if (style.display === 'none' || style.visibility === 'hidden') return false;
if (Number(style.opacity || '1') === 0) return false;
return true;
};
const isDisabled = (node) => {
const text = clean(node.className || '');
if (text.includes('disable') || text.includes('disabled')) return true;
const ariaDisabled = String(node.getAttribute('aria-disabled') || '').toLowerCase();
return ariaDisabled === 'true';
};
const candidates = Array.from(document.querySelectorAll('a, button, span'))
.filter((node) => /下一页|下页/.test(clean(node.textContent || '')))
.filter((node) => isVisible(node) && !isDisabled(node));
const target = candidates[0];
if (!target) return { ok: false };
const rect = target.getBoundingClientRect();
return {
ok: true,
x: Math.round(rect.left + rect.width / 2),
y: Math.round(rect.top + rect.height / 2),
};
})()
`;
  const res = await controllerAction('evaluate', { script });
  const value = unwrapResult(res);
  if (value?.ok !== true) return { ok: false };

  // Accept only real numbers (or non-blank numeric strings). Coercing a missing
  // coordinate to 0 would make the caller click the top-left corner of the page.
  const toCoordinate = (raw: unknown): number => {
    if (typeof raw === 'number') return raw;
    if (typeof raw === 'string' && raw.trim() !== '') return Number(raw);
    return Number.NaN;
  };
  const x = toCoordinate(value.x);
  const y = toCoordinate(value.y);
  if (!Number.isFinite(x) || !Number.isFinite(y)) return { ok: false };

  return { ok: true, x, y };
}
|
|
229
|
+
|
|
230
|
+
/**
 * Write the links file, replacing any previous content.
 *
 * The keyword directory is created when missing. Each row becomes one JSON
 * line terminated by a newline; when there is nothing to write the file is
 * written empty.
 *
 * @param rows Links to persist, in output order.
 */
async function writeLinks(rows: CollectedLink[]): Promise<void> {
  await fs.mkdir(keywordDir, { recursive: true });

  const joined = rows.map((row) => JSON.stringify(row)).join('\n');
  const content = joined.length > 0 ? `${joined}\n` : '';
  await fs.writeFile(linksPath, content, 'utf8');
}
|
|
235
|
+
|
|
236
|
+
// Links collected so far, keyed by status id so a post seen twice is stored once.
const dedup = new Map<string, CollectedLink>();
let pagesVisited = 0;
// Number of consecutive page reads that contributed no new link.
let noProgressRounds = 0;

try {
  for (let page = 0; page < maxPageCount; page += 1) {
    pagesVisited += 1;
    const snapshot = await readSearchRows();
    let added = 0;

    for (const row of snapshot.rows) {
      const safeUrl = String(row?.safeUrl || '').trim();
      if (!safeUrl) continue;
      const statusId = extractStatusId(safeUrl);
      // Rows without a status id cannot be de-duplicated and are skipped.
      if (!statusId) continue;
      if (dedup.has(statusId)) continue;
      dedup.set(statusId, {
        statusId,
        userId: extractUserId(String(row?.authorUrl || ''), safeUrl),
        safeUrl,
        searchUrl: String(snapshot.searchUrl || ''),
        authorName: String(row?.authorName || '').trim() || undefined,
        contentPreview: String(row?.contentPreview || '').trim() || undefined,
        ts: new Date().toISOString(),
      });
      added += 1;
      if (dedup.size >= maxTarget) break;
    }

    noProgressRounds = added === 0 ? noProgressRounds + 1 : 0;

    if (dedup.size >= maxTarget) break;
    // Two reads in a row without anything new: stop paging.
    if (noProgressRounds >= 2) break;

    let clicked = false;
    for (let attempt = 0; attempt < 3; attempt += 1) {
      const next = await findNextPageCenter();
      const { x, y } = next;
      if (!next.ok || typeof x !== 'number' || typeof y !== 'number' || !Number.isFinite(x) || !Number.isFinite(y)) {
        // Control not found in the viewport: scroll down and look again.
        await controllerAction('mouse:wheel', { deltaX: 0, deltaY: 900 }).catch((): null => null);
        await new Promise((r) => setTimeout(r, 400));
        continue;
      }
      await controllerAction('mouse:click', {
        x: Math.round(x),
        y: Math.round(y),
        button: 'left',
        clicks: 1,
      });
      clicked = true;
      await new Promise((r) => setTimeout(r, 1500));
      break;
    }

    if (!clicked) break;
  }

  const links = Array.from(dedup.values()).slice(0, maxTarget);
  await writeLinks(links);

  return {
    success: true,
    keywordDir,
    linksPath,
    collectedCount: links.length,
    pagesVisited,
  };
} catch (error: unknown) {
  // Persist whatever was collected before the failure; a write error here is ignored.
  const links = Array.from(dedup.values()).slice(0, maxTarget);
  await writeLinks(links).catch((): null => null);

  // The thrown value is not guaranteed to be an Error (it may even be null),
  // so read `message` defensively and fall back to the value's string form.
  const detail = (error as { message?: unknown } | null | undefined)?.message;
  const message = detail !== undefined ? String(detail) : String(error);
  return {
    success: false,
    keywordDir,
    linksPath,
    collectedCount: links.length,
    pagesVisited,
    error: `WeiboCollectSearchLinks failed: ${message}`,
  };
}
|
|
316
|
+
}
|
|
@@ -17,6 +17,7 @@ import * as XiaohongshuCollectLinksBlock from '../blocks/XiaohongshuCollectLinks
|
|
|
17
17
|
import * as XiaohongshuCollectFromLinksBlock from '../blocks/XiaohongshuCollectFromLinksBlock.js';
|
|
18
18
|
import * as ErrorRecoveryBlock from '../blocks/ErrorRecoveryBlock.js';
|
|
19
19
|
import * as ExecuteWeiboSearchBlock from '../blocks/ExecuteWeiboSearchBlock.js';
|
|
20
|
+
import * as WeiboCollectSearchLinksBlock from '../blocks/WeiboCollectSearchLinksBlock.js';
|
|
20
21
|
import * as WeiboCollectFromLinksBlock from '../blocks/WeiboCollectFromLinksBlock.js';
|
|
21
22
|
import * as WeiboCollectCommentsBlock from '../blocks/WeiboCollectCommentsBlock.js';
|
|
22
23
|
import * as AnchorVerificationBlock from '../blocks/AnchorVerificationBlock.js';
|
|
@@ -47,6 +48,7 @@ export function createDefaultWorkflowExecutor(): WorkflowExecutor {
|
|
|
47
48
|
executor.registerBlock('XiaohongshuCollectFromLinksBlock', { execute: XiaohongshuCollectFromLinksBlock.execute });
|
|
48
49
|
executor.registerBlock('OrganizeXhsNotesBlock', { execute: OrganizeXhsNotesBlock.execute });
|
|
49
50
|
executor.registerBlock('ExecuteWeiboSearchBlock', { execute: ExecuteWeiboSearchBlock.execute });
|
|
51
|
+
executor.registerBlock('WeiboCollectSearchLinksBlock', { execute: WeiboCollectSearchLinksBlock.execute });
|
|
50
52
|
executor.registerBlock('WeiboCollectFromLinksBlock', { execute: WeiboCollectFromLinksBlock.execute });
|
|
51
53
|
executor.registerBlock('WeiboCollectCommentsBlock', { execute: WeiboCollectCommentsBlock.execute });
|
|
52
54
|
return executor;
|