@web-auto/webauto 0.1.18 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -53
- package/apps/desktop-console/dist/main/index.mjs +227 -12
- package/apps/desktop-console/dist/renderer/index.js +237 -8
- package/apps/desktop-console/entry/ui-cli.mjs +282 -16
- package/apps/desktop-console/entry/ui-console.mjs +46 -15
- package/apps/webauto/entry/account.mjs +126 -27
- package/apps/webauto/entry/lib/account-detect.mjs +399 -9
- package/apps/webauto/entry/lib/account-store.mjs +201 -109
- package/apps/webauto/entry/lib/iflow-reply.mjs +194 -0
- package/apps/webauto/entry/lib/profile-policy.mjs +48 -0
- package/apps/webauto/entry/lib/profilepool.mjs +12 -0
- package/apps/webauto/entry/lib/schedule-store.mjs +29 -2
- package/apps/webauto/entry/lib/session-init.mjs +227 -0
- package/apps/webauto/entry/lib/upgrade-check.mjs +269 -0
- package/apps/webauto/entry/lib/xhs-unified-blocks.mjs +160 -0
- package/apps/webauto/entry/lib/xhs-unified-output-blocks.mjs +83 -0
- package/apps/webauto/entry/lib/xhs-unified-plan-blocks.mjs +55 -0
- package/apps/webauto/entry/lib/xhs-unified-profile-blocks.mjs +542 -0
- package/apps/webauto/entry/lib/xhs-unified-runtime-blocks.mjs +436 -0
- package/apps/webauto/entry/profilepool.mjs +56 -9
- package/apps/webauto/entry/smart-reply-cli.mjs +267 -0
- package/apps/webauto/entry/weibo-unified.mjs +84 -11
- package/apps/webauto/entry/xhs-orchestrate.mjs +43 -1
- package/apps/webauto/entry/xhs-unified.mjs +92 -997
- package/bin/webauto.mjs +22 -4
- package/dist/modules/camo-backend/src/index.js +33 -0
- package/dist/modules/camo-backend/src/internal/BrowserSession.js +232 -49
- package/dist/modules/camo-backend/src/internal/engine-manager.js +14 -13
- package/dist/modules/camo-backend/src/internal/ws-server.js +16 -19
- package/dist/modules/camo-runtime/src/utils/browser-service.mjs +38 -6
- package/dist/modules/workflow/blocks/EnsureSession.js +0 -8
- package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +78 -6
- package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +266 -192
- package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +2 -0
- package/dist/modules/workflow/src/runner.js +2 -0
- package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +150 -37
- package/dist/modules/xiaohongshu/app/src/blocks/SmartReplyBlock.js +491 -0
- package/modules/camo-backend/src/index.ts +31 -0
- package/modules/camo-backend/src/internal/BrowserSession.ts +224 -53
- package/modules/camo-backend/src/internal/engine-manager.ts +14 -15
- package/modules/camo-backend/src/internal/ws-server.ts +17 -17
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/common.mjs +12 -2
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/persistence.mjs +57 -0
- package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +2475 -243
- package/modules/camo-runtime/src/autoscript/runtime.mjs +35 -30
- package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +80 -443
- package/modules/camo-runtime/src/container/runtime-core/checkpoint.mjs +39 -6
- package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +206 -39
- package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +0 -79
- package/modules/camo-runtime/src/container/runtime-core/operations/viewport.mjs +46 -0
- package/modules/camo-runtime/src/utils/browser-service.mjs +41 -6
- package/modules/camo-runtime/src/utils/js-policy.mjs +28 -0
- package/modules/workflow/blocks/EnsureSession.ts +0 -4
- package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +81 -6
- package/modules/workflow/blocks/WeiboCollectSearchLinksBlock.ts +316 -0
- package/modules/workflow/definitions/weibo-search-workflow-v1.ts +2 -0
- package/modules/workflow/src/runner.ts +2 -0
- package/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.ts +198 -53
- package/modules/xiaohongshu/app/src/blocks/SmartReplyBlock.ts +706 -0
- package/package.json +2 -2
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +0 -498
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/detail.mjs +0 -181
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +0 -691
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +0 -388
- package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +0 -135
|
@@ -1,208 +1,282 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
* Collects post links from Weibo search results
|
|
2
|
+
* Workflow Block: WeiboCollectSearchLinksBlock
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* - fresh: Clear existing data and recollect
|
|
9
|
-
* - incremental: Keep existing data and add new posts
|
|
4
|
+
* 阶段职责:
|
|
5
|
+
* 1) 仅从搜索结果页采集帖子链接(不做详情内容提取)
|
|
6
|
+
* 2) 持久化到 phase2-links.jsonl,供后续 WeiboCollectFromLinksBlock 消费
|
|
10
7
|
*/
|
|
11
|
-
import
|
|
12
|
-
import
|
|
13
|
-
import {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
});
|
|
42
|
-
// Get rate limiter
|
|
43
|
-
const rateLimiter = RateLimiter.getInstance();
|
|
44
|
-
const page = context.page;
|
|
45
|
-
if (!page) {
|
|
46
|
-
return {
|
|
47
|
-
success: false,
|
|
48
|
-
error: 'Page not available',
|
|
49
|
-
posts: 0,
|
|
50
|
-
linksFile: paths.linksPath,
|
|
51
|
-
collectionId: dataManager.getCollectionId(),
|
|
52
|
-
stats: {
|
|
53
|
-
totalPosts: 0,
|
|
54
|
-
newPosts: 0,
|
|
55
|
-
duplicatesSkipped: 0
|
|
56
|
-
}
|
|
57
|
-
};
|
|
8
|
+
import os from 'node:os';
|
|
9
|
+
import path from 'node:path';
|
|
10
|
+
import { promises as fs } from 'node:fs';
|
|
11
|
+
/**
 * Resolves the root directory under which collected data is stored.
 *
 * Resolution order:
 *   1. WEBAUTO_DOWNLOAD_ROOT (if set to a non-blank value)
 *   2. WEBAUTO_DOWNLOAD_DIR  (if set to a non-blank value)
 *   3. `<home>/.webauto/download`, where home comes from HOME, USERPROFILE,
 *      or os.homedir(), in that order.
 *
 * @returns {string} The download root path, with surrounding whitespace removed
 *   when it comes from an environment variable.
 */
function resolveDownloadRoot() {
    const overrides = [
        process.env.WEBAUTO_DOWNLOAD_ROOT,
        process.env.WEBAUTO_DOWNLOAD_DIR,
    ];
    for (const override of overrides) {
        // Evaluate each variable on its trimmed value so a blank ROOT does not
        // mask a valid DIR, and so the returned path carries no stray whitespace.
        const trimmed = String(override ?? '').trim();
        if (trimmed)
            return trimmed;
    }
    const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
    return path.join(home, '.webauto', 'download');
}
|
|
18
|
+
/**
 * Converts an arbitrary value into a string that is safe to use as a single
 * path segment.
 *
 * - Nullish/falsy values become the empty string.
 * - Runs of characters that are invalid in file names (\ / : " * ? < > |)
 *   and runs of whitespace are each replaced with a single underscore.
 * - The result is capped at 80 UTF-16 code units.
 * - A result made only of dots ("." or "..") is rewritten to underscores so
 *   that joining it onto a directory can never resolve to the directory
 *   itself or its parent.
 *
 * @param {unknown} value Raw value (typically a search keyword).
 * @returns {string} Sanitized path segment; may be empty.
 */
function sanitizeFilenamePart(value) {
    const cleaned = String(value || '')
        .trim()
        .replace(/[\\/:"*?<>|]+/g, '_')
        .replace(/\s+/g, '_')
        .slice(0, 80)
        // slice() counts UTF-16 units and may cut an astral character in half;
        // drop the dangling high surrogate rather than emit an invalid name.
        .replace(/[\uD800-\uDBFF]$/, '');
    // "." and ".." are directory references, not names; neutralize them.
    if (/^\.+$/.test(cleaned))
        return '_'.repeat(cleaned.length);
    return cleaned;
}
|
|
25
|
+
/**
 * Derives a status (post) identifier from a post URL.
 *
 * The segment following "/status/" in the path is preferred. Otherwise, when
 * the path has at least two non-empty segments, the last segment is used.
 *
 * @param {unknown} url Absolute URL of the post.
 * @returns {string} The identifier, or '' when the input is blank, is not a
 *   parseable absolute URL, or has no usable path segment.
 */
function extractStatusId(url) {
    const candidate = String(url || '').trim();
    if (candidate === '') {
        return '';
    }
    let pathname;
    try {
        pathname = String(new URL(candidate).pathname || '');
    }
    catch {
        // Not an absolute URL: nothing to extract.
        return '';
    }
    const explicit = /\/status\/([^/?#]+)/i.exec(pathname);
    if (explicit?.[1]) {
        return explicit[1];
    }
    const segments = pathname.split('/').filter(Boolean);
    return segments.length >= 2
        ? String(segments[segments.length - 1] || '').trim()
        : '';
}
|
|
45
|
+
/**
 * Derives a numeric user identifier from an author URL, falling back to the
 * post URL.
 *
 * For each URL in turn, the digits following "/u/" in the path are preferred;
 * otherwise the first path segment is used when it consists only of digits.
 *
 * @param {unknown} authorUrl Absolute URL of the author's profile.
 * @param {unknown} safeUrl Absolute URL of the post, used as a fallback.
 * @returns {string} The user id, or '' when neither URL yields one.
 */
function extractUserId(authorUrl, safeUrl) {
    const sources = [
        String(authorUrl || '').trim(),
        String(safeUrl || '').trim(),
    ];
    const pickId = (raw) => {
        let pathname;
        try {
            pathname = String(new URL(raw).pathname || '');
        }
        catch {
            // Blank or non-absolute URL: no id available from this source.
            return '';
        }
        const viaU = /\/u\/([0-9]+)/i.exec(pathname);
        if (viaU?.[1]) {
            return viaU[1];
        }
        const segments = pathname.split('/').filter(Boolean);
        if (segments.length > 0 && /^[0-9]+$/.test(segments[0])) {
            return segments[0];
        }
        return '';
    };
    for (const source of sources) {
        const id = pickId(source);
        if (id) {
            return id;
        }
    }
    return '';
}
|
|
65
|
+
/**
 * Workflow block entry point: collects post links from the search results
 * page currently open in the browser session and writes them to
 * `phase2-links.jsonl`.
 *
 * The browser is driven through a controller service via HTTP POST to
 * `<serviceUrl>/command`. On each page the block evaluates a script that reads
 * `.card-wrap` elements, de-duplicates rows by status id, and then clicks the
 * "next page" control. Collection stops when the target count is reached, the
 * page limit is reached, two consecutive pages add nothing new, or no
 * next-page control can be found.
 *
 * The links file is rewritten (not appended) with whatever was collected, both
 * on success and, best-effort, on failure.
 *
 * @param {object} input
 * @param {string} input.sessionId Sent to the controller as `profileId`.
 * @param {string} input.keyword Search keyword; used to name the output directory.
 * @param {string} [input.env='debug'] Environment name; a path segment of the output directory.
 * @param {number} [input.targetCount] Maximum number of links to collect (minimum 1).
 * @param {number} [input.maxPages=10] Maximum number of result pages to read (minimum 1).
 * @param {string} [input.serviceUrl='http://127.0.0.1:7704'] Base URL of the controller service.
 * @returns {Promise<{success: boolean, keywordDir: string, linksPath: string,
 *   collectedCount: number, pagesVisited: number, error?: string}>}
 *   Never rejects for errors raised inside the collection loop; they are
 *   reported through `success: false` and `error`.
 */
export async function execute(input) {
    const { sessionId, keyword, env = 'debug', targetCount, maxPages = 10, serviceUrl = 'http://127.0.0.1:7704', } = input;
    const profile = sessionId;
    // Strip trailing slashes so "http://host:7704/" does not become ".../​/command".
    const controllerUrl = `${String(serviceUrl).replace(/\/+$/, '')}/command`;
    const keywordDir = path.join(resolveDownloadRoot(), 'weibo', env, sanitizeFilenamePart(keyword));
    const linksPath = path.join(keywordDir, 'phase2-links.jsonl');
    const maxTarget = Math.max(1, Number(targetCount || 0) || 1);
    const maxPageCount = Math.max(1, Number(maxPages || 0) || 1);
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    /**
     * Sends one action to the controller service.
     * Throws on a non-2xx response; returns the parsed JSON body, `{}` for an
     * empty body, or `{ raw }` when the body is not valid JSON.
     */
    async function controllerAction(action, args = {}) {
        const res = await fetch(controllerUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ action, args: { profileId: profile, ...args } }),
            // AbortSignal.timeout is not available on every runtime; skip the timeout when absent.
            signal: AbortSignal.timeout ? AbortSignal.timeout(30000) : undefined,
        });
        const raw = await res.text();
        if (!res.ok)
            throw new Error(`HTTP ${res.status}: ${raw}`);
        try {
            return raw ? JSON.parse(raw) : {};
        }
        catch {
            return { raw };
        }
    }

    /**
     * Extracts the payload from a controller response, accepting the shapes
     * `{ result }`, `{ data: { result } }` and `{ data }`, in that order.
     */
    function unwrapResult(response) {
        if (response && typeof response === 'object') {
            if ('result' in response)
                return response.result;
            if (response.data && typeof response.data === 'object' && 'result' in response.data) {
                return response.data.result;
            }
            if ('data' in response)
                return response.data;
        }
        return response;
    }

    /**
     * Evaluates a script in the page that returns one row per result card
     * (post URL, author URL, author name, content preview) plus the page URL.
     */
    async function readSearchRows() {
        const script = `
      (() => {
        const clean = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
        const abs = (value) => {
          try {
            return new URL(String(value || ''), location.href).toString();
          } catch {
            return '';
          }
        };
        const rows = [];
        const cards = Array.from(document.querySelectorAll('.card-wrap'));
        for (const card of cards) {
          const statusAnchor = card.querySelector('a[href*="/status/"]');
          const safeUrl = abs(statusAnchor?.getAttribute?.('href') || statusAnchor?.href || '');
          if (!safeUrl) continue;
          const authorAnchor =
            card.querySelector('a[href*="/u/"]') ||
            card.querySelector('a[title][href*="weibo.com"]') ||
            card.querySelector('.name a');
          const authorUrl = abs(authorAnchor?.getAttribute?.('href') || authorAnchor?.href || '');
          const authorName = clean(
            authorAnchor?.getAttribute?.('title') ||
            authorAnchor?.textContent ||
            '',
          );
          const contentEl =
            card.querySelector('.txt') ||
            card.querySelector('[node-type="feed_list_content"]') ||
            card.querySelector('.detail_wbtext') ||
            card.querySelector('.wbtext');
          const contentPreview = clean(contentEl?.textContent || '').slice(0, 180);
          rows.push({
            safeUrl,
            authorUrl,
            authorName,
            contentPreview,
          });
        }
        return { rows, searchUrl: String(location.href || '') };
      })()
    `;
        const res = await controllerAction('evaluate', { script });
        const value = unwrapResult(res);
        return {
            rows: Array.isArray(value?.rows) ? value.rows : [],
            searchUrl: String(value?.searchUrl || ''),
        };
    }

    /**
     * Evaluates a script in the page that locates the first visible, enabled
     * element whose text contains "下一页" or "下页" (next page) and returns
     * the viewport coordinates of its center.
     */
    async function findNextPageCenter() {
        const script = `
      (() => {
        const clean = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
        const isVisible = (node) => {
          if (!(node instanceof HTMLElement)) return false;
          const rect = node.getBoundingClientRect();
          if (rect.width <= 0 || rect.height <= 0) return false;
          if (rect.bottom <= 0 || rect.top >= window.innerHeight) return false;
          const style = window.getComputedStyle(node);
          if (!style) return false;
          if (style.display === 'none' || style.visibility === 'hidden') return false;
          if (Number(style.opacity || '1') === 0) return false;
          return true;
        };
        const isDisabled = (node) => {
          const text = clean(node.className || '');
          if (text.includes('disable') || text.includes('disabled')) return true;
          const ariaDisabled = String(node.getAttribute('aria-disabled') || '').toLowerCase();
          return ariaDisabled === 'true';
        };
        const candidates = Array.from(document.querySelectorAll('a, button, span'))
          .filter((node) => /下一页|下页/.test(clean(node.textContent || '')))
          .filter((node) => isVisible(node) && !isDisabled(node));
        const target = candidates[0];
        if (!target) return { ok: false };
        const rect = target.getBoundingClientRect();
        return {
          ok: true,
          x: Math.round(rect.left + rect.width / 2),
          y: Math.round(rect.top + rect.height / 2),
        };
      })()
    `;
        const res = await controllerAction('evaluate', { script });
        const value = unwrapResult(res);
        return {
            ok: value?.ok === true,
            x: Number(value?.x || 0),
            y: Number(value?.y || 0),
        };
    }

    /** Rewrites the links file as JSON Lines (one object per line); empty input yields an empty file. */
    async function writeLinks(rows) {
        await fs.mkdir(keywordDir, { recursive: true });
        const jsonl = rows.map((row) => JSON.stringify(row)).join('\n');
        await fs.writeFile(linksPath, jsonl ? `${jsonl}\n` : '', 'utf8');
    }

    // Keyed by status id; Map preserves insertion order, so output follows discovery order.
    const dedup = new Map();
    let pagesVisited = 0;
    let noProgressRounds = 0;
    try {
        for (let page = 0; page < maxPageCount; page += 1) {
            pagesVisited += 1;
            const snapshot = await readSearchRows();
            let added = 0;
            for (const row of snapshot.rows) {
                const safeUrl = String(row?.safeUrl || '').trim();
                if (!safeUrl)
                    continue;
                const statusId = extractStatusId(safeUrl);
                if (!statusId)
                    continue;
                if (dedup.has(statusId))
                    continue;
                dedup.set(statusId, {
                    statusId,
                    userId: extractUserId(String(row?.authorUrl || ''), safeUrl),
                    safeUrl,
                    searchUrl: String(snapshot.searchUrl || ''),
                    // undefined values are omitted by JSON.stringify, keeping the JSONL compact.
                    authorName: String(row?.authorName || '').trim() || undefined,
                    contentPreview: String(row?.contentPreview || '').trim() || undefined,
                    ts: new Date().toISOString(),
                });
                added += 1;
                if (dedup.size >= maxTarget)
                    break;
            }
            if (added === 0)
                noProgressRounds += 1;
            else
                noProgressRounds = 0;
            if (dedup.size >= maxTarget)
                break;
            // Two pages in a row without new links: assume the results are exhausted.
            if (noProgressRounds >= 2)
                break;
            let clicked = false;
            for (let attempt = 0; attempt < 3; attempt += 1) {
                const next = await findNextPageCenter();
                if (!next.ok || !Number.isFinite(next.x) || !Number.isFinite(next.y)) {
                    // The control must be inside the viewport to be found; scroll down and retry.
                    // Scrolling is best-effort, so a failure here is deliberately ignored.
                    await controllerAction('mouse:wheel', { deltaX: 0, deltaY: 900 }).catch(() => null);
                    await sleep(400);
                    continue;
                }
                await controllerAction('mouse:click', {
                    x: Math.round(next.x),
                    y: Math.round(next.y),
                    button: 'left',
                    clicks: 1,
                });
                clicked = true;
                // Give the next results page time to load before reading it.
                await sleep(1500);
                break;
            }
            if (!clicked)
                break;
        }
        const links = Array.from(dedup.values()).slice(0, maxTarget);
        await writeLinks(links);
        return {
            success: true,
            keywordDir,
            linksPath,
            collectedCount: links.length,
            pagesVisited,
        };
    }
    catch (error) {
        const links = Array.from(dedup.values()).slice(0, maxTarget);
        // Persist partial results on a best-effort basis; the original error is what gets reported.
        await writeLinks(links).catch(() => null);
        // Thrown values are not guaranteed to be Error instances (or even non-null).
        const reason = error?.message ?? String(error);
        return {
            success: false,
            keywordDir,
            linksPath,
            collectedCount: links.length,
            pagesVisited,
            error: `WeiboCollectSearchLinks failed: ${reason}`,
        };
    }
}
|
|
207
|
-
export default WeiboCollectSearchLinksBlock;
|
|
208
282
|
//# sourceMappingURL=WeiboCollectSearchLinksBlock.js.map
|
|
@@ -17,6 +17,7 @@ import * as XiaohongshuCollectLinksBlock from '../blocks/XiaohongshuCollectLinks
|
|
|
17
17
|
import * as XiaohongshuCollectFromLinksBlock from '../blocks/XiaohongshuCollectFromLinksBlock.js';
|
|
18
18
|
import * as ErrorRecoveryBlock from '../blocks/ErrorRecoveryBlock.js';
|
|
19
19
|
import * as ExecuteWeiboSearchBlock from '../blocks/ExecuteWeiboSearchBlock.js';
|
|
20
|
+
import * as WeiboCollectSearchLinksBlock from '../blocks/WeiboCollectSearchLinksBlock.js';
|
|
20
21
|
import * as WeiboCollectFromLinksBlock from '../blocks/WeiboCollectFromLinksBlock.js';
|
|
21
22
|
import * as WeiboCollectCommentsBlock from '../blocks/WeiboCollectCommentsBlock.js';
|
|
22
23
|
import * as AnchorVerificationBlock from '../blocks/AnchorVerificationBlock.js';
|
|
@@ -46,6 +47,7 @@ export function createDefaultWorkflowExecutor() {
|
|
|
46
47
|
executor.registerBlock('XiaohongshuCollectFromLinksBlock', { execute: XiaohongshuCollectFromLinksBlock.execute });
|
|
47
48
|
executor.registerBlock('OrganizeXhsNotesBlock', { execute: OrganizeXhsNotesBlock.execute });
|
|
48
49
|
executor.registerBlock('ExecuteWeiboSearchBlock', { execute: ExecuteWeiboSearchBlock.execute });
|
|
50
|
+
executor.registerBlock('WeiboCollectSearchLinksBlock', { execute: WeiboCollectSearchLinksBlock.execute });
|
|
49
51
|
executor.registerBlock('WeiboCollectFromLinksBlock', { execute: WeiboCollectFromLinksBlock.execute });
|
|
50
52
|
executor.registerBlock('WeiboCollectCommentsBlock', { execute: WeiboCollectCommentsBlock.execute });
|
|
51
53
|
return executor;
|