@web-auto/webauto 0.1.18 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/README.md +122 -53
  2. package/apps/desktop-console/dist/main/index.mjs +227 -12
  3. package/apps/desktop-console/dist/renderer/index.js +237 -8
  4. package/apps/desktop-console/entry/ui-cli.mjs +282 -16
  5. package/apps/desktop-console/entry/ui-console.mjs +46 -15
  6. package/apps/webauto/entry/account.mjs +126 -27
  7. package/apps/webauto/entry/lib/account-detect.mjs +399 -9
  8. package/apps/webauto/entry/lib/account-store.mjs +201 -109
  9. package/apps/webauto/entry/lib/iflow-reply.mjs +194 -0
  10. package/apps/webauto/entry/lib/profile-policy.mjs +48 -0
  11. package/apps/webauto/entry/lib/profilepool.mjs +12 -0
  12. package/apps/webauto/entry/lib/schedule-store.mjs +29 -2
  13. package/apps/webauto/entry/lib/session-init.mjs +227 -0
  14. package/apps/webauto/entry/lib/upgrade-check.mjs +269 -0
  15. package/apps/webauto/entry/lib/xhs-unified-blocks.mjs +160 -0
  16. package/apps/webauto/entry/lib/xhs-unified-output-blocks.mjs +83 -0
  17. package/apps/webauto/entry/lib/xhs-unified-plan-blocks.mjs +55 -0
  18. package/apps/webauto/entry/lib/xhs-unified-profile-blocks.mjs +542 -0
  19. package/apps/webauto/entry/lib/xhs-unified-runtime-blocks.mjs +436 -0
  20. package/apps/webauto/entry/profilepool.mjs +56 -9
  21. package/apps/webauto/entry/smart-reply-cli.mjs +267 -0
  22. package/apps/webauto/entry/weibo-unified.mjs +84 -11
  23. package/apps/webauto/entry/xhs-orchestrate.mjs +43 -1
  24. package/apps/webauto/entry/xhs-unified.mjs +92 -997
  25. package/bin/webauto.mjs +22 -4
  26. package/dist/modules/camo-backend/src/index.js +33 -0
  27. package/dist/modules/camo-backend/src/internal/BrowserSession.js +232 -49
  28. package/dist/modules/camo-backend/src/internal/engine-manager.js +14 -13
  29. package/dist/modules/camo-backend/src/internal/ws-server.js +16 -19
  30. package/dist/modules/camo-runtime/src/utils/browser-service.mjs +38 -6
  31. package/dist/modules/workflow/blocks/EnsureSession.js +0 -8
  32. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +78 -6
  33. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +266 -192
  34. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +2 -0
  35. package/dist/modules/workflow/src/runner.js +2 -0
  36. package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +150 -37
  37. package/dist/modules/xiaohongshu/app/src/blocks/SmartReplyBlock.js +491 -0
  38. package/modules/camo-backend/src/index.ts +31 -0
  39. package/modules/camo-backend/src/internal/BrowserSession.ts +224 -53
  40. package/modules/camo-backend/src/internal/engine-manager.ts +14 -15
  41. package/modules/camo-backend/src/internal/ws-server.ts +17 -17
  42. package/modules/camo-runtime/src/autoscript/action-providers/xhs/common.mjs +12 -2
  43. package/modules/camo-runtime/src/autoscript/action-providers/xhs/persistence.mjs +57 -0
  44. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +2475 -243
  45. package/modules/camo-runtime/src/autoscript/runtime.mjs +35 -30
  46. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +80 -443
  47. package/modules/camo-runtime/src/container/runtime-core/checkpoint.mjs +39 -6
  48. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +206 -39
  49. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +0 -79
  50. package/modules/camo-runtime/src/container/runtime-core/operations/viewport.mjs +46 -0
  51. package/modules/camo-runtime/src/utils/browser-service.mjs +41 -6
  52. package/modules/camo-runtime/src/utils/js-policy.mjs +28 -0
  53. package/modules/workflow/blocks/EnsureSession.ts +0 -4
  54. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +81 -6
  55. package/modules/workflow/blocks/WeiboCollectSearchLinksBlock.ts +316 -0
  56. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +2 -0
  57. package/modules/workflow/src/runner.ts +2 -0
  58. package/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.ts +198 -53
  59. package/modules/xiaohongshu/app/src/blocks/SmartReplyBlock.ts +706 -0
  60. package/package.json +2 -2
  61. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +0 -498
  62. package/modules/camo-runtime/src/autoscript/action-providers/xhs/detail.mjs +0 -181
  63. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +0 -691
  64. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +0 -388
  65. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +0 -135
@@ -1,57 +1,44 @@
1
1
  #!/usr/bin/env node
2
2
  import minimist from 'minimist';
3
- import fs from 'node:fs';
4
3
  import fsp from 'node:fs/promises';
5
- import os from 'node:os';
6
4
  import path from 'node:path';
7
5
  import { pathToFileURL } from 'node:url';
8
- import { buildXhsUnifiedAutoscript } from '../../../modules/camo-runtime/src/autoscript/xhs-unified-template.mjs';
9
- import { normalizeAutoscript, validateAutoscript } from '../../../modules/camo-runtime/src/autoscript/schema.mjs';
10
- import { AutoscriptRunner } from '../../../modules/camo-runtime/src/autoscript/runtime.mjs';
11
6
  import { syncXhsAccountsByProfiles } from './lib/account-detect.mjs';
12
- import { listAccountProfiles, markProfileInvalid } from './lib/account-store.mjs';
7
+ import {
8
+ cleanupIncompleteProfiles,
9
+ listAccountProfiles,
10
+ listSavedProfiles,
11
+ } from './lib/account-store.mjs';
13
12
  import { listProfilesForPool } from './lib/profilepool.mjs';
14
- import { runCamo } from './lib/camo-cli.mjs';
13
+ import { assertProfilesUsable } from './lib/profile-policy.mjs';
15
14
  import { publishBusEvent } from './lib/bus-publish.mjs';
16
- import { resolvePlatformFlowGate } from './lib/flow-gate.mjs';
17
-
18
- function nowIso() {
19
- return new Date().toISOString();
20
- }
21
-
22
- function formatRunLabel() {
23
- return new Date().toISOString().replace(/[:.]/g, '-');
24
- }
25
-
26
- function parseBool(value, fallback = false) {
27
- if (value === undefined || value === null || value === '') return fallback;
28
- if (typeof value === 'boolean') return value;
29
- const text = String(value).trim().toLowerCase();
30
- if (['1', 'true', 'yes', 'on'].includes(text)) return true;
31
- if (['0', 'false', 'no', 'off'].includes(text)) return false;
32
- return fallback;
33
- }
34
-
35
- function parseIntFlag(value, fallback, min = 1) {
36
- if (value === undefined || value === null || value === '') return fallback;
37
- const num = Number(value);
38
- if (!Number.isFinite(num)) return fallback;
39
- return Math.max(min, Math.floor(num));
40
- }
41
-
42
- function parseNonNegativeInt(value, fallback = 0) {
43
- if (value === undefined || value === null || value === '') return fallback;
44
- const num = Number(value);
45
- if (!Number.isFinite(num)) return fallback;
46
- return Math.max(0, Math.floor(num));
47
- }
48
-
49
- function pickRandomInt(min, max) {
50
- const floorMin = Math.max(0, Math.floor(Number(min) || 0));
51
- const floorMax = Math.max(floorMin, Math.floor(Number(max) || 0));
52
- if (floorMax <= floorMin) return floorMin;
53
- return floorMin + Math.floor(Math.random() * (floorMax - floorMin + 1));
54
- }
15
+ import {
16
+ ensureProfileSession,
17
+ resolveXhsStage,
18
+ runProfile,
19
+ } from './lib/xhs-unified-profile-blocks.mjs';
20
+ import {
21
+ resolveDownloadRoot,
22
+ collectCompletedNoteIds,
23
+ } from './lib/xhs-unified-output-blocks.mjs';
24
+ import {
25
+ buildEvenShardPlan,
26
+ buildDynamicWavePlan,
27
+ runWithConcurrency,
28
+ } from './lib/xhs-unified-plan-blocks.mjs';
29
+ import {
30
+ nowIso,
31
+ formatRunLabel,
32
+ parseBool,
33
+ parseIntFlag,
34
+ parseNonNegativeInt,
35
+ sanitizeForPath,
36
+ resetTaskServices,
37
+ } from './lib/xhs-unified-blocks.mjs';
38
+ import {
39
+ toNumber,
40
+ mergeProfileOutputs,
41
+ } from './lib/xhs-unified-runtime-blocks.mjs';
55
42
 
56
43
  function parseProfiles(argv) {
57
44
  const profile = String(argv.profile || '').trim();
@@ -69,9 +56,14 @@ function parseProfiles(argv) {
69
56
  }
70
57
 
71
58
  function resolveDefaultXhsProfiles() {
59
+ const savedProfiles = new Set(listSavedProfiles());
72
60
  const rows = listAccountProfiles({ platform: 'xiaohongshu' }).profiles || [];
73
61
  const valid = rows
74
- .filter((row) => row?.valid === true && String(row?.accountId || '').trim())
62
+ .filter((row) => (
63
+ row?.valid === true
64
+ && String(row?.accountId || '').trim()
65
+ && savedProfiles.has(String(row?.profileId || '').trim())
66
+ ))
75
67
  .sort((a, b) => {
76
68
  const ta = Date.parse(String(a?.updatedAt || '')) || 0;
77
69
  const tb = Date.parse(String(b?.updatedAt || '')) || 0;
@@ -81,132 +73,6 @@ function resolveDefaultXhsProfiles() {
81
73
  return Array.from(new Set(valid.map((row) => String(row.profileId || '').trim()).filter(Boolean)));
82
74
  }
83
75
 
84
- function sanitizeForPath(name, fallback = 'unknown') {
85
- const text = String(name || '').trim();
86
- if (!text) return fallback;
87
- const cleaned = text.replace(/[\\/:"*?<>|]+/g, '_').trim();
88
- return cleaned || fallback;
89
- }
90
-
91
- const XHS_HOME_URL = 'https://www.xiaohongshu.com';
92
-
93
- async function ensureProfileSession(profileId) {
94
- const id = String(profileId || '').trim();
95
- if (!id) return false;
96
- const ret = runCamo(['start', id, '--url', XHS_HOME_URL], {
97
- rootDir: process.cwd(),
98
- timeoutMs: 60000,
99
- });
100
- if (ret?.ok) {
101
- runCamo(['goto', id, XHS_HOME_URL], { rootDir: process.cwd(), timeoutMs: 60000 });
102
- }
103
- return Boolean(ret?.ok);
104
- }
105
-
106
- function buildStopScreenshotPath(profileId, reason, outputDir) {
107
- const safeProfile = sanitizeForPath(profileId, 'profile');
108
- const safeReason = sanitizeForPath(reason || 'stop', 'stop');
109
- const file = `stop-${safeProfile}-${safeReason}.png`;
110
- return path.join(outputDir, file);
111
- }
112
-
113
- async function captureStopScreenshot({ profileId, reason, outputDir }) {
114
- const outDir = String(outputDir || '').trim();
115
- if (!outDir) return null;
116
- try {
117
- await fsp.mkdir(outDir, { recursive: true });
118
- } catch {}
119
- const outputPath = buildStopScreenshotPath(profileId, reason, outDir);
120
- const tryCapture = () => runCamo(['screenshot', profileId, '--output', outputPath], {
121
- rootDir: process.cwd(),
122
- timeoutMs: 60000,
123
- });
124
- let ret = tryCapture();
125
- if (!ret?.ok) {
126
- await ensureProfileSession(profileId);
127
- ret = tryCapture();
128
- }
129
- if (ret?.ok) return outputPath;
130
- return null;
131
- }
132
-
133
- function sanitizeKeywordDirParts({ env, keyword }) {
134
- return {
135
- safeEnv: sanitizeForPath(env, 'prod'),
136
- safeKeyword: sanitizeForPath(keyword, 'unknown'),
137
- };
138
- }
139
-
140
- function resolveDownloadRoot(customRoot = '') {
141
- const fromArg = String(customRoot || '').trim();
142
- if (fromArg) return path.resolve(fromArg);
143
- const fromEnv = String(process.env.WEBAUTO_DOWNLOAD_ROOT || process.env.WEBAUTO_DOWNLOAD_DIR || '').trim();
144
- if (fromEnv) return path.resolve(fromEnv);
145
- if (process.platform === 'win32') {
146
- try {
147
- if (fs.existsSync('D:\\')) return 'D:\\webauto';
148
- } catch {
149
- // ignore
150
- }
151
- const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
152
- return path.join(home, '.webauto');
153
- }
154
- const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
155
- return path.join(home, '.webauto', 'download');
156
- }
157
-
158
- const NON_NOTE_DIR_NAMES = new Set([
159
- 'merged',
160
- 'profiles',
161
- 'like-evidence',
162
- 'virtual-like',
163
- 'smart-reply',
164
- 'comment-match',
165
- 'discover-fallback',
166
- ]);
167
-
168
- async function collectKeywordDirs(baseOutputRoot, env, keyword) {
169
- const { safeEnv, safeKeyword } = sanitizeKeywordDirParts({ env, keyword });
170
- const dirs = [
171
- path.join(baseOutputRoot, 'xiaohongshu', safeEnv, safeKeyword),
172
- ];
173
- const shardsRoot = path.join(baseOutputRoot, 'shards');
174
- try {
175
- const entries = await fsp.readdir(shardsRoot, { withFileTypes: true });
176
- for (const entry of entries) {
177
- if (!entry.isDirectory()) continue;
178
- dirs.push(path.join(shardsRoot, entry.name, 'xiaohongshu', safeEnv, safeKeyword));
179
- }
180
- } catch {
181
- // ignore
182
- }
183
- return Array.from(new Set(dirs));
184
- }
185
-
186
- async function collectCompletedNoteIds(baseOutputRoot, env, keyword) {
187
- const keywordDirs = await collectKeywordDirs(baseOutputRoot, env, keyword);
188
- const completed = new Set();
189
- for (const keywordDir of keywordDirs) {
190
- let entries = [];
191
- try {
192
- entries = await fsp.readdir(keywordDir, { withFileTypes: true });
193
- } catch {
194
- continue;
195
- }
196
- for (const entry of entries) {
197
- if (!entry.isDirectory()) continue;
198
- const noteId = String(entry.name || '').trim();
199
- if (!noteId || noteId.startsWith('.') || noteId.startsWith('_')) continue;
200
- if (NON_NOTE_DIR_NAMES.has(noteId)) continue;
201
- completed.add(noteId);
202
- }
203
- }
204
- return {
205
- count: completed.size,
206
- noteIds: Array.from(completed),
207
- };
208
- }
209
-
210
76
  async function ensureDir(dirPath) {
211
77
  await fsp.mkdir(dirPath, { recursive: true });
212
78
  }
@@ -221,820 +87,12 @@ async function appendJsonl(filePath, payload) {
221
87
  await fsp.appendFile(filePath, `${JSON.stringify(payload)}\n`, 'utf8');
222
88
  }
223
89
 
224
- function resolveUnifiedApiBaseUrl() {
225
- const raw = String(
226
- process.env.WEBAUTO_UNIFIED_API
227
- || process.env.WEBAUTO_UNIFIED_URL
228
- || 'http://127.0.0.1:7701',
229
- ).trim();
230
- return raw.replace(/\/+$/, '');
231
- }
232
-
233
- async function postUnifiedTaskRequest(baseUrl, pathname, payload) {
234
- try {
235
- const response = await fetch(`${baseUrl}${pathname}`, {
236
- method: 'POST',
237
- headers: { 'Content-Type': 'application/json' },
238
- body: JSON.stringify(payload || {}),
239
- signal: AbortSignal.timeout(2000),
240
- });
241
- if (!response.ok) return false;
242
- return true;
243
- } catch {
244
- return false;
245
- }
246
- }
247
-
248
- function createTaskReporter(seed = {}) {
249
- const baseUrl = resolveUnifiedApiBaseUrl();
250
- const staticSeed = {
251
- profileId: String(seed.profileId || 'unknown').trim() || 'unknown',
252
- keyword: String(seed.keyword || '').trim(),
253
- phase: 'unified',
254
- uiTriggerId: String(seed.uiTriggerId || '').trim(),
255
- };
256
- const createdRunIds = new Set();
257
-
258
- const ensureCreated = async (runId, extra = {}) => {
259
- const rid = String(runId || '').trim();
260
- if (!rid) return false;
261
- if (createdRunIds.has(rid)) return true;
262
- const ok = await postUnifiedTaskRequest(baseUrl, '/api/v1/tasks', {
263
- runId: rid,
264
- ...staticSeed,
265
- ...extra,
266
- });
267
- if (ok) createdRunIds.add(rid);
268
- return ok;
269
- };
270
-
271
- const update = async (runId, patch = {}) => {
272
- const rid = String(runId || '').trim();
273
- if (!rid) return false;
274
- await ensureCreated(rid, patch);
275
- return postUnifiedTaskRequest(baseUrl, `/api/v1/tasks/${encodeURIComponent(rid)}/update`, {
276
- ...staticSeed,
277
- ...patch,
278
- });
279
- };
280
-
281
- const pushEvent = async (runId, type, data = {}) => {
282
- const rid = String(runId || '').trim();
283
- if (!rid) return false;
284
- await ensureCreated(rid, data);
285
- return postUnifiedTaskRequest(baseUrl, `/api/v1/tasks/${encodeURIComponent(rid)}/events`, {
286
- type: String(type || 'event').trim() || 'event',
287
- data,
288
- });
289
- };
290
-
291
- const setError = async (runId, message, code = 'TASK_ERROR', recoverable = false) => {
292
- const rid = String(runId || '').trim();
293
- if (!rid) return false;
294
- return update(rid, {
295
- error: {
296
- message: String(message || 'task_error'),
297
- code: String(code || 'TASK_ERROR'),
298
- timestamp: Date.now(),
299
- recoverable: recoverable === true,
300
- },
301
- });
302
- };
303
-
304
- return {
305
- ensureCreated,
306
- update,
307
- pushEvent,
308
- setError,
309
- };
310
- }
311
-
312
- async function buildTemplateOptions(argv, profileId, overrides = {}) {
313
- const keyword = String(argv.keyword || argv.k || '').trim();
314
- const env = String(argv.env || 'prod').trim() || 'prod';
315
- const inputMode = String(argv['input-mode'] || 'protocol').trim() || 'protocol';
316
- const headless = parseBool(argv.headless, false);
317
- const ocrCommand = String(argv['ocr-command'] || '').trim();
318
- const maxNotes = parseIntFlag(argv['max-notes'] ?? argv.target, 30, 1);
319
- const maxComments = parseNonNegativeInt(argv['max-comments'], 0);
320
- let flowGate = null;
321
- try {
322
- flowGate = await resolvePlatformFlowGate('xiaohongshu');
323
- } catch {
324
- flowGate = null;
325
- }
326
-
327
- const throttleMin = parseIntFlag(flowGate?.throttle?.minMs, 900, 100);
328
- const throttleMax = parseIntFlag(flowGate?.throttle?.maxMs, 1800, throttleMin);
329
- const noteIntervalMin = parseIntFlag(flowGate?.noteInterval?.minMs, 2200, 200);
330
- const noteIntervalMax = parseIntFlag(flowGate?.noteInterval?.maxMs, 4200, noteIntervalMin);
331
- const tabCountDefault = parseIntFlag(flowGate?.tabPool?.tabCount, 1, 1);
332
- const tabOpenDelayMin = parseIntFlag(flowGate?.tabPool?.openDelayMinMs, 1400, 0);
333
- const tabOpenDelayMax = parseIntFlag(flowGate?.tabPool?.openDelayMaxMs, 2800, tabOpenDelayMin);
334
- const submitMethodDefault = String(flowGate?.submitSearch?.method || 'click').trim().toLowerCase() || 'click';
335
- const submitActionDelayMinDefault = parseIntFlag(flowGate?.submitSearch?.actionDelayMinMs, 180, 20);
336
- const submitActionDelayMaxDefault = parseIntFlag(flowGate?.submitSearch?.actionDelayMaxMs, 620, submitActionDelayMinDefault);
337
- const submitSettleMinDefault = parseIntFlag(flowGate?.submitSearch?.settleMinMs, 1200, 60);
338
- const submitSettleMaxDefault = parseIntFlag(flowGate?.submitSearch?.settleMaxMs, 2600, submitSettleMinDefault);
339
- const openDetailPreClickMinDefault = parseIntFlag(flowGate?.openDetail?.preClickMinMs, 220, 60);
340
- const openDetailPreClickMaxDefault = parseIntFlag(flowGate?.openDetail?.preClickMaxMs, 700, openDetailPreClickMinDefault);
341
- const openDetailPollDelayMinDefault = parseIntFlag(flowGate?.openDetail?.pollDelayMinMs, 130, 80);
342
- const openDetailPollDelayMaxDefault = parseIntFlag(flowGate?.openDetail?.pollDelayMaxMs, 320, openDetailPollDelayMinDefault);
343
- const openDetailPostOpenMinDefault = parseIntFlag(flowGate?.openDetail?.postOpenMinMs, 420, 120);
344
- const openDetailPostOpenMaxDefault = parseIntFlag(flowGate?.openDetail?.postOpenMaxMs, 1100, openDetailPostOpenMinDefault);
345
- const commentsScrollStepMinDefault = parseIntFlag(flowGate?.commentsHarvest?.scrollStepMin, 280, 120);
346
- const commentsScrollStepMaxDefault = parseIntFlag(flowGate?.commentsHarvest?.scrollStepMax, 420, commentsScrollStepMinDefault);
347
- const commentsSettleMinDefault = parseIntFlag(flowGate?.commentsHarvest?.settleMinMs, 280, 80);
348
- const commentsSettleMaxDefault = parseIntFlag(flowGate?.commentsHarvest?.settleMaxMs, 820, commentsSettleMinDefault);
349
- const defaultOperationMinIntervalDefault = parseIntFlag(flowGate?.pacing?.defaultOperationMinIntervalMs, 1200, 0);
350
- const defaultEventCooldownDefault = parseIntFlag(flowGate?.pacing?.defaultEventCooldownMs, 700, 0);
351
- const defaultPacingJitterDefault = parseIntFlag(flowGate?.pacing?.defaultJitterMs, 900, 0);
352
- const navigationMinIntervalDefault = parseIntFlag(flowGate?.pacing?.navigationMinIntervalMs, 2200, 0);
353
-
354
- const throttle = parseIntFlag(argv.throttle, pickRandomInt(throttleMin, throttleMax), 100);
355
- const tabCount = parseIntFlag(argv['tab-count'], tabCountDefault, 1);
356
- const noteIntervalMs = parseIntFlag(argv['note-interval'], pickRandomInt(noteIntervalMin, noteIntervalMax), 200);
357
- const tabOpenDelayMs = parseIntFlag(argv['tab-open-delay'], pickRandomInt(tabOpenDelayMin, tabOpenDelayMax), 0);
358
- const submitMethod = String(argv['search-submit-method'] || submitMethodDefault).trim().toLowerCase() || 'click';
359
- const submitActionDelayMinMs = parseIntFlag(argv['submit-action-delay-min'], submitActionDelayMinDefault, 20);
360
- const submitActionDelayMaxMs = parseIntFlag(argv['submit-action-delay-max'], submitActionDelayMaxDefault, submitActionDelayMinMs);
361
- const submitSettleMinMs = parseIntFlag(argv['submit-settle-min'], submitSettleMinDefault, 60);
362
- const submitSettleMaxMs = parseIntFlag(argv['submit-settle-max'], submitSettleMaxDefault, submitSettleMinMs);
363
- const openDetailPreClickMinMs = parseIntFlag(argv['open-detail-preclick-min'], openDetailPreClickMinDefault, 60);
364
- const openDetailPreClickMaxMs = parseIntFlag(argv['open-detail-preclick-max'], openDetailPreClickMaxDefault, openDetailPreClickMinMs);
365
- const openDetailPollDelayMinMs = parseIntFlag(argv['open-detail-poll-min'], openDetailPollDelayMinDefault, 80);
366
- const openDetailPollDelayMaxMs = parseIntFlag(argv['open-detail-poll-max'], openDetailPollDelayMaxDefault, openDetailPollDelayMinMs);
367
- const openDetailPostOpenMinMs = parseIntFlag(argv['open-detail-postopen-min'], openDetailPostOpenMinDefault, 120);
368
- const openDetailPostOpenMaxMs = parseIntFlag(argv['open-detail-postopen-max'], openDetailPostOpenMaxDefault, openDetailPostOpenMinMs);
369
- const commentsScrollStepMin = parseIntFlag(argv['comments-scroll-step-min'], commentsScrollStepMinDefault, 120);
370
- const commentsScrollStepMax = parseIntFlag(argv['comments-scroll-step-max'], commentsScrollStepMaxDefault, commentsScrollStepMin);
371
- const commentsSettleMinMs = parseIntFlag(argv['comments-settle-min'], commentsSettleMinDefault, 80);
372
- const commentsSettleMaxMs = parseIntFlag(argv['comments-settle-max'], commentsSettleMaxDefault, commentsSettleMinMs);
373
- const defaultOperationMinIntervalMs = parseIntFlag(argv['operation-min-interval'], defaultOperationMinIntervalDefault, 0);
374
- const defaultEventCooldownMs = parseIntFlag(argv['event-cooldown'], defaultEventCooldownDefault, 0);
375
- const defaultPacingJitterMs = parseIntFlag(argv['pacing-jitter'], defaultPacingJitterDefault, 0);
376
- const navigationMinIntervalMs = parseIntFlag(argv['navigation-min-interval'], navigationMinIntervalDefault, 0);
377
- const maxLikesPerRound = parseNonNegativeInt(argv['max-likes'], 0);
378
- const matchMode = String(argv['match-mode'] || 'any').trim() || 'any';
379
- const matchMinHits = parseIntFlag(argv['match-min-hits'], 1, 1);
380
- const matchKeywords = String(argv['match-keywords'] || keyword).trim();
381
- const likeKeywords = String(argv['like-keywords'] || '').trim();
382
- const replyText = String(argv['reply-text'] || '感谢分享,已关注').trim() || '感谢分享,已关注';
383
- const outputRoot = String(argv['output-root'] || '').trim();
384
- const uiTriggerId = String(argv['ui-trigger-id'] || process.env.WEBAUTO_UI_TRIGGER_ID || '').trim();
385
- const resume = parseBool(argv.resume, false);
386
- const incrementalMax = parseBool(argv['incremental-max'], true);
387
- const sharedHarvestPath = String(overrides.sharedHarvestPath ?? argv['shared-harvest-path'] ?? '').trim();
388
- const searchSerialKey = String(overrides.searchSerialKey ?? argv['search-serial-key'] ?? '').trim();
389
- const seedCollectCount = parseNonNegativeInt(
390
- overrides.seedCollectCount ?? argv['seed-collect-count'],
391
- 0,
392
- );
393
- const seedCollectMaxRounds = parseNonNegativeInt(
394
- overrides.seedCollectMaxRounds ?? argv['seed-collect-rounds'],
395
- 0,
396
- );
397
-
398
- const dryRun = parseBool(argv['dry-run'], false);
399
- const disableDryRun = parseBool(argv['no-dry-run'], false);
400
- const effectiveDryRun = disableDryRun ? false : dryRun;
401
-
402
- const base = {
403
- profileId,
404
- keyword,
405
- env,
406
- inputMode,
407
- headless,
408
- ocrCommand,
409
- uiTriggerId,
410
- outputRoot,
411
- throttle,
412
- tabCount,
413
- tabOpenDelayMs,
414
- noteIntervalMs,
415
- submitMethod,
416
- submitActionDelayMinMs,
417
- submitActionDelayMaxMs,
418
- submitSettleMinMs,
419
- submitSettleMaxMs,
420
- openDetailPreClickMinMs,
421
- openDetailPreClickMaxMs,
422
- openDetailPollDelayMinMs,
423
- openDetailPollDelayMaxMs,
424
- openDetailPostOpenMinMs,
425
- openDetailPostOpenMaxMs,
426
- commentsScrollStepMin,
427
- commentsScrollStepMax,
428
- commentsSettleMinMs,
429
- commentsSettleMaxMs,
430
- defaultOperationMinIntervalMs,
431
- defaultEventCooldownMs,
432
- defaultPacingJitterMs,
433
- navigationMinIntervalMs,
434
- maxNotes,
435
- maxComments,
436
- maxLikesPerRound,
437
- resume,
438
- incrementalMax,
439
- matchMode,
440
- matchMinHits,
441
- matchKeywords,
442
- likeKeywords,
443
- replyText,
444
- doHomepage: parseBool(argv['do-homepage'], true),
445
- doImages: parseBool(argv['do-images'], false),
446
- doComments: parseBool(argv['do-comments'], true),
447
- doLikes: parseBool(argv['do-likes'], false) && !effectiveDryRun,
448
- doReply: parseBool(argv['do-reply'], false) && !effectiveDryRun,
449
- doOcr: parseBool(argv['do-ocr'], false),
450
- persistComments: parseBool(argv['persist-comments'], !effectiveDryRun),
451
- sharedHarvestPath,
452
- searchSerialKey,
453
- seedCollectCount,
454
- seedCollectMaxRounds,
455
- };
456
- return { ...base, ...overrides };
457
- }
458
-
459
- function buildEvenShardPlan({ profiles, totalNotes, defaultMaxNotes }) {
460
- const uniqueProfiles = Array.from(new Set(profiles.map((item) => String(item || '').trim()).filter(Boolean)));
461
- if (uniqueProfiles.length === 0) return [];
462
-
463
- if (!Number.isFinite(totalNotes) || totalNotes <= 0) {
464
- return uniqueProfiles.map((profileId) => ({ profileId, assignedNotes: defaultMaxNotes }));
465
- }
466
-
467
- const base = Math.floor(totalNotes / uniqueProfiles.length);
468
- const remainder = totalNotes % uniqueProfiles.length;
469
- const plan = uniqueProfiles.map((profileId, index) => ({
470
- profileId,
471
- assignedNotes: base + (index < remainder ? 1 : 0),
472
- }));
473
- return plan.filter((item) => item.assignedNotes > 0);
474
- }
475
-
476
- function buildDynamicWavePlan({ profiles, remainingNotes }) {
477
- const uniqueProfiles = Array.from(new Set(profiles.map((item) => String(item || '').trim()).filter(Boolean)));
478
- if (uniqueProfiles.length === 0) return [];
479
- const remaining = Math.max(0, Number(remainingNotes) || 0);
480
- if (remaining <= 0) return [];
481
-
482
- if (remaining < uniqueProfiles.length) {
483
- return uniqueProfiles.slice(0, remaining).map((profileId) => ({
484
- profileId,
485
- assignedNotes: 1,
486
- }));
487
- }
488
-
489
- const waveTotal = remaining - (remaining % uniqueProfiles.length);
490
- return buildEvenShardPlan({
491
- profiles: uniqueProfiles,
492
- totalNotes: waveTotal > 0 ? waveTotal : remaining,
493
- defaultMaxNotes: 1,
494
- });
495
- }
496
-
497
- function createProfileStats(spec) {
498
- return {
499
- assignedNotes: spec.assignedNotes,
500
- openedNotes: 0,
501
- commentsHarvestRuns: 0,
502
- commentsCollected: 0,
503
- commentsExpected: 0,
504
- commentsReachedBottomCount: 0,
505
- likesHitCount: 0,
506
- likesNewCount: 0,
507
- likesSkippedCount: 0,
508
- likesAlreadyCount: 0,
509
- likesDedupCount: 0,
510
- searchCount: 0,
511
- rollbackCount: 0,
512
- returnToSearchCount: 0,
513
- operationErrors: 0,
514
- recoveryFailed: 0,
515
- terminalCode: null,
516
- commentPaths: [],
517
- likeSummaryPaths: [],
518
- likeStatePaths: [],
519
- };
520
- }
521
-
522
- function pushUnique(arr, value) {
523
- const text = String(value || '').trim();
524
- if (!text) return;
525
- if (!arr.includes(text)) arr.push(text);
526
- }
527
-
528
- function toNumber(value, fallback = 0) {
529
- const num = Number(value);
530
- return Number.isFinite(num) ? num : fallback;
531
- }
532
-
533
- function updateProfileStatsFromEvent(stats, payload) {
534
- const event = String(payload?.event || '').trim();
535
- if (!event) return;
536
-
537
- if (event === 'autoscript:operation_error') {
538
- stats.operationErrors += 1;
539
- return;
540
- }
541
- if (event === 'autoscript:operation_recovery_failed') {
542
- stats.recoveryFailed += 1;
543
- return;
544
- }
545
- if (event === 'autoscript:operation_terminal') {
546
- stats.terminalCode = String(payload.code || '').trim() || stats.terminalCode;
547
- return;
548
- }
549
- if (event !== 'autoscript:operation_done') return;
550
-
551
- const operationId = String(payload.operationId || '').trim();
552
- const rawResult = payload.result && typeof payload.result === 'object' ? payload.result : {};
553
- const result = rawResult.result && typeof rawResult.result === 'object'
554
- ? rawResult.result
555
- : rawResult;
556
-
557
- if (operationId === 'open_first_detail' || operationId === 'open_next_detail') {
558
- if (result.opened === true) {
559
- stats.openedNotes += 1;
560
- }
561
- return;
562
- }
563
-
564
- if (operationId === 'submit_search') {
565
- stats.searchCount = Math.max(stats.searchCount, toNumber(result.searchCount, stats.searchCount));
566
- return;
567
- }
568
-
569
- if (operationId === 'comments_harvest') {
570
- stats.commentsHarvestRuns += 1;
571
- stats.commentsCollected += toNumber(result.collected, 0);
572
- stats.commentsExpected += Math.max(0, toNumber(result.expectedCommentsCount, 0));
573
- if (result.reachedBottom === true) stats.commentsReachedBottomCount += 1;
574
- pushUnique(stats.commentPaths, result.commentsPath);
575
- return;
576
- }
577
-
578
- if (operationId === 'comment_like') {
579
- stats.likesHitCount += toNumber(result.hitCount, 0);
580
- stats.likesNewCount += toNumber(result.likedCount, 0);
581
- stats.likesSkippedCount += toNumber(result.skippedCount, 0);
582
- stats.likesAlreadyCount += toNumber(result.alreadyLikedSkipped, 0);
583
- stats.likesDedupCount += toNumber(result.dedupSkipped, 0);
584
- pushUnique(stats.likeSummaryPaths, result.summaryPath);
585
- pushUnique(stats.likeStatePaths, result.likeStatePath);
586
- pushUnique(stats.commentPaths, result.commentsPath);
587
- return;
588
- }
589
-
590
- if (operationId === 'close_detail') {
591
- stats.rollbackCount = Math.max(stats.rollbackCount, toNumber(result.rollbackCount, stats.rollbackCount));
592
- stats.returnToSearchCount = Math.max(stats.returnToSearchCount, toNumber(result.returnToSearchCount, stats.returnToSearchCount));
593
- }
594
- }
595
-
596
- function isObject(value) {
597
- return value !== null && typeof value === 'object' && !Array.isArray(value);
598
- }
599
-
600
/** Event names that `runProfile` forwards to the bus when bus publishing is enabled. */
const RUN_PROFILE_BUS_EVENTS = new Set([
  'xhs.unified.start',
  'xhs.unified.stop',
  'xhs.unified.stop_screenshot',
  'xhs.unified.profile_failed',
  'autoscript:operation_done',
  'autoscript:operation_progress',
  'autoscript:operation_error',
  'autoscript:operation_terminal',
  'autoscript:operation_recovery_failed',
]);

/** Event names that `runProfile` pushes to the task reporter as individual events. */
const RUN_PROFILE_REPORTED_EVENTS = new Set([
  'xhs.unified.start',
  'xhs.unified.stop',
  'autoscript:start',
  'autoscript:stop',
  'autoscript:impact',
  'autoscript:operation_start',
  'autoscript:operation_progress',
  'autoscript:operation_done',
  'autoscript:operation_error',
  'autoscript:operation_recovery_failed',
]);

/** Event names after which `runProfile` sends a fresh "running" snapshot to the reporter. */
const RUN_PROFILE_SNAPSHOT_EVENTS = new Set([
  'autoscript:operation_done',
  'autoscript:operation_error',
  'autoscript:operation_recovery_failed',
  'autoscript:impact',
]);

/** Coerce a counter to a number, mapping non-numeric and negative values to 0. */
function toRunProfileCount(value) {
  return Math.max(0, Number(value) || 0);
}

/** Build the reporter `progress` object; pass `stats = null` for an all-zero starting state. */
function buildRunProfileProgress(spec, stats) {
  return {
    total: toRunProfileCount(spec.assignedNotes),
    processed: toRunProfileCount(stats?.openedNotes),
    failed: toRunProfileCount(stats?.operationErrors),
  };
}

/** Build the full reporter update payload (status, phase, progress, stats) for a profile run. */
function buildRunProfileSnapshot(status, spec, stats) {
  return {
    status,
    phase: 'unified',
    progress: buildRunProfileProgress(spec, stats),
    stats: {
      notesProcessed: toRunProfileCount(stats?.openedNotes),
      commentsCollected: toRunProfileCount(stats?.commentsCollected),
      likesPerformed: toRunProfileCount(stats?.likesNewCount),
      // These counters are reported as constant zeros by this entry point.
      repliesGenerated: 0,
      imagesDownloaded: 0,
      ocrProcessed: 0,
    },
  };
}

/**
 * Run the unified autoscript for a single profile and write its summary file.
 *
 * Builds template options from `argv` plus per-spec overrides, validates the
 * generated autoscript, starts an `AutoscriptRunner`, mirrors every runner
 * event to the profile log file / stdout / task reporter (and optionally the
 * bus), waits for the run to finish, captures a stop screenshot, and writes
 * the result object to `spec.summaryPath`.
 *
 * @param {object} spec - Per-profile run spec. Fields read here: `profileId`,
 *   `assignedNotes`, `outputRoot`, `logPath`, `summaryPath`, `runLabel`, and the
 *   optional `sharedHarvestPath`, `searchSerialKey`, `seedCollectCount`,
 *   `seedCollectMaxRounds`.
 * @param {object} argv - Parsed CLI flags; `bus-events` is read here, the rest
 *   is passed to `buildTemplateOptions`.
 * @param {object} [baseOverrides={}] - Option overrides applied before the
 *   spec-derived ones.
 * @returns {Promise<object>} The profile result (`ok`, `profileId`, `runId`,
 *   `reason`, `assignedNotes`, `outputRoot`, `logPath`, `stopScreenshotPath`,
 *   `stats`). `ok` is `false` only when the stop reason is `'script_failure'`.
 * @throws {Error} When autoscript validation fails.
 */
async function runProfile(spec, argv, baseOverrides = {}) {
  const profileId = spec.profileId;
  const busEnabled = parseBool(argv['bus-events'], false) || process.env.WEBAUTO_BUS_EVENTS === '1';
  let currentRunId = null;

  const overrides = {
    ...baseOverrides,
    maxNotes: spec.assignedNotes,
    outputRoot: spec.outputRoot,
  };
  if (spec.sharedHarvestPath) overrides.sharedHarvestPath = spec.sharedHarvestPath;
  if (spec.searchSerialKey) overrides.searchSerialKey = spec.searchSerialKey;
  if (spec.seedCollectCount !== undefined && spec.seedCollectCount !== null) {
    overrides.seedCollectCount = parseNonNegativeInt(spec.seedCollectCount, 0);
  }
  if (spec.seedCollectMaxRounds !== undefined && spec.seedCollectMaxRounds !== null) {
    overrides.seedCollectMaxRounds = parseNonNegativeInt(spec.seedCollectMaxRounds, 0);
  }

  const options = await buildTemplateOptions(argv, profileId, overrides);
  console.log(JSON.stringify({
    event: 'xhs.unified.flow_gate',
    profileId,
    throttle: options.throttle,
    noteIntervalMs: options.noteIntervalMs,
    tabCount: options.tabCount,
    tabOpenDelayMs: options.tabOpenDelayMs,
    submitMethod: options.submitMethod,
    submitActionDelayMinMs: options.submitActionDelayMinMs,
    submitActionDelayMaxMs: options.submitActionDelayMaxMs,
    submitSettleMinMs: options.submitSettleMinMs,
    submitSettleMaxMs: options.submitSettleMaxMs,
    commentsScrollStepMin: options.commentsScrollStepMin,
    commentsScrollStepMax: options.commentsScrollStepMax,
    commentsSettleMinMs: options.commentsSettleMinMs,
    commentsSettleMaxMs: options.commentsSettleMaxMs,
  }));

  const script = buildXhsUnifiedAutoscript(options);
  const normalized = normalizeAutoscript(script, `xhs-unified:${profileId}`);
  const validation = validateAutoscript(normalized);
  if (!validation.ok) throw new Error(`autoscript validation failed for ${profileId}: ${validation.errors.join('; ')}`);

  await ensureDir(path.dirname(spec.logPath));
  const stats = createProfileStats(spec);
  const reporter = createTaskReporter({
    profileId,
    keyword: options.keyword,
    uiTriggerId: options.uiTriggerId,
  });
  let activeRunId = '';

  // Fire-and-forget snapshot; a no-op until a run id is known.
  // NOTE(review): the returned promise is intentionally not awaited - confirm
  // the reporter handles its own rejections.
  const pushTaskSnapshot = (status = 'running') => {
    if (!activeRunId) return;
    void reporter.update(activeRunId, buildRunProfileSnapshot(status, spec, stats));
  };

  // Sink for every runner event: persist, echo, fold into stats, then fan out.
  const logEvent = (payload) => {
    const eventPayload = isObject(payload) ? payload : { event: 'autoscript:raw', payload };
    const merged = {
      ts: eventPayload.ts || nowIso(),
      profileId,
      ...eventPayload,
    };
    if (!merged.runId && currentRunId) merged.runId = currentRunId;
    fs.appendFileSync(spec.logPath, `${JSON.stringify(merged)}\n`, 'utf8');
    console.log(JSON.stringify(merged));
    updateProfileStatsFromEvent(stats, merged);

    const eventName = String(merged.event || '').trim();
    if (busEnabled && RUN_PROFILE_BUS_EVENTS.has(eventName)) {
      void publishBusEvent(merged);
    }

    const mergedRunId = String(merged.runId || '').trim();
    if (mergedRunId) activeRunId = mergedRunId;
    if (activeRunId && RUN_PROFILE_REPORTED_EVENTS.has(eventName)) {
      void reporter.pushEvent(activeRunId, eventName, merged);
    }
    if (RUN_PROFILE_SNAPSHOT_EVENTS.has(eventName)) {
      pushTaskSnapshot('running');
    }

    // A login guard hit at runtime marks the profile invalid in the account store.
    if (
      merged.event === 'autoscript:operation_error'
      && String(merged.operationId || '').trim() === 'abort_on_login_guard'
      && String(merged.message || '').includes('LOGIN_GUARD_DETECTED')
    ) {
      try {
        markProfileInvalid(profileId, 'login_guard_runtime');
      } catch {
        // ignore account state update errors during runtime logging
      }
    }
  };

  const runner = new AutoscriptRunner(normalized, {
    profileId,
    log: logEvent,
  });

  const running = await runner.start();
  currentRunId = running?.runId || currentRunId;
  activeRunId = String(running?.runId || '').trim();
  if (activeRunId) {
    await reporter.ensureCreated(activeRunId, {
      status: 'starting',
      phase: 'unified',
      progress: buildRunProfileProgress(spec, null),
    });
    // Initial "running" update deliberately reports all-zero counters.
    await reporter.update(activeRunId, buildRunProfileSnapshot('running', spec, null));
  }
  logEvent({
    event: 'xhs.unified.start',
    runId: running?.runId || null,
    keyword: options.keyword,
    env: options.env,
    maxNotes: options.maxNotes,
    assignedNotes: spec.assignedNotes,
    outputRoot: options.outputRoot,
    parallelRunLabel: spec.runLabel,
  });
  const done = await running.done;

  const stopPayload = {
    event: 'xhs.unified.stop',
    profileId,
    runId: done?.runId || running.runId,
    reason: done?.reason || null,
    startedAt: done?.startedAt || null,
    stoppedAt: done?.stoppedAt || null,
  };
  logEvent(stopPayload);

  const stopScreenshotPath = await captureStopScreenshot({
    profileId,
    reason: stopPayload.reason || 'stop',
    outputDir: path.dirname(spec.logPath),
  });
  if (stopScreenshotPath) {
    logEvent({
      event: 'xhs.unified.stop_screenshot',
      profileId,
      runId: stopPayload.runId,
      reason: stopPayload.reason || null,
      path: stopScreenshotPath,
    });
  }

  stats.stopReason = stopPayload.reason;
  const finalRunId = String(stopPayload.runId || activeRunId || '').trim();
  if (finalRunId) {
    activeRunId = finalRunId;
    const failed = stopPayload.reason === 'script_failure';
    await reporter.update(
      finalRunId,
      buildRunProfileSnapshot(failed ? 'failed' : 'completed', spec, stats),
    );
    if (failed) {
      await reporter.setError(finalRunId, `autoscript stopped: ${stopPayload.reason || 'script_failure'}`, 'SCRIPT_FAILURE', false);
    }
  }

  const profileResult = {
    ok: stopPayload.reason !== 'script_failure',
    profileId,
    runId: stopPayload.runId,
    reason: stopPayload.reason,
    assignedNotes: spec.assignedNotes,
    outputRoot: options.outputRoot,
    logPath: spec.logPath,
    stopScreenshotPath: stopScreenshotPath || null,
    stats,
  };

  await writeJson(spec.summaryPath, profileResult);
  return profileResult;
}
848
-
849
/**
 * Run `worker` over every element of `items` with a bounded number of calls
 * in flight, returning the results in input order.
 *
 * The number of parallel lanes is `concurrency` clamped to the range
 * `[1, items.length]` (a falsy `concurrency` counts as 1). Each lane pulls the
 * next unclaimed index until the list is exhausted.
 *
 * @param {Array} items - Inputs to process.
 * @param {number} concurrency - Maximum number of simultaneous worker calls.
 * @param {(item: *, index: number) => *} worker - Called with each item and its
 *   index; its (awaited) result is stored at the same index.
 * @returns {Promise<Array>} Results aligned with `items`. Rejects if any worker
 *   call rejects.
 */
async function runWithConcurrency(items, concurrency, worker) {
  const laneCount = Math.max(1, Math.min(items.length || 1, concurrency || 1));
  const results = new Array(items.length);
  let nextIndex = 0;

  // Each lane claims an index synchronously before awaiting, so no two lanes
  // ever process the same item.
  const drain = async () => {
    while (nextIndex < items.length) {
      const index = nextIndex;
      nextIndex += 1;
      results[index] = await worker(items[index], index);
    }
  };

  const lanes = Array.from({ length: laneCount }, () => drain());
  await Promise.all(lanes);
  return results;
}
866
-
867
/**
 * Read a JSON Lines file and return its parsed rows.
 *
 * Best-effort by design: a file that cannot be read yields an empty array,
 * and lines that are blank, fail to parse, or parse to a falsy value are
 * dropped.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {Promise<Array>} Parsed truthy rows in file order.
 */
async function readJsonlRows(filePath) {
  let text;
  try {
    text = await fsp.readFile(filePath, 'utf8');
  } catch {
    return [];
  }

  const rows = [];
  for (const rawLine of text.split('\n')) {
    // trim() also strips a trailing '\r' from CRLF files.
    const line = rawLine.trim();
    if (!line) continue;
    let parsed = null;
    try {
      parsed = JSON.parse(line);
    } catch {
      parsed = null;
    }
    if (parsed) rows.push(parsed);
  }
  return rows;
}
886
-
887
/**
 * Build the de-duplication key for a comment row.
 *
 * The key is `noteId|userId|content`, where each part is stringified and
 * trimmed (falsy values become empty strings) and every whitespace run in
 * `content` is collapsed to a single space.
 *
 * @param {object} [row] - Comment row; `noteId`, `userId` and `content` are read.
 * @returns {string} The composite key; `'||'` when the row has none of the fields.
 */
function buildCommentDedupKey(row) {
  const asTrimmedText = (value) => String(value || '').trim();
  const collapsedContent = String(row?.content || '').replace(/\s+/g, ' ').trim();
  return [asTrimmedText(row?.noteId), asTrimmedText(row?.userId), collapsedContent].join('|');
}
893
-
894
/** Like-summary counters that are summed across all merged like summaries. */
const MERGED_LIKE_SUM_KEYS = ['scannedCount', 'hitCount', 'likedCount', 'skippedCount'];

/** Per-profile stats counters summed into the merged totals (order = output key order). */
const MERGED_PROFILE_STAT_KEYS = [
  'openedNotes',
  'commentsHarvestRuns',
  'commentsCollected',
  'commentsExpected',
  'commentsReachedBottomCount',
  'likesHitCount',
  'likesNewCount',
  'likesSkippedCount',
  'likesAlreadyCount',
  'likesDedupCount',
  'searchCount',
  'rollbackCount',
  'returnToSearchCount',
  'operationErrors',
  'recoveryFailed',
];

/** Read every comment file of the successful profiles and de-duplicate rows (first occurrence wins). */
async function collectMergedComments(successResults) {
  const mergedComments = [];
  const seenCommentKeys = new Set();
  for (const result of successResults) {
    for (const commentsPath of result.stats?.commentPaths || []) {
      const rows = await readJsonlRows(commentsPath);
      for (const row of rows) {
        const key = buildCommentDedupKey(row);
        if (!key || seenCommentKeys.has(key)) continue;
        seenCommentKeys.add(key);
        mergedComments.push({
          profileId: result.profileId,
          ...row,
        });
      }
    }
  }
  return mergedComments;
}

/** Load every like-summary JSON file of the successful profiles, skipping unreadable ones. */
async function collectMergedLikeSummaries(successResults) {
  const mergedLikeSummaries = [];
  for (const result of successResults) {
    for (const summaryPath of result.stats?.likeSummaryPaths || []) {
      try {
        const raw = await fsp.readFile(summaryPath, 'utf8');
        const summary = JSON.parse(raw);
        mergedLikeSummaries.push({ profileId: result.profileId, summaryPath, summary });
      } catch {
        // Best effort: a missing or malformed summary file is left out of the merge.
        continue;
      }
    }
  }
  return mergedLikeSummaries;
}

/** Sum the like counters over all loaded like summaries. */
function summarizeMergedLikeTotals(mergedLikeSummaries) {
  const likeTotals = {
    noteSummaries: mergedLikeSummaries.length,
    scannedCount: 0,
    hitCount: 0,
    likedCount: 0,
    skippedCount: 0,
    reachedBottomCount: 0,
  };
  for (const item of mergedLikeSummaries) {
    const summary = item.summary || {};
    for (const key of MERGED_LIKE_SUM_KEYS) {
      likeTotals[key] += toNumber(summary[key], 0);
    }
    if (summary.reachedBottom === true) likeTotals.reachedBottomCount += 1;
  }
  return likeTotals;
}

/** Sum per-profile stats over all results (successful or not) into one totals object. */
function summarizeMergedProfileTotals(results, succeededCount, failedCount) {
  const totals = {
    profilesTotal: results.length,
    profilesSucceeded: succeededCount,
    profilesFailed: failedCount,
    assignedNotes: 0,
  };
  for (const key of MERGED_PROFILE_STAT_KEYS) totals[key] = 0;

  for (const result of results) {
    const stats = result?.stats || {};
    // The result-level assignment takes precedence over the one recorded in stats.
    totals.assignedNotes += toNumber(result?.assignedNotes ?? stats.assignedNotes, 0);
    for (const key of MERGED_PROFILE_STAT_KEYS) {
      totals[key] += toNumber(stats[key], 0);
    }
  }
  return totals;
}

/**
 * Merge the outputs of all profile runs into `mergedDir`.
 *
 * Writes up to three files:
 * - `comments.merged.jsonl` - de-duplicated comment rows from successful
 *   profiles (only written when at least one row exists);
 * - `likes.merged.json` - all readable like summaries plus summed totals;
 * - `summary.json` - run metadata, aggregated totals and every profile result.
 *
 * @param {object} params
 * @param {Array<object>} params.results - Per-profile results; an entry counts
 *   as failed when it is falsy or has `ok === false`.
 * @param {string} params.mergedDir - Output directory (created if needed).
 * @param {string} params.keyword - Recorded in the summary.
 * @param {string} params.env - Recorded in the summary.
 * @param {number} params.totalNotes - Recorded in the summary; non-finite values become `null`.
 * @param {*} params.parallel - Recorded under `execution.parallel`.
 * @param {*} params.concurrency - Recorded under `execution.concurrency`.
 * @param {Array} [params.skippedProfiles=[]] - Recorded in the summary.
 * @returns {Promise<{summaryPath: string, mergedSummary: object}>} Path of the
 *   written summary file and the summary object itself.
 */
async function mergeProfileOutputs({
  results,
  mergedDir,
  keyword,
  env,
  totalNotes,
  parallel,
  concurrency,
  skippedProfiles = [],
}) {
  const success = results.filter((item) => item && item.ok);
  const failed = results.filter((item) => !item || item.ok === false);

  const mergedComments = await collectMergedComments(success);
  const mergedLikeSummaries = await collectMergedLikeSummaries(success);

  await ensureDir(mergedDir);
  const mergedCommentsPath = path.join(mergedDir, 'comments.merged.jsonl');
  if (mergedComments.length > 0) {
    const payload = mergedComments.map((row) => JSON.stringify(row)).join('\n');
    await fsp.writeFile(mergedCommentsPath, `${payload}\n`, 'utf8');
  }

  const mergedLikeSummaryPath = path.join(mergedDir, 'likes.merged.json');
  await writeJson(mergedLikeSummaryPath, {
    generatedAt: nowIso(),
    totals: summarizeMergedLikeTotals(mergedLikeSummaries),
    items: mergedLikeSummaries,
  });

  const totals = summarizeMergedProfileTotals(results, success.length, failed.length);

  const mergedSummary = {
    generatedAt: nowIso(),
    keyword,
    env,
    totalNotes: Number.isFinite(totalNotes) ? totalNotes : null,
    execution: {
      parallel,
      concurrency,
    },
    skippedProfiles,
    totals,
    artifacts: {
      // The comments file only exists when there was something to write.
      mergedCommentsPath: mergedComments.length > 0 ? mergedCommentsPath : null,
      mergedLikeSummaryPath,
    },
    profiles: results,
  };

  const summaryPath = path.join(mergedDir, 'summary.json');
  await writeJson(summaryPath, mergedSummary);
  return {
    summaryPath,
    mergedSummary,
  };
}
1033
-
1034
90
  export async function runUnified(argv, overrides = {}) {
1035
91
  const keyword = String(argv.keyword || argv.k || '').trim();
1036
92
  if (!keyword) throw new Error('missing --keyword');
93
+ cleanupIncompleteProfiles();
1037
94
 
95
+ const stage = resolveXhsStage(argv);
1038
96
  const env = String(argv.env || 'prod').trim() || 'prod';
1039
97
  const busEnabled = parseBool(argv['bus-events'], false) || process.env.WEBAUTO_BUS_EVENTS === '1';
1040
98
  let profiles = parseProfiles(argv);
@@ -1049,16 +107,31 @@ export async function runUnified(argv, overrides = {}) {
1049
107
  }
1050
108
  }
1051
109
  if (profiles.length === 0) throw new Error('missing --profile/--profiles/--profilepool and no valid xiaohongshu account profile found');
1052
- await Promise.all(profiles.map((profileId) => ensureProfileSession(profileId)));
110
+ profiles = assertProfilesUsable(profiles);
111
+ const planOnly = parseBool(argv['plan-only'], false);
112
+ const headless = parseBool(argv.headless, false);
1053
113
  const defaultMaxNotes = parseIntFlag(argv['max-notes'] ?? argv.target, 30, 1);
1054
114
  const totalNotes = parseNonNegativeInt(argv['total-notes'] ?? argv['total-target'], 0);
1055
115
  const hasTotalTarget = totalNotes > 0;
1056
- const maxWaves = parseIntFlag(argv['max-waves'], 40, 1);
1057
- const parallelRequested = parseBool(argv.parallel, false);
1058
- const configuredConcurrency = parseIntFlag(argv.concurrency, profiles.length || 1, 1);
1059
- const planOnly = parseBool(argv['plan-only'], false);
1060
- const seedCollectCountFlag = parseNonNegativeInt(argv['seed-collect-count'], 0);
1061
- const seedCollectRoundsFlag = parseNonNegativeInt(argv['seed-collect-rounds'], 6);
116
+ let maxWaves = parseIntFlag(argv['max-waves'], 40, 1);
117
+ let parallelRequested = parseBool(argv.parallel, false);
118
+ let configuredConcurrency = parseIntFlag(argv.concurrency, profiles.length || 1, 1);
119
+ const hasSeedCollectCountFlag = argv['seed-collect-count'] !== undefined && argv['seed-collect-count'] !== null && argv['seed-collect-count'] !== '';
120
+ const hasSeedCollectRoundsFlag = argv['seed-collect-rounds'] !== undefined && argv['seed-collect-rounds'] !== null && argv['seed-collect-rounds'] !== '';
121
+ const seedCollectCountFlag = hasSeedCollectCountFlag ? parseNonNegativeInt(argv['seed-collect-count'], 0) : 0;
122
+ const seedCollectRoundsFlag = hasSeedCollectRoundsFlag ? parseNonNegativeInt(argv['seed-collect-rounds'], 0) : 0;
123
+
124
+ if (stage === 'links') {
125
+ if (profiles.length !== 1) {
126
+ throw new Error('stage=links requires exactly one profile (no sharding)');
127
+ }
128
+ if (hasTotalTarget) {
129
+ throw new Error('stage=links does not support --total-notes/--total-target sharding');
130
+ }
131
+ maxWaves = 1;
132
+ parallelRequested = false;
133
+ configuredConcurrency = 1;
134
+ }
1062
135
 
1063
136
  const runLabel = formatRunLabel();
1064
137
  const baseOutputRoot = resolveDownloadRoot(argv['output-root']);
@@ -1076,6 +149,21 @@ export async function runUnified(argv, overrides = {}) {
1076
149
  'merged',
1077
150
  `run-${runLabel}`,
1078
151
  );
152
+ if (!planOnly) {
153
+ const serviceReset = await resetTaskServices(argv, {
154
+ rootDir: process.cwd(),
155
+ debugActionLogPath: path.join(mergedDir, 'profiles', 'input-actions.jsonl'),
156
+ });
157
+ console.log(JSON.stringify({
158
+ event: 'xhs.unified.service_reset',
159
+ ok: serviceReset.ok,
160
+ skipped: serviceReset.skipped === true,
161
+ reason: serviceReset.reason || null,
162
+ actionLogPath: serviceReset.actionLogPath || null,
163
+ statusReady: Boolean(serviceReset.status?.json?.ready),
164
+ }));
165
+ await Promise.all(profiles.map((profileId) => ensureProfileSession(profileId, { headless })));
166
+ }
1079
167
  const planPath = path.join(mergedDir, 'plan.json');
1080
168
  const completedAtStart = hasTotalTarget
1081
169
  ? await collectCompletedNoteIds(baseOutputRoot, env, keyword)
@@ -1103,6 +191,8 @@ export async function runUnified(argv, overrides = {}) {
1103
191
  error: error?.message || String(error),
1104
192
  stats: {
1105
193
  assignedNotes: spec.assignedNotes,
194
+ linksCollected: 0,
195
+ linksPaths: [],
1106
196
  openedNotes: 0,
1107
197
  commentsHarvestRuns: 0,
1108
198
  commentsCollected: 0,
@@ -1192,18 +282,20 @@ export async function runUnified(argv, overrides = {}) {
1192
282
  ? Math.min(plan.length, configuredConcurrency)
1193
283
  : 1;
1194
284
  const waveTag = `wave-${String(wave).padStart(3, '0')}`;
285
+ const waveAssignedNotes = plan.reduce((sum, item) => sum + Math.max(0, Number(item?.assignedNotes || 0)), 0);
1195
286
  const specs = plan.map((item, index) => {
1196
287
  const shardId = sanitizeForPath(item.profileId, 'profile');
1197
288
  const shardOutputRoot = useShardRoots
1198
289
  ? path.join(baseOutputRoot, 'shards', shardId)
1199
290
  : outputRootArg;
1200
- const defaultSeedCollectCount = Math.max(1, Math.min(
1201
- Number(item.assignedNotes || 1),
1202
- Math.max(1, plan.length * 2),
1203
- ));
291
+ const defaultSeedCollectCount = Math.max(1, waveAssignedNotes || Number(item.assignedNotes || 1) || 1);
1204
292
  const seedCollectCount = index === 0
1205
293
  ? (seedCollectCountFlag > 0 ? seedCollectCountFlag : defaultSeedCollectCount)
1206
294
  : 0;
295
+ const defaultSeedCollectMaxRounds = Math.max(6, Math.ceil(Math.max(1, seedCollectCount) / 2));
296
+ const seedCollectMaxRounds = index === 0
297
+ ? (seedCollectRoundsFlag > 0 ? seedCollectRoundsFlag : defaultSeedCollectMaxRounds)
298
+ : 0;
1207
299
  return {
1208
300
  ...item,
1209
301
  runLabel,
@@ -1214,7 +306,7 @@ export async function runUnified(argv, overrides = {}) {
1214
306
  sharedHarvestPath,
1215
307
  searchSerialKey,
1216
308
  seedCollectCount,
1217
- seedCollectMaxRounds: index === 0 ? seedCollectRoundsFlag : 0,
309
+ seedCollectMaxRounds,
1218
310
  };
1219
311
  });
1220
312
 
@@ -1376,10 +468,12 @@ async function main() {
1376
468
  ' --concurrency <n> 并行度(默认=账号数)',
1377
469
  ' --resume <bool> 断点续传(默认 false)',
1378
470
  ' --incremental-max <bool> max-notes 作为增量配额(默认 true)',
471
+ ' --stage <name> 阶段:full|links|content|like|reply(默认 full)',
472
+ ' links: 搜索+逐条点开采链(xsec_token, 单账号不分片); content: 搜索+采链+内容; like: 搜索+采链+内容+点赞; reply: 搜索+采链+内容+回复',
1379
473
  ' --plan-only 只生成分片计划,不执行',
1380
474
  ' --output-root <path> 输出根目录(并行时自动分 profile shard)',
1381
- ' --seed-collect-count <n> 首账号预采样去重ID数量(默认按分片自动)',
1382
- ' --seed-collect-rounds <n> 首账号预采样滚动轮数(默认6',
475
+ ' --seed-collect-count <n> 链接预采样数量(默认=max-notes)',
476
+ ' --seed-collect-rounds <n> 链接预采样滚动轮数(默认=max(6,ceil(max-notes/2)))',
1383
477
  ' --search-serial-key <key> 搜索阶段串行锁key(默认自动生成)',
1384
478
  ' --shared-harvest-path <path> 共享harvest去重列表路径(默认自动生成)',
1385
479
  ' --search-submit-method <m> 搜索提交方式 click|enter|form(默认 flow-gate)',
@@ -1387,6 +481,7 @@ async function main() {
1387
481
  ' --operation-min-interval <ms> 基础操作最小间隔(默认 flow-gate)',
1388
482
  ' --event-cooldown <ms> 基础事件冷却(默认 flow-gate)',
1389
483
  ' --pacing-jitter <ms> 基础抖动区间(默认 flow-gate)',
484
+ ' --service-reset <bool> 任务前复位并重启 ui cli 服务(默认 true)',
1390
485
  ].join('\n'));
1391
486
  return;
1392
487
  }