@web-auto/webauto 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/apps/desktop-console/default-settings.json +2 -2
  2. package/apps/desktop-console/dist/main/index.mjs +983 -128
  3. package/apps/desktop-console/dist/main/preload.mjs +7 -0
  4. package/apps/desktop-console/dist/renderer/index.html +622 -50
  5. package/apps/desktop-console/dist/renderer/index.js +2423 -469
  6. package/apps/desktop-console/dist/renderer/run.mts +6 -5
  7. package/apps/desktop-console/entry/ui-cli.mjs +672 -0
  8. package/apps/desktop-console/entry/ui-console.mjs +416 -29
  9. package/apps/webauto/entry/account.mjs +89 -53
  10. package/apps/webauto/entry/browser-status.mjs +7 -10
  11. package/apps/webauto/entry/lib/account-detect.mjs +254 -28
  12. package/apps/webauto/entry/lib/account-store.mjs +219 -30
  13. package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
  14. package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
  15. package/apps/webauto/entry/lib/profilepool.mjs +14 -5
  16. package/apps/webauto/entry/lib/quota-status.mjs +23 -0
  17. package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
  18. package/apps/webauto/entry/profilepool.mjs +106 -17
  19. package/apps/webauto/entry/schedule.mjs +612 -0
  20. package/apps/webauto/entry/weibo-unified.mjs +134 -0
  21. package/apps/webauto/entry/xhs-install.mjs +256 -31
  22. package/apps/webauto/entry/xhs-status.mjs +5 -2
  23. package/apps/webauto/entry/xhs-unified.mjs +631 -98
  24. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
  25. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
  26. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
  27. package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
  28. package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
  29. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
  30. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
  31. package/bin/camoufox-cli.mjs +61 -0
  32. package/bin/webauto.mjs +301 -54
  33. package/dist/modules/camo-backend/src/index.js +49 -1
  34. package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
  35. package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
  36. package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
  37. package/dist/modules/collection-manager/bloom-filter.js +91 -0
  38. package/dist/modules/collection-manager/date-utils.js +275 -0
  39. package/dist/modules/collection-manager/index.js +258 -0
  40. package/dist/modules/collection-manager/storage.js +195 -0
  41. package/dist/modules/collection-manager/types.js +47 -0
  42. package/dist/modules/logging/src/index.js +1 -1
  43. package/dist/modules/process-registry/index.js +230 -0
  44. package/dist/modules/rate-limiter/index.js +242 -0
  45. package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
  46. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
  47. package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
  48. package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
  49. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
  50. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
  51. package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
  52. package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
  53. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
  54. package/dist/modules/workflow/config/workflowRegistry.js +2 -0
  55. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
  56. package/dist/modules/workflow/src/runner.js +6 -0
  57. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
  58. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
  59. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
  60. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
  61. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
  62. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
  63. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
  64. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
  65. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
  66. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
  67. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  68. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
  69. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  70. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
  71. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
  72. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
  73. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
  74. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
  75. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
  76. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
  77. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  78. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
  79. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  80. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
  81. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  82. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
  83. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  84. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
  85. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  86. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
  87. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  88. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
  89. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  90. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
  91. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  92. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
  93. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  94. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
  95. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  96. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
  97. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  98. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
  99. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  100. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
  101. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  102. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
  103. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  104. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
  105. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  106. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
  107. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  108. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
  109. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  110. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
  111. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  112. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
  113. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  114. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
  115. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  116. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
  117. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  118. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
  119. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  120. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
  121. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
  122. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
  123. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  124. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
  125. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
  126. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
  127. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  128. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
  129. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  130. package/dist/services/shared/serviceProcessLogger.js +1 -1
  131. package/dist/services/unified-api/server.js +105 -11
  132. package/modules/camo-backend/src/index.ts +46 -1
  133. package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
  134. package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
  135. package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
  136. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
  137. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
  138. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
  139. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
  140. package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
  141. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
  142. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
  143. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
  144. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
  145. package/modules/collection-manager/bloom-filter.ts +112 -0
  146. package/modules/collection-manager/date-utils.ts +316 -0
  147. package/modules/collection-manager/index.ts +309 -0
  148. package/modules/collection-manager/package.json +10 -0
  149. package/modules/collection-manager/storage.ts +174 -0
  150. package/modules/collection-manager/types.ts +156 -0
  151. package/modules/logging/src/index.ts +1 -1
  152. package/modules/process-registry/index.ts +284 -0
  153. package/modules/rate-limiter/index.ts +322 -0
  154. package/modules/state/src/paths.ts +9 -1
  155. package/modules/task-scheduler/index.ts +293 -0
  156. package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
  157. package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
  158. package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
  159. package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
  160. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
  161. package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
  162. package/modules/workflow/config/workflowRegistry.ts +2 -0
  163. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
  164. package/modules/workflow/src/runner.ts +6 -0
  165. package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
  166. package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
  167. package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
  168. package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
  169. package/package.json +13 -4
  170. package/scripts/postinstall-resources.mjs +62 -0
  171. package/scripts/test/run-coverage.mjs +76 -0
  172. package/scripts/weibo/search.ts +49 -0
  173. package/services/shared/serviceProcessLogger.ts +1 -1
  174. package/services/unified-api/server.ts +98 -12
@@ -11,6 +11,8 @@ import { AutoscriptRunner } from '../../../modules/camo-runtime/src/autoscript/r
11
11
  import { syncXhsAccountsByProfiles } from './lib/account-detect.mjs';
12
12
  import { markProfileInvalid } from './lib/account-store.mjs';
13
13
  import { listProfilesForPool } from './lib/profilepool.mjs';
14
+ import { runCamo } from './lib/camo-cli.mjs';
15
+ import { publishBusEvent } from './lib/bus-publish.mjs';
14
16
 
15
17
  function nowIso() {
16
18
  return new Date().toISOString();
@@ -65,15 +67,125 @@ function sanitizeForPath(name, fallback = 'unknown') {
65
67
  return cleaned || fallback;
66
68
  }
67
69
 
70
/** Landing page a profile session is opened on and navigated back to. */
const XHS_HOME_URL = 'https://www.xiaohongshu.com';

/**
 * Starts a camo session for the given profile and, when the start succeeds,
 * navigates it to the Xiaohongshu home page.
 *
 * The `runCamo` result is read synchronously (`?.ok`); the outcome of the
 * follow-up `goto` call is not inspected.
 *
 * @param {unknown} profileId - Profile identifier; coerced to a trimmed string.
 * @returns {Promise<boolean>} `true` when the `start` command reported `ok`,
 *   `false` for a blank id or a failed start.
 */
async function ensureProfileSession(profileId) {
  const trimmedId = String(profileId || '').trim();
  if (trimmedId === '') {
    return false;
  }

  const startResult = runCamo(['start', trimmedId, '--url', XHS_HOME_URL], {
    rootDir: process.cwd(),
    timeoutMs: 60000,
  });
  const started = Boolean(startResult?.ok);

  if (started) {
    runCamo(['goto', trimmedId, XHS_HOME_URL], { rootDir: process.cwd(), timeoutMs: 60000 });
  }
  return started;
}
84
+
85
/**
 * Builds the file path for a stop screenshot:
 * `<outputDir>/stop-<profile>-<reason>.png`.
 *
 * Both name segments go through `sanitizeForPath`; an empty reason is treated
 * as `'stop'`.
 *
 * @param {unknown} profileId - Profile the screenshot belongs to.
 * @param {unknown} reason - Stop reason used in the file name.
 * @param {string} outputDir - Directory the file is placed in.
 * @returns {string} Joined screenshot path.
 */
function buildStopScreenshotPath(profileId, reason, outputDir) {
  const profilePart = sanitizeForPath(profileId, 'profile');
  const reasonPart = sanitizeForPath(reason || 'stop', 'stop');
  return path.join(outputDir, `stop-${profilePart}-${reasonPart}.png`);
}
91
+
92
/**
 * Takes a screenshot of the profile's browser when a run stops.
 *
 * The output directory is created on a best-effort basis (mkdir failures are
 * ignored). If the first capture does not report `ok`, the profile session is
 * (re)started via `ensureProfileSession` and the capture is attempted once more.
 *
 * @param {object} params
 * @param {unknown} params.profileId - Profile passed to the `screenshot` command.
 * @param {unknown} params.reason - Stop reason, used only for the file name.
 * @param {unknown} params.outputDir - Target directory; blank disables capture.
 * @returns {Promise<string|null>} Path of the written screenshot, or `null`
 *   when capture was skipped or both attempts failed.
 */
async function captureStopScreenshot({ profileId, reason, outputDir }) {
  const targetDir = String(outputDir || '').trim();
  if (targetDir === '') {
    return null;
  }

  try {
    await fsp.mkdir(targetDir, { recursive: true });
  } catch {}

  const screenshotPath = buildStopScreenshotPath(profileId, reason, targetDir);
  const capture = () => runCamo(
    ['screenshot', profileId, '--output', screenshotPath],
    { rootDir: process.cwd(), timeoutMs: 60000 },
  );

  let outcome = capture();
  if (!outcome?.ok) {
    // The session may be gone; bring it back and retry exactly once.
    await ensureProfileSession(profileId);
    outcome = capture();
  }

  return outcome?.ok ? screenshotPath : null;
}
111
+
112
/**
 * Produces path-safe directory segments for an env / keyword pair.
 *
 * @param {object} parts
 * @param {unknown} parts.env - Environment name; falls back to `'prod'`.
 * @param {unknown} parts.keyword - Search keyword; falls back to `'unknown'`.
 * @returns {{ safeEnv: string, safeKeyword: string }} Sanitized segments.
 */
function sanitizeKeywordDirParts({ env, keyword }) {
  const safeEnv = sanitizeForPath(env, 'prod');
  const safeKeyword = sanitizeForPath(keyword, 'unknown');
  return { safeEnv, safeKeyword };
}
118
+
68
119
  function resolveDownloadRoot(customRoot = '') {
69
120
  const fromArg = String(customRoot || '').trim();
70
121
  if (fromArg) return path.resolve(fromArg);
71
122
  const fromEnv = String(process.env.WEBAUTO_DOWNLOAD_ROOT || process.env.WEBAUTO_DOWNLOAD_DIR || '').trim();
72
123
  if (fromEnv) return path.resolve(fromEnv);
124
+ if (process.platform === 'win32') {
125
+ try {
126
+ if (fs.existsSync('D:\\')) return 'D:\\webauto';
127
+ } catch {
128
+ // ignore
129
+ }
130
+ const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
131
+ return path.join(home, '.webauto');
132
+ }
73
133
  const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
74
134
  return path.join(home, '.webauto', 'download');
75
135
  }
76
136
 
137
/**
 * Directory names that may sit next to per-note folders inside a keyword
 * directory but are not note ids; they are skipped when counting completed notes.
 */
const NON_NOTE_DIR_NAMES = new Set([
  'merged', 'profiles',
  'like-evidence', 'virtual-like',
  'smart-reply', 'comment-match',
  'discover-fallback',
]);
146
+
147
/**
 * Lists every directory that can hold notes for an env / keyword pair:
 * the main `<root>/xiaohongshu/<env>/<keyword>` directory plus the same
 * sub-path under each directory found in `<root>/shards`.
 *
 * A missing or unreadable `shards` directory is ignored. The directories are
 * not checked for existence.
 *
 * @param {string} baseOutputRoot - Output root to look under.
 * @param {unknown} env - Environment name (sanitized before use).
 * @param {unknown} keyword - Search keyword (sanitized before use).
 * @returns {Promise<string[]>} De-duplicated directory paths, main one first.
 */
async function collectKeywordDirs(baseOutputRoot, env, keyword) {
  const { safeEnv, safeKeyword } = sanitizeKeywordDirParts({ env, keyword });
  const keywordSubPath = ['xiaohongshu', safeEnv, safeKeyword];

  const uniqueDirs = new Set([path.join(baseOutputRoot, ...keywordSubPath)]);
  const shardsRoot = path.join(baseOutputRoot, 'shards');

  try {
    const shardEntries = await fsp.readdir(shardsRoot, { withFileTypes: true });
    for (const shard of shardEntries) {
      if (shard.isDirectory()) {
        uniqueDirs.add(path.join(shardsRoot, shard.name, ...keywordSubPath));
      }
    }
  } catch {
    // ignore
  }

  return [...uniqueDirs];
}
164
+
165
/**
 * Collects the ids of notes that already have a directory on disk for the
 * given env / keyword, across all directories from `collectKeywordDirs`.
 *
 * A sub-directory counts as a note when its trimmed name is non-empty, does
 * not start with `.` or `_`, and is not listed in `NON_NOTE_DIR_NAMES`.
 * Keyword directories that cannot be read are skipped.
 *
 * @param {string} baseOutputRoot - Output root to scan.
 * @param {unknown} env - Environment name.
 * @param {unknown} keyword - Search keyword.
 * @returns {Promise<{ count: number, noteIds: string[] }>} Unique note ids and
 *   how many there are.
 */
async function collectCompletedNoteIds(baseOutputRoot, env, keyword) {
  const scanDirs = await collectKeywordDirs(baseOutputRoot, env, keyword);
  const foundIds = new Set();

  const isNoteDirName = (name) => (
    name !== ''
    && !name.startsWith('.')
    && !name.startsWith('_')
    && !NON_NOTE_DIR_NAMES.has(name)
  );

  for (const scanDir of scanDirs) {
    let children;
    try {
      children = await fsp.readdir(scanDir, { withFileTypes: true });
    } catch {
      children = [];
    }

    children
      .filter((child) => child.isDirectory())
      .map((child) => String(child.name || '').trim())
      .filter(isNoteDirName)
      .forEach((noteId) => foundIds.add(noteId));
  }

  return {
    count: foundIds.size,
    noteIds: [...foundIds],
  };
}
188
+
77
189
  async function ensureDir(dirPath) {
78
190
  await fsp.mkdir(dirPath, { recursive: true });
79
191
  }
@@ -88,23 +200,123 @@ async function appendJsonl(filePath, payload) {
88
200
  await fsp.appendFile(filePath, `${JSON.stringify(payload)}\n`, 'utf8');
89
201
  }
90
202
 
203
/**
 * Resolves the base URL of the unified API, without a trailing slash.
 *
 * Candidates are tried in order: `WEBAUTO_UNIFIED_API`, `WEBAUTO_UNIFIED_URL`,
 * then the built-in default `http://127.0.0.1:7701`. Each candidate is trimmed
 * and stripped of trailing slashes *before* it is accepted, so a variable that
 * is set but blank (or only slashes) no longer wins over the fallback and
 * produces an empty base URL.
 *
 * @returns {string} Non-empty base URL with no trailing `/`.
 */
function resolveUnifiedApiBaseUrl() {
  const candidates = [
    process.env.WEBAUTO_UNIFIED_API,
    process.env.WEBAUTO_UNIFIED_URL,
  ];
  for (const candidate of candidates) {
    const normalized = String(candidate ?? '').trim().replace(/\/+$/, '');
    if (normalized !== '') {
      return normalized;
    }
  }
  return 'http://127.0.0.1:7701';
}
211
+
212
/**
 * POSTs a JSON payload to the unified API and reports whether it was accepted.
 *
 * This is a best-effort call: network errors, timeouts and non-2xx responses
 * all resolve to `false` instead of throwing. The response body is not needed,
 * so it is cancelled explicitly rather than left unread.
 *
 * @param {string} baseUrl - API base URL without a trailing slash.
 * @param {string} pathname - Request path, starting with `/`.
 * @param {object} [payload] - JSON-serializable body; a falsy value sends `{}`.
 * @param {number} [timeoutMs=2000] - Abort the request after this many
 *   milliseconds; non-positive or non-finite values fall back to 2000.
 * @returns {Promise<boolean>} `true` only when the response status is OK.
 */
async function postUnifiedTaskRequest(baseUrl, pathname, payload, timeoutMs = 2000) {
  const effectiveTimeout = Number.isFinite(timeoutMs) && timeoutMs > 0 ? timeoutMs : 2000;
  try {
    const response = await fetch(`${baseUrl}${pathname}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload || {}),
      signal: AbortSignal.timeout(effectiveTimeout),
    });
    const accepted = Boolean(response.ok);
    // Release the unread body; a failed cancel must not change the result.
    await response.body?.cancel().catch(() => {});
    return accepted;
  } catch {
    return false;
  }
}
226
+
227
/**
 * Creates a best-effort reporter that mirrors a run's state to the unified API.
 *
 * Every method resolves to a boolean and never throws for transport problems
 * (that is delegated to `postUnifiedTaskRequest`). A task record is created
 * lazily via `POST /api/v1/tasks` the first time a run id is seen.
 *
 * Callers fire `update` / `pushEvent` without awaiting them, so several calls
 * for the same run id can overlap. The in-flight create request is therefore
 * shared between overlapping callers; a failed create is not cached and is
 * retried on the next call.
 *
 * @param {{ profileId?: unknown, keyword?: unknown }} [seed] - Values attached
 *   to create and update requests. `profileId` falls back to `'unknown'`.
 * @returns {{
 *   ensureCreated: (runId: unknown, extra?: object) => Promise<boolean>,
 *   update: (runId: unknown, patch?: object) => Promise<boolean>,
 *   pushEvent: (runId: unknown, type: unknown, data?: object) => Promise<boolean>,
 *   setError: (runId: unknown, message: unknown, code?: string, recoverable?: boolean) => Promise<boolean>,
 * }} Reporter methods; each resolves `false` for a blank run id.
 */
function createTaskReporter(seed = {}) {
  const baseUrl = resolveUnifiedApiBaseUrl();
  const staticSeed = {
    profileId: String(seed?.profileId || 'unknown').trim() || 'unknown',
    keyword: String(seed?.keyword || '').trim(),
    phase: 'unified',
  };
  // Run ids whose task record is known to exist on the server.
  const createdRunIds = new Set();
  // Run id -> promise of the create request currently in flight.
  const pendingCreates = new Map();

  const normalizeRunId = (runId) => String(runId || '').trim();

  const ensureCreated = async (runId, extra = {}) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    if (createdRunIds.has(rid)) return true;

    const inFlight = pendingCreates.get(rid);
    if (inFlight) return inFlight;

    const attempt = (async () => {
      const ok = await postUnifiedTaskRequest(baseUrl, '/api/v1/tasks', {
        runId: rid,
        ...staticSeed,
        ...extra,
      });
      if (ok) createdRunIds.add(rid);
      return ok;
    })().finally(() => {
      // Drop the entry either way so a failed create can be retried later.
      pendingCreates.delete(rid);
    });
    pendingCreates.set(rid, attempt);
    return attempt;
  };

  const update = async (runId, patch = {}) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    // The update is sent even when the create failed, as before.
    await ensureCreated(rid, patch);
    return postUnifiedTaskRequest(baseUrl, `/api/v1/tasks/${encodeURIComponent(rid)}/update`, {
      ...staticSeed,
      ...patch,
    });
  };

  const pushEvent = async (runId, type, data = {}) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    await ensureCreated(rid, data);
    return postUnifiedTaskRequest(baseUrl, `/api/v1/tasks/${encodeURIComponent(rid)}/events`, {
      type: String(type || 'event').trim() || 'event',
      data,
    });
  };

  const setError = async (runId, message, code = 'TASK_ERROR', recoverable = false) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    return update(rid, {
      error: {
        message: String(message || 'task_error'),
        code: String(code || 'TASK_ERROR'),
        timestamp: Date.now(),
        recoverable: recoverable === true,
      },
    });
  };

  return {
    ensureCreated,
    update,
    pushEvent,
    setError,
  };
}
289
+
91
290
  function buildTemplateOptions(argv, profileId, overrides = {}) {
92
291
  const keyword = String(argv.keyword || argv.k || '').trim();
93
- const env = String(argv.env || 'debug').trim() || 'debug';
292
+ const env = String(argv.env || 'prod').trim() || 'prod';
94
293
  const inputMode = String(argv['input-mode'] || 'protocol').trim() || 'protocol';
95
294
  const headless = parseBool(argv.headless, false);
96
295
  const ocrCommand = String(argv['ocr-command'] || '').trim();
97
296
  const maxNotes = parseIntFlag(argv['max-notes'] ?? argv.target, 30, 1);
297
+ const maxComments = parseNonNegativeInt(argv['max-comments'], 0);
98
298
  const throttle = parseIntFlag(argv.throttle, 500, 100);
99
299
  const tabCount = parseIntFlag(argv['tab-count'], 4, 1);
100
300
  const noteIntervalMs = parseIntFlag(argv['note-interval'], 900, 200);
101
- const maxLikesPerRound = parseIntFlag(argv['max-likes'], 2, 1);
301
+ const maxLikesPerRound = parseNonNegativeInt(argv['max-likes'], 0);
102
302
  const matchMode = String(argv['match-mode'] || 'any').trim() || 'any';
103
303
  const matchMinHits = parseIntFlag(argv['match-min-hits'], 1, 1);
104
304
  const matchKeywords = String(argv['match-keywords'] || keyword).trim();
105
305
  const likeKeywords = String(argv['like-keywords'] || '').trim();
106
306
  const replyText = String(argv['reply-text'] || '感谢分享,已关注').trim() || '感谢分享,已关注';
107
307
  const outputRoot = String(argv['output-root'] || '').trim();
308
+ const resume = parseBool(argv.resume, false);
309
+ const incrementalMax = parseBool(argv['incremental-max'], true);
310
+ const sharedHarvestPath = String(overrides.sharedHarvestPath ?? argv['shared-harvest-path'] ?? '').trim();
311
+ const searchSerialKey = String(overrides.searchSerialKey ?? argv['search-serial-key'] ?? '').trim();
312
+ const seedCollectCount = parseNonNegativeInt(
313
+ overrides.seedCollectCount ?? argv['seed-collect-count'],
314
+ 0,
315
+ );
316
+ const seedCollectMaxRounds = parseNonNegativeInt(
317
+ overrides.seedCollectMaxRounds ?? argv['seed-collect-rounds'],
318
+ 0,
319
+ );
108
320
 
109
321
  const dryRun = parseBool(argv['dry-run'], false);
110
322
  const disableDryRun = parseBool(argv['no-dry-run'], false);
@@ -122,7 +334,10 @@ function buildTemplateOptions(argv, profileId, overrides = {}) {
122
334
  tabCount,
123
335
  noteIntervalMs,
124
336
  maxNotes,
337
+ maxComments,
125
338
  maxLikesPerRound,
339
+ resume,
340
+ incrementalMax,
126
341
  matchMode,
127
342
  matchMinHits,
128
343
  matchKeywords,
@@ -135,11 +350,15 @@ function buildTemplateOptions(argv, profileId, overrides = {}) {
135
350
  doReply: parseBool(argv['do-reply'], false) && !effectiveDryRun,
136
351
  doOcr: parseBool(argv['do-ocr'], false),
137
352
  persistComments: parseBool(argv['persist-comments'], !effectiveDryRun),
353
+ sharedHarvestPath,
354
+ searchSerialKey,
355
+ seedCollectCount,
356
+ seedCollectMaxRounds,
138
357
  };
139
358
  return { ...base, ...overrides };
140
359
  }
141
360
 
142
- function buildShardPlan({ profiles, totalNotes, defaultMaxNotes }) {
361
+ function buildEvenShardPlan({ profiles, totalNotes, defaultMaxNotes }) {
143
362
  const uniqueProfiles = Array.from(new Set(profiles.map((item) => String(item || '').trim()).filter(Boolean)));
144
363
  if (uniqueProfiles.length === 0) return [];
145
364
 
@@ -156,6 +375,27 @@ function buildShardPlan({ profiles, totalNotes, defaultMaxNotes }) {
156
375
  return plan.filter((item) => item.assignedNotes > 0);
157
376
  }
158
377
 
378
/**
 * Plans one "wave" of work for the notes that are still outstanding.
 *
 * Profile ids are trimmed, blanks dropped and duplicates removed. Then:
 *   - no profiles or nothing remaining: empty plan;
 *   - fewer notes than profiles: the first `remaining` profiles get one note each;
 *   - otherwise: the largest multiple of the profile count that fits into
 *     `remaining` is handed to `buildEvenShardPlan`.
 *
 * @param {object} params
 * @param {unknown[]} params.profiles - Candidate profile ids.
 * @param {unknown} params.remainingNotes - Notes still to collect; values that
 *   are not positive numbers count as 0.
 * @returns {Array<{ profileId: string, assignedNotes: number }>} Plan entries
 *   (shape of the even-split branch is whatever `buildEvenShardPlan` returns).
 */
function buildDynamicWavePlan({ profiles, remainingNotes }) {
  const profileIds = [...new Set(
    profiles
      .map((entry) => String(entry || '').trim())
      .filter((id) => id !== ''),
  )];
  const profileCount = profileIds.length;
  if (profileCount === 0) {
    return [];
  }

  const pending = Math.max(0, Number(remainingNotes) || 0);
  if (pending <= 0) {
    return [];
  }

  if (pending >= profileCount) {
    const fullWave = pending - (pending % profileCount);
    return buildEvenShardPlan({
      profiles: profileIds,
      totalNotes: fullWave > 0 ? fullWave : pending,
      defaultMaxNotes: 1,
    });
  }

  return profileIds
    .slice(0, pending)
    .map((profileId) => ({ profileId, assignedNotes: 1 }));
}
398
+
159
399
  function createProfileStats(spec) {
160
400
  return {
161
401
  assignedNotes: spec.assignedNotes,
@@ -211,10 +451,15 @@ function updateProfileStatsFromEvent(stats, payload) {
211
451
  if (event !== 'autoscript:operation_done') return;
212
452
 
213
453
  const operationId = String(payload.operationId || '').trim();
214
- const result = payload.result && typeof payload.result === 'object' ? payload.result : {};
454
+ const rawResult = payload.result && typeof payload.result === 'object' ? payload.result : {};
455
+ const result = rawResult.result && typeof rawResult.result === 'object'
456
+ ? rawResult.result
457
+ : rawResult;
215
458
 
216
459
  if (operationId === 'open_first_detail' || operationId === 'open_next_detail') {
217
- stats.openedNotes = Math.max(stats.openedNotes, toNumber(result.visited, stats.openedNotes));
460
+ if (result.opened === true) {
461
+ stats.openedNotes += 1;
462
+ }
218
463
  return;
219
464
  }
220
465
 
@@ -256,11 +501,31 @@ function isObject(value) {
256
501
 
257
502
  async function runProfile(spec, argv, baseOverrides = {}) {
258
503
  const profileId = spec.profileId;
504
+ const busEnabled = parseBool(argv['bus-events'], false) || process.env.WEBAUTO_BUS_EVENTS === '1';
505
+ const busPublishable = new Set([
506
+ 'xhs.unified.start',
507
+ 'xhs.unified.stop',
508
+ 'xhs.unified.stop_screenshot',
509
+ 'xhs.unified.profile_failed',
510
+ 'autoscript:operation_done',
511
+ 'autoscript:operation_error',
512
+ 'autoscript:operation_terminal',
513
+ 'autoscript:operation_recovery_failed',
514
+ ]);
515
+ let currentRunId = null;
259
516
  const overrides = {
260
517
  ...baseOverrides,
261
518
  maxNotes: spec.assignedNotes,
262
519
  outputRoot: spec.outputRoot,
263
520
  };
521
+ if (spec.sharedHarvestPath) overrides.sharedHarvestPath = spec.sharedHarvestPath;
522
+ if (spec.searchSerialKey) overrides.searchSerialKey = spec.searchSerialKey;
523
+ if (spec.seedCollectCount !== undefined && spec.seedCollectCount !== null) {
524
+ overrides.seedCollectCount = parseNonNegativeInt(spec.seedCollectCount, 0);
525
+ }
526
+ if (spec.seedCollectMaxRounds !== undefined && spec.seedCollectMaxRounds !== null) {
527
+ overrides.seedCollectMaxRounds = parseNonNegativeInt(spec.seedCollectMaxRounds, 0);
528
+ }
264
529
  const options = buildTemplateOptions(argv, profileId, overrides);
265
530
  const script = buildXhsUnifiedAutoscript(options);
266
531
  const normalized = normalizeAutoscript(script, `xhs-unified:${profileId}`);
@@ -269,6 +534,31 @@ async function runProfile(spec, argv, baseOverrides = {}) {
269
534
 
270
535
  await ensureDir(path.dirname(spec.logPath));
271
536
  const stats = createProfileStats(spec);
537
+ const reporter = createTaskReporter({
538
+ profileId,
539
+ keyword: options.keyword,
540
+ });
541
+ let activeRunId = '';
542
+ const pushTaskSnapshot = (status = 'running') => {
543
+ if (!activeRunId) return;
544
+ void reporter.update(activeRunId, {
545
+ status,
546
+ phase: 'unified',
547
+ progress: {
548
+ total: Math.max(0, Number(spec.assignedNotes) || 0),
549
+ processed: Math.max(0, Number(stats.openedNotes) || 0),
550
+ failed: Math.max(0, Number(stats.operationErrors) || 0),
551
+ },
552
+ stats: {
553
+ notesProcessed: Math.max(0, Number(stats.openedNotes) || 0),
554
+ commentsCollected: Math.max(0, Number(stats.commentsCollected) || 0),
555
+ likesPerformed: Math.max(0, Number(stats.likesNewCount) || 0),
556
+ repliesGenerated: 0,
557
+ imagesDownloaded: 0,
558
+ ocrProcessed: 0,
559
+ },
560
+ });
561
+ };
272
562
 
273
563
  const logEvent = (payload) => {
274
564
  const eventPayload = isObject(payload) ? payload : { event: 'autoscript:raw', payload };
@@ -277,9 +567,38 @@ async function runProfile(spec, argv, baseOverrides = {}) {
277
567
  profileId,
278
568
  ...eventPayload,
279
569
  };
570
+ if (!merged.runId && currentRunId) merged.runId = currentRunId;
280
571
  fs.appendFileSync(spec.logPath, `${JSON.stringify(merged)}\n`, 'utf8');
281
572
  console.log(JSON.stringify(merged));
282
573
  updateProfileStatsFromEvent(stats, merged);
574
+ if (busEnabled && busPublishable.has(String(merged.event || '').trim())) {
575
+ void publishBusEvent(merged);
576
+ }
577
+ const eventName = String(merged.event || '').trim();
578
+ const mergedRunId = String(merged.runId || '').trim();
579
+ if (mergedRunId) activeRunId = mergedRunId;
580
+ const shouldReportEvent = (
581
+ eventName === 'xhs.unified.start'
582
+ || eventName === 'xhs.unified.stop'
583
+ || eventName === 'autoscript:start'
584
+ || eventName === 'autoscript:stop'
585
+ || eventName === 'autoscript:impact'
586
+ || eventName === 'autoscript:operation_start'
587
+ || eventName === 'autoscript:operation_done'
588
+ || eventName === 'autoscript:operation_error'
589
+ || eventName === 'autoscript:operation_recovery_failed'
590
+ );
591
+ if (activeRunId && shouldReportEvent) {
592
+ void reporter.pushEvent(activeRunId, eventName, merged);
593
+ }
594
+ if (
595
+ eventName === 'autoscript:operation_done'
596
+ || eventName === 'autoscript:operation_error'
597
+ || eventName === 'autoscript:operation_recovery_failed'
598
+ || eventName === 'autoscript:impact'
599
+ ) {
600
+ pushTaskSnapshot('running');
601
+ }
283
602
  if (
284
603
  merged.event === 'autoscript:event'
285
604
  && merged.subscriptionId === 'login_guard'
@@ -293,8 +612,45 @@ async function runProfile(spec, argv, baseOverrides = {}) {
293
612
  }
294
613
  };
295
614
 
615
+ const runner = new AutoscriptRunner(normalized, {
616
+ profileId,
617
+ log: logEvent,
618
+ });
619
+
620
+ const running = await runner.start();
621
+ currentRunId = running?.runId || currentRunId;
622
+ activeRunId = String(running?.runId || '').trim();
623
+ if (activeRunId) {
624
+ await reporter.ensureCreated(activeRunId, {
625
+ status: 'starting',
626
+ phase: 'unified',
627
+ progress: {
628
+ total: Math.max(0, Number(spec.assignedNotes) || 0),
629
+ processed: 0,
630
+ failed: 0,
631
+ },
632
+ });
633
+ await reporter.update(activeRunId, {
634
+ status: 'running',
635
+ phase: 'unified',
636
+ progress: {
637
+ total: Math.max(0, Number(spec.assignedNotes) || 0),
638
+ processed: 0,
639
+ failed: 0,
640
+ },
641
+ stats: {
642
+ notesProcessed: 0,
643
+ commentsCollected: 0,
644
+ likesPerformed: 0,
645
+ repliesGenerated: 0,
646
+ imagesDownloaded: 0,
647
+ ocrProcessed: 0,
648
+ },
649
+ });
650
+ }
296
651
  logEvent({
297
652
  event: 'xhs.unified.start',
653
+ runId: running?.runId || null,
298
654
  keyword: options.keyword,
299
655
  env: options.env,
300
656
  maxNotes: options.maxNotes,
@@ -302,13 +658,6 @@ async function runProfile(spec, argv, baseOverrides = {}) {
302
658
  outputRoot: options.outputRoot,
303
659
  parallelRunLabel: spec.runLabel,
304
660
  });
305
-
306
- const runner = new AutoscriptRunner(normalized, {
307
- profileId,
308
- log: logEvent,
309
- });
310
-
311
- const running = await runner.start();
312
661
  const done = await running.done;
313
662
 
314
663
  const stopPayload = {
@@ -321,7 +670,47 @@ async function runProfile(spec, argv, baseOverrides = {}) {
321
670
  };
322
671
  logEvent(stopPayload);
323
672
 
673
+ const stopScreenshotPath = await captureStopScreenshot({
674
+ profileId,
675
+ reason: stopPayload.reason || 'stop',
676
+ outputDir: path.dirname(spec.logPath),
677
+ });
678
+ if (stopScreenshotPath) {
679
+ logEvent({
680
+ event: 'xhs.unified.stop_screenshot',
681
+ profileId,
682
+ runId: stopPayload.runId,
683
+ reason: stopPayload.reason || null,
684
+ path: stopScreenshotPath,
685
+ });
686
+ }
687
+
324
688
  stats.stopReason = stopPayload.reason;
689
+ const finalRunId = String(stopPayload.runId || activeRunId || '').trim();
690
+ if (finalRunId) {
691
+ activeRunId = finalRunId;
692
+ const failed = stopPayload.reason === 'script_failure';
693
+ await reporter.update(finalRunId, {
694
+ status: failed ? 'failed' : 'completed',
695
+ phase: 'unified',
696
+ progress: {
697
+ total: Math.max(0, Number(spec.assignedNotes) || 0),
698
+ processed: Math.max(0, Number(stats.openedNotes) || 0),
699
+ failed: Math.max(0, Number(stats.operationErrors) || 0),
700
+ },
701
+ stats: {
702
+ notesProcessed: Math.max(0, Number(stats.openedNotes) || 0),
703
+ commentsCollected: Math.max(0, Number(stats.commentsCollected) || 0),
704
+ likesPerformed: Math.max(0, Number(stats.likesNewCount) || 0),
705
+ repliesGenerated: 0,
706
+ imagesDownloaded: 0,
707
+ ocrProcessed: 0,
708
+ },
709
+ });
710
+ if (failed) {
711
+ await reporter.setError(finalRunId, `autoscript stopped: ${stopPayload.reason || 'script_failure'}`, 'SCRIPT_FAILURE', false);
712
+ }
713
+ }
325
714
 
326
715
  const profileResult = {
327
716
  ok: stopPayload.reason !== 'script_failure',
@@ -331,6 +720,7 @@ async function runProfile(spec, argv, baseOverrides = {}) {
331
720
  assignedNotes: spec.assignedNotes,
332
721
  outputRoot: options.outputRoot,
333
722
  logPath: spec.logPath,
723
+ stopScreenshotPath: stopScreenshotPath || null,
334
724
  stats,
335
725
  };
336
726
 
@@ -527,93 +917,49 @@ export async function runUnified(argv, overrides = {}) {
527
917
  const keyword = String(argv.keyword || argv.k || '').trim();
528
918
  if (!keyword) throw new Error('missing --keyword');
529
919
 
530
- const env = String(argv.env || 'debug').trim() || 'debug';
920
+ const env = String(argv.env || 'prod').trim() || 'prod';
921
+ const busEnabled = parseBool(argv['bus-events'], false) || process.env.WEBAUTO_BUS_EVENTS === '1';
531
922
  const profiles = parseProfiles(argv);
532
923
  if (profiles.length === 0) throw new Error('missing --profile or --profiles or --profilepool');
533
-
534
- const accountStates = await syncXhsAccountsByProfiles(profiles);
535
- const executableProfiles = accountStates
536
- .filter((item) => item?.valid === true && Boolean(String(item?.accountId || '').trim()))
537
- .map((item) => item.profileId);
538
- const invalidProfiles = accountStates.filter((item) => !item || item.valid !== true);
539
- if (executableProfiles.length === 0) {
540
- throw new Error(`no valid business accounts: ${invalidProfiles.map((item) => `${item.profileId}:${item.reason || 'invalid'}`).join(', ')}`);
541
- }
542
-
924
+ await Promise.all(profiles.map((profileId) => ensureProfileSession(profileId)));
543
925
  const defaultMaxNotes = parseIntFlag(argv['max-notes'] ?? argv.target, 30, 1);
544
926
  const totalNotes = parseNonNegativeInt(argv['total-notes'] ?? argv['total-target'], 0);
545
- const plan = buildShardPlan({ profiles: executableProfiles, totalNotes, defaultMaxNotes });
546
- if (plan.length === 0) throw new Error('empty shard plan');
547
-
927
+ const hasTotalTarget = totalNotes > 0;
928
+ const maxWaves = parseIntFlag(argv['max-waves'], 40, 1);
548
929
  const parallelRequested = parseBool(argv.parallel, false);
549
- const parallel = parallelRequested && plan.length > 1;
550
- const concurrency = parallel
551
- ? Math.min(plan.length, parseIntFlag(argv.concurrency, plan.length, 1))
552
- : 1;
930
+ const configuredConcurrency = parseIntFlag(argv.concurrency, profiles.length || 1, 1);
931
+ const planOnly = parseBool(argv['plan-only'], false);
932
+ const seedCollectCountFlag = parseNonNegativeInt(argv['seed-collect-count'], 0);
933
+ const seedCollectRoundsFlag = parseNonNegativeInt(argv['seed-collect-rounds'], 6);
553
934
 
554
935
  const runLabel = formatRunLabel();
555
936
  const baseOutputRoot = resolveDownloadRoot(argv['output-root']);
937
+ const outputRootArg = String(argv['output-root'] || '').trim();
938
+ const useShardRoots = profiles.length > 1;
939
+ const sharedHarvestPath = profiles.length > 1
940
+ ? path.join(baseOutputRoot, 'xiaohongshu', sanitizeForPath(env, 'prod'), sanitizeForPath(keyword, 'unknown'), 'merged', `run-${runLabel}`, 'coord', 'harvest-note-claims.json')
941
+ : '';
942
+ const searchSerialKey = `${sanitizeForPath(env, 'prod')}:${sanitizeForPath(keyword, 'unknown')}:${runLabel}`;
556
943
  const mergedDir = path.join(
557
944
  baseOutputRoot,
558
945
  'xiaohongshu',
559
- sanitizeForPath(env, 'debug'),
946
+ sanitizeForPath(env, 'prod'),
560
947
  sanitizeForPath(keyword, 'unknown'),
561
948
  'merged',
562
949
  `run-${runLabel}`,
563
950
  );
564
951
  const planPath = path.join(mergedDir, 'plan.json');
565
-
566
- const useShardRoots = plan.length > 1;
567
- const specs = plan.map((item) => {
568
- const shardId = sanitizeForPath(item.profileId, 'profile');
569
- const shardOutputRoot = useShardRoots
570
- ? path.join(baseOutputRoot, 'shards', shardId)
571
- : String(argv['output-root'] || '').trim();
572
- return {
573
- ...item,
574
- runLabel,
575
- outputRoot: shardOutputRoot,
576
- logPath: path.join(mergedDir, 'profiles', `${shardId}.events.jsonl`),
577
- summaryPath: path.join(mergedDir, 'profiles', `${shardId}.summary.json`),
578
- };
579
- });
580
-
581
- const planPayload = {
582
- event: 'xhs.unified.plan',
583
- planPath,
584
- keyword,
585
- env,
586
- totalNotes: totalNotes > 0 ? totalNotes : null,
587
- defaultMaxNotes,
588
- parallel,
589
- concurrency,
590
- accountStates,
591
- skippedProfiles: invalidProfiles.map((item) => ({
592
- profileId: item?.profileId || null,
593
- status: item?.status || 'invalid',
594
- reason: item?.reason || 'invalid',
595
- valid: item?.valid === true,
596
- accountId: item?.accountId || null,
597
- })),
598
- specs: specs.map((item) => ({
599
- profileId: item.profileId,
600
- assignedNotes: item.assignedNotes,
601
- outputRoot: item.outputRoot,
602
- logPath: item.logPath,
603
- })),
604
- };
605
- console.log(JSON.stringify(planPayload));
606
-
607
- await writeJson(planPath, planPayload);
608
-
609
- if (parseBool(argv['plan-only'], false)) {
610
- return {
611
- ok: true,
612
- planOnly: true,
613
- planPath,
614
- specs,
615
- };
616
- }
952
+ const completedAtStart = hasTotalTarget
953
+ ? await collectCompletedNoteIds(baseOutputRoot, env, keyword)
954
+ : { count: 0, noteIds: [] };
955
+ let remainingNotes = hasTotalTarget
956
+ ? Math.max(0, totalNotes - completedAtStart.count)
957
+ : defaultMaxNotes;
958
+
959
+ const skippedProfileMap = new Map();
960
+ const wavePlans = [];
961
+ const allResults = [];
962
+ let finalAccountStates = [];
617
963
 
618
964
  const execute = async (spec) => {
619
965
  try {
@@ -667,9 +1013,159 @@ export async function runUnified(argv, overrides = {}) {
667
1013
  }
668
1014
  };
669
1015
 
670
- const results = parallel
671
- ? await runWithConcurrency(specs, concurrency, execute)
672
- : await runWithConcurrency(specs, 1, execute);
1016
+ for (let wave = 1; wave <= maxWaves; wave += 1) {
1017
+ if (hasTotalTarget && remainingNotes <= 0) break;
1018
+ if (!hasTotalTarget && wave > 1) break;
1019
+
1020
+ let executableProfiles = [];
1021
+ if (planOnly) {
1022
+ executableProfiles = profiles.slice();
1023
+ finalAccountStates = executableProfiles.map((profileId) => ({
1024
+ profileId,
1025
+ status: 'plan_only_unverified',
1026
+ reason: 'plan_only_skip_account_sync',
1027
+ valid: null,
1028
+ accountId: null,
1029
+ }));
1030
+ } else {
1031
+ const accountStates = await syncXhsAccountsByProfiles(profiles);
1032
+ finalAccountStates = accountStates;
1033
+ executableProfiles = accountStates
1034
+ .filter((item) => item?.valid === true && Boolean(String(item?.accountId || '').trim()))
1035
+ .map((item) => item.profileId);
1036
+ const invalidProfiles = accountStates.filter((item) => !item || item.valid !== true);
1037
+ for (const item of invalidProfiles) {
1038
+ const profileId = String(item?.profileId || '').trim();
1039
+ if (!profileId) continue;
1040
+ skippedProfileMap.set(profileId, {
1041
+ profileId,
1042
+ status: item?.status || 'invalid',
1043
+ reason: item?.reason || 'invalid',
1044
+ valid: item?.valid === true,
1045
+ accountId: item?.accountId || null,
1046
+ });
1047
+ }
1048
+
1049
+ if (executableProfiles.length === 0) {
1050
+ if (wave === 1) {
1051
+ throw new Error(`no valid business accounts: ${invalidProfiles.map((item) => `${item.profileId}:${item.reason || 'invalid'}`).join(', ')}`);
1052
+ }
1053
+ break;
1054
+ }
1055
+ }
1056
+
1057
+ const plan = hasTotalTarget
1058
+ ? buildDynamicWavePlan({ profiles: executableProfiles, remainingNotes })
1059
+ : buildEvenShardPlan({ profiles: executableProfiles, totalNotes: 0, defaultMaxNotes });
1060
+ if (plan.length === 0) break;
1061
+
1062
+ const parallel = parallelRequested && plan.length > 1;
1063
+ const concurrency = parallel
1064
+ ? Math.min(plan.length, configuredConcurrency)
1065
+ : 1;
1066
+ const waveTag = `wave-${String(wave).padStart(3, '0')}`;
1067
+ const specs = plan.map((item, index) => {
1068
+ const shardId = sanitizeForPath(item.profileId, 'profile');
1069
+ const shardOutputRoot = useShardRoots
1070
+ ? path.join(baseOutputRoot, 'shards', shardId)
1071
+ : outputRootArg;
1072
+ const defaultSeedCollectCount = Math.max(1, Math.min(
1073
+ Number(item.assignedNotes || 1),
1074
+ Math.max(1, plan.length * 2),
1075
+ ));
1076
+ const seedCollectCount = index === 0
1077
+ ? (seedCollectCountFlag > 0 ? seedCollectCountFlag : defaultSeedCollectCount)
1078
+ : 0;
1079
+ return {
1080
+ ...item,
1081
+ runLabel,
1082
+ waveTag,
1083
+ outputRoot: shardOutputRoot,
1084
+ logPath: path.join(mergedDir, 'profiles', `${waveTag}.${shardId}.events.jsonl`),
1085
+ summaryPath: path.join(mergedDir, 'profiles', `${waveTag}.${shardId}.summary.json`),
1086
+ sharedHarvestPath,
1087
+ searchSerialKey,
1088
+ seedCollectCount,
1089
+ seedCollectMaxRounds: index === 0 ? seedCollectRoundsFlag : 0,
1090
+ };
1091
+ });
1092
+
1093
+ wavePlans.push({
1094
+ wave,
1095
+ waveTag,
1096
+ remainingBefore: remainingNotes,
1097
+ parallel,
1098
+ concurrency,
1099
+ specs: specs.map((item) => ({
1100
+ profileId: item.profileId,
1101
+ assignedNotes: item.assignedNotes,
1102
+ outputRoot: item.outputRoot,
1103
+ logPath: item.logPath,
1104
+ sharedHarvestPath: item.sharedHarvestPath || null,
1105
+ seedCollectCount: item.seedCollectCount || 0,
1106
+ seedCollectMaxRounds: item.seedCollectMaxRounds || 0,
1107
+ })),
1108
+ });
1109
+
1110
+ if (planOnly) break;
1111
+
1112
+ const waveResults = parallel
1113
+ ? await runWithConcurrency(specs, concurrency, execute)
1114
+ : await runWithConcurrency(specs, 1, execute);
1115
+ allResults.push(...waveResults);
1116
+
1117
+ if (hasTotalTarget) {
1118
+ const openedInWave = waveResults.reduce((sum, item) => sum + toNumber(item?.stats?.openedNotes, 0), 0);
1119
+ remainingNotes = Math.max(0, remainingNotes - openedInWave);
1120
+ const waveRecord = wavePlans[wavePlans.length - 1];
1121
+ waveRecord.openedInWave = openedInWave;
1122
+ waveRecord.remainingAfter = remainingNotes;
1123
+ if (openedInWave <= 0) {
1124
+ console.error(JSON.stringify({
1125
+ event: 'xhs.unified.wave_stalled',
1126
+ wave,
1127
+ remainingNotes,
1128
+ }));
1129
+ break;
1130
+ }
1131
+ }
1132
+ }
1133
+
1134
+ const skippedProfiles = Array.from(skippedProfileMap.values());
1135
+
1136
+ const planPayload = {
1137
+ event: 'xhs.unified.plan',
1138
+ planPath,
1139
+ keyword,
1140
+ env,
1141
+ totalNotes: totalNotes > 0 ? totalNotes : null,
1142
+ defaultMaxNotes,
1143
+ maxWaves,
1144
+ runLabel,
1145
+ hasTotalTarget,
1146
+ completedAtStart: completedAtStart.count,
1147
+ remainingAtPlan: remainingNotes,
1148
+ accountStates: finalAccountStates,
1149
+ skippedProfiles,
1150
+ waves: wavePlans,
1151
+ };
1152
+ console.log(JSON.stringify(planPayload));
1153
+
1154
+ await writeJson(planPath, planPayload);
1155
+
1156
+ if (planOnly) {
1157
+ return {
1158
+ ok: true,
1159
+ planOnly: true,
1160
+ planPath,
1161
+ waves: wavePlans,
1162
+ };
1163
+ }
1164
+
1165
+ const results = allResults;
1166
+ if (results.length === 0) {
1167
+ throw new Error(`no executable waves generated, see ${planPath}`);
1168
+ }
673
1169
 
674
1170
  const merged = await mergeProfileOutputs({
675
1171
  results,
@@ -677,26 +1173,55 @@ export async function runUnified(argv, overrides = {}) {
677
1173
  keyword,
678
1174
  env,
679
1175
  totalNotes,
680
- parallel,
681
- concurrency,
682
- skippedProfiles: invalidProfiles.map((item) => ({
683
- profileId: item?.profileId || null,
684
- status: item?.status || 'invalid',
685
- reason: item?.reason || 'invalid',
686
- accountId: item?.accountId || null,
687
- })),
1176
+ parallel: parallelRequested,
1177
+ concurrency: configuredConcurrency,
1178
+ skippedProfiles,
688
1179
  });
689
1180
 
690
- console.log(JSON.stringify({
1181
+ const mergedSummary = {
1182
+ ...merged.mergedSummary,
1183
+ progress: {
1184
+ completedAtStart: completedAtStart.count,
1185
+ completedDuringRun: toNumber(merged.mergedSummary?.totals?.openedNotes, 0),
1186
+ targetTotal: hasTotalTarget ? totalNotes : null,
1187
+ remainingAfterRun: hasTotalTarget ? Math.max(0, remainingNotes) : null,
1188
+ reachedTarget: hasTotalTarget ? remainingNotes <= 0 : null,
1189
+ },
1190
+ waves: wavePlans,
1191
+ };
1192
+ await writeJson(merged.summaryPath, mergedSummary);
1193
+
1194
+ const mergedEvent = {
691
1195
  event: 'xhs.unified.merged',
692
1196
  summaryPath: merged.summaryPath,
1197
+ waves: wavePlans.length,
693
1198
  profilesTotal: results.length,
694
1199
  profilesSucceeded: results.filter((item) => item.ok).length,
695
1200
  profilesFailed: results.filter((item) => !item.ok).length,
696
- }));
1201
+ remainingNotes: hasTotalTarget ? remainingNotes : null,
1202
+ };
1203
+ console.log(JSON.stringify(mergedEvent));
1204
+ if (busEnabled) {
1205
+ void publishBusEvent(mergedEvent);
1206
+ }
697
1207
 
698
- if (results.some((item) => !item.ok)) {
699
- throw new Error(`unified finished with failures, see ${merged.summaryPath}`);
1208
+ const failedResults = results.filter((item) => !item.ok);
1209
+ if (hasTotalTarget && remainingNotes > 0) {
1210
+ throw new Error(`target not reached, remaining=${remainingNotes}, see ${merged.summaryPath}`);
1211
+ }
1212
+ if (failedResults.length > 0) {
1213
+ if (hasTotalTarget && remainingNotes <= 0) {
1214
+ console.warn(JSON.stringify({
1215
+ event: 'xhs.unified.partial_failures_tolerated',
1216
+ summaryPath: merged.summaryPath,
1217
+ failedProfiles: failedResults.map((item) => ({
1218
+ profileId: item.profileId,
1219
+ reason: item.reason || null,
1220
+ })),
1221
+ }));
1222
+ } else {
1223
+ throw new Error(`unified finished with failures, see ${merged.summaryPath}`);
1224
+ }
700
1225
  }
701
1226
 
702
1227
  return {
@@ -717,10 +1242,18 @@ async function main() {
717
1242
  ' --max-notes <n> 单账号目标(未启用 total-notes 时)',
718
1243
  ' --total-notes <n> 总目标数(自动分片到账号)',
719
1244
  ' --total-target <n> total-notes 别名',
1245
+ ' --max-waves <n> 动态分片最大波次(默认40)',
720
1246
  ' --parallel 启用并行执行',
1247
+ ' --bus-events <bool> 启用 UI 事件总线推送(默认 false)',
721
1248
  ' --concurrency <n> 并行度(默认=账号数)',
1249
+ ' --resume <bool> 断点续传(默认 false)',
1250
+ ' --incremental-max <bool> max-notes 作为增量配额(默认 true)',
722
1251
  ' --plan-only 只生成分片计划,不执行',
723
1252
  ' --output-root <path> 输出根目录(并行时自动分 profile shard)',
1253
+ ' --seed-collect-count <n> 首账号预采样去重ID数量(默认按分片自动)',
1254
+ ' --seed-collect-rounds <n> 首账号预采样滚动轮数(默认6)',
1255
+ ' --search-serial-key <key> 搜索阶段串行锁key(默认自动生成)',
1256
+ ' --shared-harvest-path <path> 共享harvest去重列表路径(默认自动生成)',
724
1257
  ].join('\n'));
725
1258
  return;
726
1259
  }