@web-auto/webauto 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/desktop-console/default-settings.json +2 -2
- package/apps/desktop-console/dist/main/index.mjs +983 -128
- package/apps/desktop-console/dist/main/preload.mjs +7 -0
- package/apps/desktop-console/dist/renderer/index.html +622 -50
- package/apps/desktop-console/dist/renderer/index.js +2423 -469
- package/apps/desktop-console/dist/renderer/run.mts +6 -5
- package/apps/desktop-console/entry/ui-cli.mjs +672 -0
- package/apps/desktop-console/entry/ui-console.mjs +416 -29
- package/apps/webauto/entry/account.mjs +89 -53
- package/apps/webauto/entry/browser-status.mjs +7 -10
- package/apps/webauto/entry/lib/account-detect.mjs +254 -28
- package/apps/webauto/entry/lib/account-store.mjs +219 -30
- package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
- package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
- package/apps/webauto/entry/lib/profilepool.mjs +14 -5
- package/apps/webauto/entry/lib/quota-status.mjs +23 -0
- package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
- package/apps/webauto/entry/profilepool.mjs +106 -17
- package/apps/webauto/entry/schedule.mjs +612 -0
- package/apps/webauto/entry/weibo-unified.mjs +134 -0
- package/apps/webauto/entry/xhs-install.mjs +256 -31
- package/apps/webauto/entry/xhs-status.mjs +5 -2
- package/apps/webauto/entry/xhs-unified.mjs +631 -98
- package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
- package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
- package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
- package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
- package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
- package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
- package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
- package/bin/camoufox-cli.mjs +61 -0
- package/bin/webauto.mjs +301 -54
- package/dist/modules/camo-backend/src/index.js +49 -1
- package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
- package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
- package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
- package/dist/modules/collection-manager/bloom-filter.js +91 -0
- package/dist/modules/collection-manager/date-utils.js +275 -0
- package/dist/modules/collection-manager/index.js +258 -0
- package/dist/modules/collection-manager/storage.js +195 -0
- package/dist/modules/collection-manager/types.js +47 -0
- package/dist/modules/logging/src/index.js +1 -1
- package/dist/modules/process-registry/index.js +230 -0
- package/dist/modules/rate-limiter/index.js +242 -0
- package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
- package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
- package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
- package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
- package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
- package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
- package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
- package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
- package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
- package/dist/modules/workflow/config/workflowRegistry.js +2 -0
- package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
- package/dist/modules/workflow/src/runner.js +6 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
- package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
- package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
- package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
- package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
- package/dist/services/shared/serviceProcessLogger.js +1 -1
- package/dist/services/unified-api/server.js +105 -11
- package/modules/camo-backend/src/index.ts +46 -1
- package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
- package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
- package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
- package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
- package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
- package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
- package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
- package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
- package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
- package/modules/collection-manager/bloom-filter.ts +112 -0
- package/modules/collection-manager/date-utils.ts +316 -0
- package/modules/collection-manager/index.ts +309 -0
- package/modules/collection-manager/package.json +10 -0
- package/modules/collection-manager/storage.ts +174 -0
- package/modules/collection-manager/types.ts +156 -0
- package/modules/logging/src/index.ts +1 -1
- package/modules/process-registry/index.ts +284 -0
- package/modules/rate-limiter/index.ts +322 -0
- package/modules/state/src/paths.ts +9 -1
- package/modules/task-scheduler/index.ts +293 -0
- package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
- package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
- package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
- package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
- package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
- package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
- package/modules/workflow/config/workflowRegistry.ts +2 -0
- package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
- package/modules/workflow/src/runner.ts +6 -0
- package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
- package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
- package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
- package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
- package/package.json +13 -4
- package/scripts/postinstall-resources.mjs +62 -0
- package/scripts/test/run-coverage.mjs +76 -0
- package/scripts/weibo/search.ts +49 -0
- package/services/shared/serviceProcessLogger.ts +1 -1
- package/services/unified-api/server.ts +98 -12
|
@@ -11,6 +11,8 @@ import { AutoscriptRunner } from '../../../modules/camo-runtime/src/autoscript/r
|
|
|
11
11
|
import { syncXhsAccountsByProfiles } from './lib/account-detect.mjs';
|
|
12
12
|
import { markProfileInvalid } from './lib/account-store.mjs';
|
|
13
13
|
import { listProfilesForPool } from './lib/profilepool.mjs';
|
|
14
|
+
import { runCamo } from './lib/camo-cli.mjs';
|
|
15
|
+
import { publishBusEvent } from './lib/bus-publish.mjs';
|
|
14
16
|
|
|
15
17
|
function nowIso() {
|
|
16
18
|
return new Date().toISOString();
|
|
@@ -65,15 +67,125 @@ function sanitizeForPath(name, fallback = 'unknown') {
|
|
|
65
67
|
return cleaned || fallback;
|
|
66
68
|
}
|
|
67
69
|
|
|
70
|
+
// Landing page every freshly started profile session is pointed at.
const XHS_HOME_URL = 'https://www.xiaohongshu.com';

/**
 * Start a camo session for the given profile and navigate it to the XHS home page.
 *
 * @param {string} profileId - Profile identifier; blank or nullish values are rejected.
 * @returns {Promise<boolean>} Whether the `start` command reported `ok`.
 */
async function ensureProfileSession(profileId) {
  const normalizedId = String(profileId || '').trim();
  if (normalizedId.length === 0) {
    return false;
  }

  const startResult = runCamo(['start', normalizedId, '--url', XHS_HOME_URL], {
    rootDir: process.cwd(),
    timeoutMs: 60000,
  });
  const started = Boolean(startResult?.ok);

  if (started) {
    // The outcome of the follow-up navigation is intentionally not inspected.
    runCamo(['goto', normalizedId, XHS_HOME_URL], { rootDir: process.cwd(), timeoutMs: 60000 });
  }
  return started;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Compose the file path used for a "stop" screenshot.
 *
 * @param {string} profileId - Profile the screenshot belongs to.
 * @param {string} reason - Stop reason; an empty value is treated as `stop`.
 * @param {string} outputDir - Directory the file is placed in.
 * @returns {string} `<outputDir>/stop-<profile>-<reason>.png` with both parts passed through sanitizeForPath.
 */
function buildStopScreenshotPath(profileId, reason, outputDir) {
  const profilePart = sanitizeForPath(profileId, 'profile');
  const reasonPart = sanitizeForPath(reason || 'stop', 'stop');
  return path.join(outputDir, `stop-${profilePart}-${reasonPart}.png`);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Best-effort capture of a screenshot for a profile that is being stopped.
 *
 * The first attempt uses whatever session is already running. If that fails,
 * the profile session is (re)started via ensureProfileSession and the capture
 * is retried once.
 *
 * @param {object} params
 * @param {string} params.profileId - Profile to capture; blank values short-circuit to `null`.
 * @param {string} params.reason - Stop reason, used only to build the file name.
 * @param {string} params.outputDir - Target directory; blank values short-circuit to `null`.
 * @returns {Promise<string|null>} Path of the screenshot, or `null` when nothing was captured.
 */
async function captureStopScreenshot({ profileId, reason, outputDir }) {
  const outDir = String(outputDir || '').trim();
  if (!outDir) return null;

  // Mirror ensureProfileSession: never hand a blank/undefined id to the camo CLI.
  const id = String(profileId || '').trim();
  if (!id) return null;

  try {
    await fsp.mkdir(outDir, { recursive: true });
  } catch {
    // Best effort: if the directory cannot be created the capture below
    // reports the failure through its `ok` flag.
  }

  const outputPath = buildStopScreenshotPath(profileId, reason, outDir);
  const tryCapture = () => runCamo(['screenshot', id, '--output', outputPath], {
    rootDir: process.cwd(),
    timeoutMs: 60000,
  });

  let ret = tryCapture();
  if (!ret?.ok) {
    // The session may not be running; start it and retry once regardless of
    // the start result (a start can fail while a usable session exists).
    await ensureProfileSession(id);
    ret = tryCapture();
  }
  return ret?.ok ? outputPath : null;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Produce path-safe directory segments for an environment/keyword pair.
 *
 * @param {object} params
 * @param {string} params.env - Environment name; sanitizeForPath falls back to `prod`.
 * @param {string} params.keyword - Search keyword; sanitizeForPath falls back to `unknown`.
 * @returns {{safeEnv: string, safeKeyword: string}} Sanitized segments.
 */
function sanitizeKeywordDirParts({ env, keyword }) {
  const safeEnv = sanitizeForPath(env, 'prod');
  const safeKeyword = sanitizeForPath(keyword, 'unknown');
  return { safeEnv, safeKeyword };
}
|
|
118
|
+
|
|
68
119
|
/**
 * Resolve the root directory downloads are written to.
 *
 * Precedence:
 *   1. `customRoot` argument,
 *   2. `WEBAUTO_DOWNLOAD_ROOT`, then `WEBAUTO_DOWNLOAD_DIR`,
 *   3. on Windows: `D:\webauto` when drive D: exists, else `<home>/.webauto`,
 *   4. elsewhere: `<home>/.webauto/download`.
 *
 * @param {string} [customRoot=''] - Explicit root; blank values are ignored.
 * @returns {string} Download root path (absolute when taken from the argument or the environment).
 */
function resolveDownloadRoot(customRoot = '') {
  const fromArg = String(customRoot || '').trim();
  if (fromArg) return path.resolve(fromArg);

  // Trim each variable on its own so a whitespace-only WEBAUTO_DOWNLOAD_ROOT
  // cannot shadow a valid WEBAUTO_DOWNLOAD_DIR.
  const fromEnv = [process.env.WEBAUTO_DOWNLOAD_ROOT, process.env.WEBAUTO_DOWNLOAD_DIR]
    .map((value) => String(value || '').trim())
    .find(Boolean);
  if (fromEnv) return path.resolve(fromEnv);

  // Resolved lazily: the Windows D: branch never needs the home directory.
  const resolveHome = () => process.env.HOME || process.env.USERPROFILE || os.homedir();

  if (process.platform === 'win32') {
    // fs.existsSync reports failures as `false` rather than throwing, so no try/catch is needed.
    if (fs.existsSync('D:\\')) return 'D:\\webauto';
    return path.join(resolveHome(), '.webauto');
  }
  return path.join(resolveHome(), '.webauto', 'download');
}
|
|
76
136
|
|
|
137
|
+
// Directory names inside a keyword folder that collectCompletedNoteIds does not count as note ids.
const NON_NOTE_DIR_NAMES = new Set(
  ['merged', 'profiles', 'like-evidence', 'virtual-like', 'smart-reply', 'comment-match', 'discover-fallback'],
);
|
|
146
|
+
|
|
147
|
+
/**
 * List every directory that may hold output for an env/keyword pair:
 * the primary `<root>/xiaohongshu/<env>/<keyword>` folder plus the same
 * sub-path under each directory found in `<root>/shards`.
 *
 * The returned directories are candidates only; their existence is not checked.
 *
 * @param {string} baseOutputRoot - Output root directory.
 * @param {string} env - Environment name (sanitized before use).
 * @param {string} keyword - Keyword (sanitized before use).
 * @returns {Promise<string[]>} De-duplicated candidate directories, primary first.
 */
async function collectKeywordDirs(baseOutputRoot, env, keyword) {
  const { safeEnv, safeKeyword } = sanitizeKeywordDirParts({ env, keyword });
  const keywordTail = ['xiaohongshu', safeEnv, safeKeyword];

  const candidates = new Set([path.join(baseOutputRoot, ...keywordTail)]);
  const shardsRoot = path.join(baseOutputRoot, 'shards');

  try {
    const shardEntries = await fsp.readdir(shardsRoot, { withFileTypes: true });
    for (const shard of shardEntries) {
      if (shard.isDirectory()) {
        candidates.add(path.join(shardsRoot, shard.name, ...keywordTail));
      }
    }
  } catch {
    // ignore: a missing or unreadable shards folder leaves whatever was gathered so far
  }

  return [...candidates];
}
|
|
164
|
+
|
|
165
|
+
/**
 * Gather note ids that already have an output directory for an env/keyword pair.
 *
 * Every sub-directory of the folders returned by collectKeywordDirs counts as a
 * note id, except names starting with `.` or `_` and names listed in
 * NON_NOTE_DIR_NAMES. Folders that cannot be read are skipped.
 *
 * @param {string} baseOutputRoot - Output root directory.
 * @param {string} env - Environment name.
 * @param {string} keyword - Keyword.
 * @returns {Promise<{count: number, noteIds: string[]}>} Unique note ids and their count.
 */
async function collectCompletedNoteIds(baseOutputRoot, env, keyword) {
  const isReservedName = (name) => (
    name.startsWith('.') || name.startsWith('_') || NON_NOTE_DIR_NAMES.has(name)
  );

  const noteIdSet = new Set();
  const keywordDirs = await collectKeywordDirs(baseOutputRoot, env, keyword);

  for (const dir of keywordDirs) {
    let dirEntries;
    try {
      dirEntries = await fsp.readdir(dir, { withFileTypes: true });
    } catch {
      continue;
    }

    dirEntries
      .filter((item) => item.isDirectory())
      .map((item) => String(item.name || '').trim())
      .filter((name) => name !== '' && !isReservedName(name))
      .forEach((name) => noteIdSet.add(name));
  }

  return {
    count: noteIdSet.size,
    noteIds: [...noteIdSet],
  };
}
|
|
188
|
+
|
|
77
189
|
/**
 * Create a directory, including any missing parents.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {Promise<void>} Resolves once `fsp.mkdir` has completed.
 */
async function ensureDir(dirPath) {
  const mkdirOptions = { recursive: true };
  await fsp.mkdir(dirPath, mkdirOptions);
}
|
|
@@ -88,23 +200,123 @@ async function appendJsonl(filePath, payload) {
|
|
|
88
200
|
await fsp.appendFile(filePath, `${JSON.stringify(payload)}\n`, 'utf8');
|
|
89
201
|
}
|
|
90
202
|
|
|
203
|
+
/**
 * Resolve the base URL of the unified API, without trailing slashes.
 *
 * `WEBAUTO_UNIFIED_API` takes precedence over `WEBAUTO_UNIFIED_URL`. A value
 * that is blank (or consists only of slashes) is treated as unset, so it can
 * neither shadow the other variable nor produce an empty base URL.
 *
 * @returns {string} Base URL; `http://127.0.0.1:7701` when nothing usable is configured.
 */
function resolveUnifiedApiBaseUrl() {
  const configured = [process.env.WEBAUTO_UNIFIED_API, process.env.WEBAUTO_UNIFIED_URL]
    .map((value) => String(value || '').trim().replace(/\/+$/, ''))
    .find(Boolean);
  return configured || 'http://127.0.0.1:7701';
}
|
|
211
|
+
|
|
212
|
+
/**
 * POST a JSON payload to the unified API. Never throws: network errors,
 * timeouts and non-OK statuses all resolve to `false`.
 *
 * @param {string} baseUrl - API base URL without trailing slash.
 * @param {string} pathname - Path appended to `baseUrl` (expected to start with `/`).
 * @param {object} [payload] - Request body; nullish values are sent as `{}`.
 * @returns {Promise<boolean>} `true` when the response status is OK, otherwise `false`.
 */
async function postUnifiedTaskRequest(baseUrl, pathname, payload) {
  try {
    const response = await fetch(`${baseUrl}${pathname}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload || {}),
      // Reporting is best-effort; do not let a slow API stall the caller.
      signal: AbortSignal.timeout(2000),
    });
    const succeeded = response.ok === true;
    // The body is never read, so release it explicitly; an unconsumed body can
    // keep the underlying connection busy until garbage collection.
    await response.body?.cancel().catch(() => {});
    return succeeded;
  } catch {
    return false;
  }
}
|
|
226
|
+
|
|
227
|
+
/**
 * Build a reporter that mirrors task state to the unified API.
 *
 * Every method is best-effort: it resolves to `false` for a blank run id and
 * otherwise to the boolean returned by postUnifiedTaskRequest.
 *
 * @param {object} [seed] - Static fields attached to create/update payloads.
 * @param {string} [seed.profileId] - Profile id; blank values become `unknown`.
 * @param {string} [seed.keyword] - Keyword; trimmed.
 * @returns {{
 *   ensureCreated: (runId: string, extra?: object) => Promise<boolean>,
 *   update: (runId: string, patch?: object) => Promise<boolean>,
 *   pushEvent: (runId: string, type: string, data?: object) => Promise<boolean>,
 *   setError: (runId: string, message: string, code?: string, recoverable?: boolean) => Promise<boolean>,
 * }} Reporter functions bound to the resolved API base URL.
 */
function createTaskReporter(seed = {}) {
  const baseUrl = resolveUnifiedApiBaseUrl();
  const staticSeed = {
    profileId: String(seed.profileId || 'unknown').trim() || 'unknown',
    keyword: String(seed.keyword || '').trim(),
    phase: 'unified',
  };
  // Run ids whose task record has been created successfully.
  const createdRunIds = new Set();
  // Creation requests still in flight, keyed by run id. Callers fire update()
  // and pushEvent() without awaiting, so several may arrive before the first
  // create POST resolves; sharing the promise avoids duplicate create requests.
  const pendingCreates = new Map();

  const normalizeRunId = (runId) => String(runId || '').trim();

  const ensureCreated = async (runId, extra = {}) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    if (createdRunIds.has(rid)) return true;

    const inFlight = pendingCreates.get(rid);
    if (inFlight) return inFlight;

    const request = (async () => {
      // Spread order is kept as-is: fields in `extra` may override the seed.
      const ok = await postUnifiedTaskRequest(baseUrl, '/api/v1/tasks', {
        runId: rid,
        ...staticSeed,
        ...extra,
      });
      if (ok) createdRunIds.add(rid);
      return ok;
    })();
    pendingCreates.set(rid, request);
    // Drop the entry on success and failure so a failed create can be retried.
    const release = () => {
      pendingCreates.delete(rid);
    };
    void request.then(release, release);
    return request;
  };

  const update = async (runId, patch = {}) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    // The update is sent even when creation failed; its result is what is returned.
    await ensureCreated(rid, patch);
    return postUnifiedTaskRequest(baseUrl, `/api/v1/tasks/${encodeURIComponent(rid)}/update`, {
      ...staticSeed,
      ...patch,
    });
  };

  const pushEvent = async (runId, type, data = {}) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    await ensureCreated(rid, data);
    return postUnifiedTaskRequest(baseUrl, `/api/v1/tasks/${encodeURIComponent(rid)}/events`, {
      type: String(type || 'event').trim() || 'event',
      data,
    });
  };

  const setError = async (runId, message, code = 'TASK_ERROR', recoverable = false) => {
    const rid = normalizeRunId(runId);
    if (!rid) return false;
    return update(rid, {
      error: {
        message: String(message || 'task_error'),
        code: String(code || 'TASK_ERROR'),
        timestamp: Date.now(),
        recoverable: recoverable === true,
      },
    });
  };

  return {
    ensureCreated,
    update,
    pushEvent,
    setError,
  };
}
|
|
289
|
+
|
|
91
290
|
function buildTemplateOptions(argv, profileId, overrides = {}) {
|
|
92
291
|
const keyword = String(argv.keyword || argv.k || '').trim();
|
|
93
|
-
const env = String(argv.env || '
|
|
292
|
+
const env = String(argv.env || 'prod').trim() || 'prod';
|
|
94
293
|
const inputMode = String(argv['input-mode'] || 'protocol').trim() || 'protocol';
|
|
95
294
|
const headless = parseBool(argv.headless, false);
|
|
96
295
|
const ocrCommand = String(argv['ocr-command'] || '').trim();
|
|
97
296
|
const maxNotes = parseIntFlag(argv['max-notes'] ?? argv.target, 30, 1);
|
|
297
|
+
const maxComments = parseNonNegativeInt(argv['max-comments'], 0);
|
|
98
298
|
const throttle = parseIntFlag(argv.throttle, 500, 100);
|
|
99
299
|
const tabCount = parseIntFlag(argv['tab-count'], 4, 1);
|
|
100
300
|
const noteIntervalMs = parseIntFlag(argv['note-interval'], 900, 200);
|
|
101
|
-
const maxLikesPerRound =
|
|
301
|
+
const maxLikesPerRound = parseNonNegativeInt(argv['max-likes'], 0);
|
|
102
302
|
const matchMode = String(argv['match-mode'] || 'any').trim() || 'any';
|
|
103
303
|
const matchMinHits = parseIntFlag(argv['match-min-hits'], 1, 1);
|
|
104
304
|
const matchKeywords = String(argv['match-keywords'] || keyword).trim();
|
|
105
305
|
const likeKeywords = String(argv['like-keywords'] || '').trim();
|
|
106
306
|
const replyText = String(argv['reply-text'] || '感谢分享,已关注').trim() || '感谢分享,已关注';
|
|
107
307
|
const outputRoot = String(argv['output-root'] || '').trim();
|
|
308
|
+
const resume = parseBool(argv.resume, false);
|
|
309
|
+
const incrementalMax = parseBool(argv['incremental-max'], true);
|
|
310
|
+
const sharedHarvestPath = String(overrides.sharedHarvestPath ?? argv['shared-harvest-path'] ?? '').trim();
|
|
311
|
+
const searchSerialKey = String(overrides.searchSerialKey ?? argv['search-serial-key'] ?? '').trim();
|
|
312
|
+
const seedCollectCount = parseNonNegativeInt(
|
|
313
|
+
overrides.seedCollectCount ?? argv['seed-collect-count'],
|
|
314
|
+
0,
|
|
315
|
+
);
|
|
316
|
+
const seedCollectMaxRounds = parseNonNegativeInt(
|
|
317
|
+
overrides.seedCollectMaxRounds ?? argv['seed-collect-rounds'],
|
|
318
|
+
0,
|
|
319
|
+
);
|
|
108
320
|
|
|
109
321
|
const dryRun = parseBool(argv['dry-run'], false);
|
|
110
322
|
const disableDryRun = parseBool(argv['no-dry-run'], false);
|
|
@@ -122,7 +334,10 @@ function buildTemplateOptions(argv, profileId, overrides = {}) {
|
|
|
122
334
|
tabCount,
|
|
123
335
|
noteIntervalMs,
|
|
124
336
|
maxNotes,
|
|
337
|
+
maxComments,
|
|
125
338
|
maxLikesPerRound,
|
|
339
|
+
resume,
|
|
340
|
+
incrementalMax,
|
|
126
341
|
matchMode,
|
|
127
342
|
matchMinHits,
|
|
128
343
|
matchKeywords,
|
|
@@ -135,11 +350,15 @@ function buildTemplateOptions(argv, profileId, overrides = {}) {
|
|
|
135
350
|
doReply: parseBool(argv['do-reply'], false) && !effectiveDryRun,
|
|
136
351
|
doOcr: parseBool(argv['do-ocr'], false),
|
|
137
352
|
persistComments: parseBool(argv['persist-comments'], !effectiveDryRun),
|
|
353
|
+
sharedHarvestPath,
|
|
354
|
+
searchSerialKey,
|
|
355
|
+
seedCollectCount,
|
|
356
|
+
seedCollectMaxRounds,
|
|
138
357
|
};
|
|
139
358
|
return { ...base, ...overrides };
|
|
140
359
|
}
|
|
141
360
|
|
|
142
|
-
function
|
|
361
|
+
function buildEvenShardPlan({ profiles, totalNotes, defaultMaxNotes }) {
|
|
143
362
|
const uniqueProfiles = Array.from(new Set(profiles.map((item) => String(item || '').trim()).filter(Boolean)));
|
|
144
363
|
if (uniqueProfiles.length === 0) return [];
|
|
145
364
|
|
|
@@ -156,6 +375,27 @@ function buildShardPlan({ profiles, totalNotes, defaultMaxNotes }) {
|
|
|
156
375
|
return plan.filter((item) => item.assignedNotes > 0);
|
|
157
376
|
}
|
|
158
377
|
|
|
378
|
+
/**
 * Plan one "wave" of work across the available profiles.
 *
 * - No usable profiles or nothing remaining: empty plan.
 * - Fewer notes than profiles: the first `remaining` profiles get one note each.
 * - Otherwise: the largest multiple of the profile count that fits into
 *   `remaining` is handed to buildEvenShardPlan.
 *
 * @param {object} params
 * @param {Array<string>} params.profiles - Candidate profile ids (trimmed, blanks dropped, de-duplicated).
 * @param {number} params.remainingNotes - Notes still to be processed.
 * @returns {Array<{profileId: string, assignedNotes: number}>} Assignments for this wave.
 */
function buildDynamicWavePlan({ profiles, remainingNotes }) {
  const cleanedIds = profiles.map((item) => String(item || '').trim()).filter(Boolean);
  const profileIds = [...new Set(cleanedIds)];
  const profileCount = profileIds.length;
  if (profileCount === 0) return [];

  const notesLeft = Math.max(0, Number(remainingNotes) || 0);
  if (notesLeft <= 0) return [];

  if (notesLeft < profileCount) {
    const selected = profileIds.slice(0, notesLeft);
    return selected.map((profileId) => ({ profileId, assignedNotes: 1 }));
  }

  const leftover = notesLeft % profileCount;
  const waveTotal = notesLeft - leftover;
  const totalNotes = waveTotal > 0 ? waveTotal : notesLeft;
  return buildEvenShardPlan({ profiles: profileIds, totalNotes, defaultMaxNotes: 1 });
}
|
|
398
|
+
|
|
159
399
|
function createProfileStats(spec) {
|
|
160
400
|
return {
|
|
161
401
|
assignedNotes: spec.assignedNotes,
|
|
@@ -211,10 +451,15 @@ function updateProfileStatsFromEvent(stats, payload) {
|
|
|
211
451
|
if (event !== 'autoscript:operation_done') return;
|
|
212
452
|
|
|
213
453
|
const operationId = String(payload.operationId || '').trim();
|
|
214
|
-
const
|
|
454
|
+
const rawResult = payload.result && typeof payload.result === 'object' ? payload.result : {};
|
|
455
|
+
const result = rawResult.result && typeof rawResult.result === 'object'
|
|
456
|
+
? rawResult.result
|
|
457
|
+
: rawResult;
|
|
215
458
|
|
|
216
459
|
if (operationId === 'open_first_detail' || operationId === 'open_next_detail') {
|
|
217
|
-
|
|
460
|
+
if (result.opened === true) {
|
|
461
|
+
stats.openedNotes += 1;
|
|
462
|
+
}
|
|
218
463
|
return;
|
|
219
464
|
}
|
|
220
465
|
|
|
@@ -256,11 +501,31 @@ function isObject(value) {
|
|
|
256
501
|
|
|
257
502
|
async function runProfile(spec, argv, baseOverrides = {}) {
|
|
258
503
|
const profileId = spec.profileId;
|
|
504
|
+
const busEnabled = parseBool(argv['bus-events'], false) || process.env.WEBAUTO_BUS_EVENTS === '1';
|
|
505
|
+
const busPublishable = new Set([
|
|
506
|
+
'xhs.unified.start',
|
|
507
|
+
'xhs.unified.stop',
|
|
508
|
+
'xhs.unified.stop_screenshot',
|
|
509
|
+
'xhs.unified.profile_failed',
|
|
510
|
+
'autoscript:operation_done',
|
|
511
|
+
'autoscript:operation_error',
|
|
512
|
+
'autoscript:operation_terminal',
|
|
513
|
+
'autoscript:operation_recovery_failed',
|
|
514
|
+
]);
|
|
515
|
+
let currentRunId = null;
|
|
259
516
|
const overrides = {
|
|
260
517
|
...baseOverrides,
|
|
261
518
|
maxNotes: spec.assignedNotes,
|
|
262
519
|
outputRoot: spec.outputRoot,
|
|
263
520
|
};
|
|
521
|
+
if (spec.sharedHarvestPath) overrides.sharedHarvestPath = spec.sharedHarvestPath;
|
|
522
|
+
if (spec.searchSerialKey) overrides.searchSerialKey = spec.searchSerialKey;
|
|
523
|
+
if (spec.seedCollectCount !== undefined && spec.seedCollectCount !== null) {
|
|
524
|
+
overrides.seedCollectCount = parseNonNegativeInt(spec.seedCollectCount, 0);
|
|
525
|
+
}
|
|
526
|
+
if (spec.seedCollectMaxRounds !== undefined && spec.seedCollectMaxRounds !== null) {
|
|
527
|
+
overrides.seedCollectMaxRounds = parseNonNegativeInt(spec.seedCollectMaxRounds, 0);
|
|
528
|
+
}
|
|
264
529
|
const options = buildTemplateOptions(argv, profileId, overrides);
|
|
265
530
|
const script = buildXhsUnifiedAutoscript(options);
|
|
266
531
|
const normalized = normalizeAutoscript(script, `xhs-unified:${profileId}`);
|
|
@@ -269,6 +534,31 @@ async function runProfile(spec, argv, baseOverrides = {}) {
|
|
|
269
534
|
|
|
270
535
|
await ensureDir(path.dirname(spec.logPath));
|
|
271
536
|
const stats = createProfileStats(spec);
|
|
537
|
+
const reporter = createTaskReporter({
|
|
538
|
+
profileId,
|
|
539
|
+
keyword: options.keyword,
|
|
540
|
+
});
|
|
541
|
+
let activeRunId = '';
|
|
542
|
+
const pushTaskSnapshot = (status = 'running') => {
|
|
543
|
+
if (!activeRunId) return;
|
|
544
|
+
void reporter.update(activeRunId, {
|
|
545
|
+
status,
|
|
546
|
+
phase: 'unified',
|
|
547
|
+
progress: {
|
|
548
|
+
total: Math.max(0, Number(spec.assignedNotes) || 0),
|
|
549
|
+
processed: Math.max(0, Number(stats.openedNotes) || 0),
|
|
550
|
+
failed: Math.max(0, Number(stats.operationErrors) || 0),
|
|
551
|
+
},
|
|
552
|
+
stats: {
|
|
553
|
+
notesProcessed: Math.max(0, Number(stats.openedNotes) || 0),
|
|
554
|
+
commentsCollected: Math.max(0, Number(stats.commentsCollected) || 0),
|
|
555
|
+
likesPerformed: Math.max(0, Number(stats.likesNewCount) || 0),
|
|
556
|
+
repliesGenerated: 0,
|
|
557
|
+
imagesDownloaded: 0,
|
|
558
|
+
ocrProcessed: 0,
|
|
559
|
+
},
|
|
560
|
+
});
|
|
561
|
+
};
|
|
272
562
|
|
|
273
563
|
const logEvent = (payload) => {
|
|
274
564
|
const eventPayload = isObject(payload) ? payload : { event: 'autoscript:raw', payload };
|
|
@@ -277,9 +567,38 @@ async function runProfile(spec, argv, baseOverrides = {}) {
|
|
|
277
567
|
profileId,
|
|
278
568
|
...eventPayload,
|
|
279
569
|
};
|
|
570
|
+
if (!merged.runId && currentRunId) merged.runId = currentRunId;
|
|
280
571
|
fs.appendFileSync(spec.logPath, `${JSON.stringify(merged)}\n`, 'utf8');
|
|
281
572
|
console.log(JSON.stringify(merged));
|
|
282
573
|
updateProfileStatsFromEvent(stats, merged);
|
|
574
|
+
if (busEnabled && busPublishable.has(String(merged.event || '').trim())) {
|
|
575
|
+
void publishBusEvent(merged);
|
|
576
|
+
}
|
|
577
|
+
const eventName = String(merged.event || '').trim();
|
|
578
|
+
const mergedRunId = String(merged.runId || '').trim();
|
|
579
|
+
if (mergedRunId) activeRunId = mergedRunId;
|
|
580
|
+
const shouldReportEvent = (
|
|
581
|
+
eventName === 'xhs.unified.start'
|
|
582
|
+
|| eventName === 'xhs.unified.stop'
|
|
583
|
+
|| eventName === 'autoscript:start'
|
|
584
|
+
|| eventName === 'autoscript:stop'
|
|
585
|
+
|| eventName === 'autoscript:impact'
|
|
586
|
+
|| eventName === 'autoscript:operation_start'
|
|
587
|
+
|| eventName === 'autoscript:operation_done'
|
|
588
|
+
|| eventName === 'autoscript:operation_error'
|
|
589
|
+
|| eventName === 'autoscript:operation_recovery_failed'
|
|
590
|
+
);
|
|
591
|
+
if (activeRunId && shouldReportEvent) {
|
|
592
|
+
void reporter.pushEvent(activeRunId, eventName, merged);
|
|
593
|
+
}
|
|
594
|
+
if (
|
|
595
|
+
eventName === 'autoscript:operation_done'
|
|
596
|
+
|| eventName === 'autoscript:operation_error'
|
|
597
|
+
|| eventName === 'autoscript:operation_recovery_failed'
|
|
598
|
+
|| eventName === 'autoscript:impact'
|
|
599
|
+
) {
|
|
600
|
+
pushTaskSnapshot('running');
|
|
601
|
+
}
|
|
283
602
|
if (
|
|
284
603
|
merged.event === 'autoscript:event'
|
|
285
604
|
&& merged.subscriptionId === 'login_guard'
|
|
@@ -293,8 +612,45 @@ async function runProfile(spec, argv, baseOverrides = {}) {
|
|
|
293
612
|
}
|
|
294
613
|
};
|
|
295
614
|
|
|
615
|
+
const runner = new AutoscriptRunner(normalized, {
|
|
616
|
+
profileId,
|
|
617
|
+
log: logEvent,
|
|
618
|
+
});
|
|
619
|
+
|
|
620
|
+
const running = await runner.start();
|
|
621
|
+
currentRunId = running?.runId || currentRunId;
|
|
622
|
+
activeRunId = String(running?.runId || '').trim();
|
|
623
|
+
if (activeRunId) {
|
|
624
|
+
await reporter.ensureCreated(activeRunId, {
|
|
625
|
+
status: 'starting',
|
|
626
|
+
phase: 'unified',
|
|
627
|
+
progress: {
|
|
628
|
+
total: Math.max(0, Number(spec.assignedNotes) || 0),
|
|
629
|
+
processed: 0,
|
|
630
|
+
failed: 0,
|
|
631
|
+
},
|
|
632
|
+
});
|
|
633
|
+
await reporter.update(activeRunId, {
|
|
634
|
+
status: 'running',
|
|
635
|
+
phase: 'unified',
|
|
636
|
+
progress: {
|
|
637
|
+
total: Math.max(0, Number(spec.assignedNotes) || 0),
|
|
638
|
+
processed: 0,
|
|
639
|
+
failed: 0,
|
|
640
|
+
},
|
|
641
|
+
stats: {
|
|
642
|
+
notesProcessed: 0,
|
|
643
|
+
commentsCollected: 0,
|
|
644
|
+
likesPerformed: 0,
|
|
645
|
+
repliesGenerated: 0,
|
|
646
|
+
imagesDownloaded: 0,
|
|
647
|
+
ocrProcessed: 0,
|
|
648
|
+
},
|
|
649
|
+
});
|
|
650
|
+
}
|
|
296
651
|
logEvent({
|
|
297
652
|
event: 'xhs.unified.start',
|
|
653
|
+
runId: running?.runId || null,
|
|
298
654
|
keyword: options.keyword,
|
|
299
655
|
env: options.env,
|
|
300
656
|
maxNotes: options.maxNotes,
|
|
@@ -302,13 +658,6 @@ async function runProfile(spec, argv, baseOverrides = {}) {
|
|
|
302
658
|
outputRoot: options.outputRoot,
|
|
303
659
|
parallelRunLabel: spec.runLabel,
|
|
304
660
|
});
|
|
305
|
-
|
|
306
|
-
const runner = new AutoscriptRunner(normalized, {
|
|
307
|
-
profileId,
|
|
308
|
-
log: logEvent,
|
|
309
|
-
});
|
|
310
|
-
|
|
311
|
-
const running = await runner.start();
|
|
312
661
|
const done = await running.done;
|
|
313
662
|
|
|
314
663
|
const stopPayload = {
|
|
@@ -321,7 +670,47 @@ async function runProfile(spec, argv, baseOverrides = {}) {
|
|
|
321
670
|
};
|
|
322
671
|
logEvent(stopPayload);
|
|
323
672
|
|
|
673
|
+
const stopScreenshotPath = await captureStopScreenshot({
|
|
674
|
+
profileId,
|
|
675
|
+
reason: stopPayload.reason || 'stop',
|
|
676
|
+
outputDir: path.dirname(spec.logPath),
|
|
677
|
+
});
|
|
678
|
+
if (stopScreenshotPath) {
|
|
679
|
+
logEvent({
|
|
680
|
+
event: 'xhs.unified.stop_screenshot',
|
|
681
|
+
profileId,
|
|
682
|
+
runId: stopPayload.runId,
|
|
683
|
+
reason: stopPayload.reason || null,
|
|
684
|
+
path: stopScreenshotPath,
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
|
|
324
688
|
stats.stopReason = stopPayload.reason;
|
|
689
|
+
const finalRunId = String(stopPayload.runId || activeRunId || '').trim();
|
|
690
|
+
if (finalRunId) {
|
|
691
|
+
activeRunId = finalRunId;
|
|
692
|
+
const failed = stopPayload.reason === 'script_failure';
|
|
693
|
+
await reporter.update(finalRunId, {
|
|
694
|
+
status: failed ? 'failed' : 'completed',
|
|
695
|
+
phase: 'unified',
|
|
696
|
+
progress: {
|
|
697
|
+
total: Math.max(0, Number(spec.assignedNotes) || 0),
|
|
698
|
+
processed: Math.max(0, Number(stats.openedNotes) || 0),
|
|
699
|
+
failed: Math.max(0, Number(stats.operationErrors) || 0),
|
|
700
|
+
},
|
|
701
|
+
stats: {
|
|
702
|
+
notesProcessed: Math.max(0, Number(stats.openedNotes) || 0),
|
|
703
|
+
commentsCollected: Math.max(0, Number(stats.commentsCollected) || 0),
|
|
704
|
+
likesPerformed: Math.max(0, Number(stats.likesNewCount) || 0),
|
|
705
|
+
repliesGenerated: 0,
|
|
706
|
+
imagesDownloaded: 0,
|
|
707
|
+
ocrProcessed: 0,
|
|
708
|
+
},
|
|
709
|
+
});
|
|
710
|
+
if (failed) {
|
|
711
|
+
await reporter.setError(finalRunId, `autoscript stopped: ${stopPayload.reason || 'script_failure'}`, 'SCRIPT_FAILURE', false);
|
|
712
|
+
}
|
|
713
|
+
}
|
|
325
714
|
|
|
326
715
|
const profileResult = {
|
|
327
716
|
ok: stopPayload.reason !== 'script_failure',
|
|
@@ -331,6 +720,7 @@ async function runProfile(spec, argv, baseOverrides = {}) {
|
|
|
331
720
|
assignedNotes: spec.assignedNotes,
|
|
332
721
|
outputRoot: options.outputRoot,
|
|
333
722
|
logPath: spec.logPath,
|
|
723
|
+
stopScreenshotPath: stopScreenshotPath || null,
|
|
334
724
|
stats,
|
|
335
725
|
};
|
|
336
726
|
|
|
@@ -527,93 +917,49 @@ export async function runUnified(argv, overrides = {}) {
|
|
|
527
917
|
const keyword = String(argv.keyword || argv.k || '').trim();
|
|
528
918
|
if (!keyword) throw new Error('missing --keyword');
|
|
529
919
|
|
|
530
|
-
const env = String(argv.env || '
|
|
920
|
+
const env = String(argv.env || 'prod').trim() || 'prod';
|
|
921
|
+
const busEnabled = parseBool(argv['bus-events'], false) || process.env.WEBAUTO_BUS_EVENTS === '1';
|
|
531
922
|
const profiles = parseProfiles(argv);
|
|
532
923
|
if (profiles.length === 0) throw new Error('missing --profile or --profiles or --profilepool');
|
|
533
|
-
|
|
534
|
-
const accountStates = await syncXhsAccountsByProfiles(profiles);
|
|
535
|
-
const executableProfiles = accountStates
|
|
536
|
-
.filter((item) => item?.valid === true && Boolean(String(item?.accountId || '').trim()))
|
|
537
|
-
.map((item) => item.profileId);
|
|
538
|
-
const invalidProfiles = accountStates.filter((item) => !item || item.valid !== true);
|
|
539
|
-
if (executableProfiles.length === 0) {
|
|
540
|
-
throw new Error(`no valid business accounts: ${invalidProfiles.map((item) => `${item.profileId}:${item.reason || 'invalid'}`).join(', ')}`);
|
|
541
|
-
}
|
|
542
|
-
|
|
924
|
+
await Promise.all(profiles.map((profileId) => ensureProfileSession(profileId)));
|
|
543
925
|
const defaultMaxNotes = parseIntFlag(argv['max-notes'] ?? argv.target, 30, 1);
|
|
544
926
|
const totalNotes = parseNonNegativeInt(argv['total-notes'] ?? argv['total-target'], 0);
|
|
545
|
-
const
|
|
546
|
-
|
|
547
|
-
|
|
927
|
+
const hasTotalTarget = totalNotes > 0;
|
|
928
|
+
const maxWaves = parseIntFlag(argv['max-waves'], 40, 1);
|
|
548
929
|
const parallelRequested = parseBool(argv.parallel, false);
|
|
549
|
-
const
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
|
|
930
|
+
const configuredConcurrency = parseIntFlag(argv.concurrency, profiles.length || 1, 1);
|
|
931
|
+
const planOnly = parseBool(argv['plan-only'], false);
|
|
932
|
+
const seedCollectCountFlag = parseNonNegativeInt(argv['seed-collect-count'], 0);
|
|
933
|
+
const seedCollectRoundsFlag = parseNonNegativeInt(argv['seed-collect-rounds'], 6);
|
|
553
934
|
|
|
554
935
|
const runLabel = formatRunLabel();
|
|
555
936
|
const baseOutputRoot = resolveDownloadRoot(argv['output-root']);
|
|
937
|
+
const outputRootArg = String(argv['output-root'] || '').trim();
|
|
938
|
+
const useShardRoots = profiles.length > 1;
|
|
939
|
+
const sharedHarvestPath = profiles.length > 1
|
|
940
|
+
? path.join(baseOutputRoot, 'xiaohongshu', sanitizeForPath(env, 'prod'), sanitizeForPath(keyword, 'unknown'), 'merged', `run-${runLabel}`, 'coord', 'harvest-note-claims.json')
|
|
941
|
+
: '';
|
|
942
|
+
const searchSerialKey = `${sanitizeForPath(env, 'prod')}:${sanitizeForPath(keyword, 'unknown')}:${runLabel}`;
|
|
556
943
|
const mergedDir = path.join(
|
|
557
944
|
baseOutputRoot,
|
|
558
945
|
'xiaohongshu',
|
|
559
|
-
sanitizeForPath(env, '
|
|
946
|
+
sanitizeForPath(env, 'prod'),
|
|
560
947
|
sanitizeForPath(keyword, 'unknown'),
|
|
561
948
|
'merged',
|
|
562
949
|
`run-${runLabel}`,
|
|
563
950
|
);
|
|
564
951
|
const planPath = path.join(mergedDir, 'plan.json');
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
logPath: path.join(mergedDir, 'profiles', `${shardId}.events.jsonl`),
|
|
577
|
-
summaryPath: path.join(mergedDir, 'profiles', `${shardId}.summary.json`),
|
|
578
|
-
};
|
|
579
|
-
});
|
|
580
|
-
|
|
581
|
-
const planPayload = {
|
|
582
|
-
event: 'xhs.unified.plan',
|
|
583
|
-
planPath,
|
|
584
|
-
keyword,
|
|
585
|
-
env,
|
|
586
|
-
totalNotes: totalNotes > 0 ? totalNotes : null,
|
|
587
|
-
defaultMaxNotes,
|
|
588
|
-
parallel,
|
|
589
|
-
concurrency,
|
|
590
|
-
accountStates,
|
|
591
|
-
skippedProfiles: invalidProfiles.map((item) => ({
|
|
592
|
-
profileId: item?.profileId || null,
|
|
593
|
-
status: item?.status || 'invalid',
|
|
594
|
-
reason: item?.reason || 'invalid',
|
|
595
|
-
valid: item?.valid === true,
|
|
596
|
-
accountId: item?.accountId || null,
|
|
597
|
-
})),
|
|
598
|
-
specs: specs.map((item) => ({
|
|
599
|
-
profileId: item.profileId,
|
|
600
|
-
assignedNotes: item.assignedNotes,
|
|
601
|
-
outputRoot: item.outputRoot,
|
|
602
|
-
logPath: item.logPath,
|
|
603
|
-
})),
|
|
604
|
-
};
|
|
605
|
-
console.log(JSON.stringify(planPayload));
|
|
606
|
-
|
|
607
|
-
await writeJson(planPath, planPayload);
|
|
608
|
-
|
|
609
|
-
if (parseBool(argv['plan-only'], false)) {
|
|
610
|
-
return {
|
|
611
|
-
ok: true,
|
|
612
|
-
planOnly: true,
|
|
613
|
-
planPath,
|
|
614
|
-
specs,
|
|
615
|
-
};
|
|
616
|
-
}
|
|
952
|
+
const completedAtStart = hasTotalTarget
|
|
953
|
+
? await collectCompletedNoteIds(baseOutputRoot, env, keyword)
|
|
954
|
+
: { count: 0, noteIds: [] };
|
|
955
|
+
let remainingNotes = hasTotalTarget
|
|
956
|
+
? Math.max(0, totalNotes - completedAtStart.count)
|
|
957
|
+
: defaultMaxNotes;
|
|
958
|
+
|
|
959
|
+
const skippedProfileMap = new Map();
|
|
960
|
+
const wavePlans = [];
|
|
961
|
+
const allResults = [];
|
|
962
|
+
let finalAccountStates = [];
|
|
617
963
|
|
|
618
964
|
const execute = async (spec) => {
|
|
619
965
|
try {
|
|
@@ -667,9 +1013,159 @@ export async function runUnified(argv, overrides = {}) {
|
|
|
667
1013
|
}
|
|
668
1014
|
};
|
|
669
1015
|
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
1016
|
+
for (let wave = 1; wave <= maxWaves; wave += 1) {
|
|
1017
|
+
if (hasTotalTarget && remainingNotes <= 0) break;
|
|
1018
|
+
if (!hasTotalTarget && wave > 1) break;
|
|
1019
|
+
|
|
1020
|
+
let executableProfiles = [];
|
|
1021
|
+
if (planOnly) {
|
|
1022
|
+
executableProfiles = profiles.slice();
|
|
1023
|
+
finalAccountStates = executableProfiles.map((profileId) => ({
|
|
1024
|
+
profileId,
|
|
1025
|
+
status: 'plan_only_unverified',
|
|
1026
|
+
reason: 'plan_only_skip_account_sync',
|
|
1027
|
+
valid: null,
|
|
1028
|
+
accountId: null,
|
|
1029
|
+
}));
|
|
1030
|
+
} else {
|
|
1031
|
+
const accountStates = await syncXhsAccountsByProfiles(profiles);
|
|
1032
|
+
finalAccountStates = accountStates;
|
|
1033
|
+
executableProfiles = accountStates
|
|
1034
|
+
.filter((item) => item?.valid === true && Boolean(String(item?.accountId || '').trim()))
|
|
1035
|
+
.map((item) => item.profileId);
|
|
1036
|
+
const invalidProfiles = accountStates.filter((item) => !item || item.valid !== true);
|
|
1037
|
+
for (const item of invalidProfiles) {
|
|
1038
|
+
const profileId = String(item?.profileId || '').trim();
|
|
1039
|
+
if (!profileId) continue;
|
|
1040
|
+
skippedProfileMap.set(profileId, {
|
|
1041
|
+
profileId,
|
|
1042
|
+
status: item?.status || 'invalid',
|
|
1043
|
+
reason: item?.reason || 'invalid',
|
|
1044
|
+
valid: item?.valid === true,
|
|
1045
|
+
accountId: item?.accountId || null,
|
|
1046
|
+
});
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
if (executableProfiles.length === 0) {
|
|
1050
|
+
if (wave === 1) {
|
|
1051
|
+
throw new Error(`no valid business accounts: ${invalidProfiles.map((item) => `${item.profileId}:${item.reason || 'invalid'}`).join(', ')}`);
|
|
1052
|
+
}
|
|
1053
|
+
break;
|
|
1054
|
+
}
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
const plan = hasTotalTarget
|
|
1058
|
+
? buildDynamicWavePlan({ profiles: executableProfiles, remainingNotes })
|
|
1059
|
+
: buildEvenShardPlan({ profiles: executableProfiles, totalNotes: 0, defaultMaxNotes });
|
|
1060
|
+
if (plan.length === 0) break;
|
|
1061
|
+
|
|
1062
|
+
const parallel = parallelRequested && plan.length > 1;
|
|
1063
|
+
const concurrency = parallel
|
|
1064
|
+
? Math.min(plan.length, configuredConcurrency)
|
|
1065
|
+
: 1;
|
|
1066
|
+
const waveTag = `wave-${String(wave).padStart(3, '0')}`;
|
|
1067
|
+
const specs = plan.map((item, index) => {
|
|
1068
|
+
const shardId = sanitizeForPath(item.profileId, 'profile');
|
|
1069
|
+
const shardOutputRoot = useShardRoots
|
|
1070
|
+
? path.join(baseOutputRoot, 'shards', shardId)
|
|
1071
|
+
: outputRootArg;
|
|
1072
|
+
const defaultSeedCollectCount = Math.max(1, Math.min(
|
|
1073
|
+
Number(item.assignedNotes || 1),
|
|
1074
|
+
Math.max(1, plan.length * 2),
|
|
1075
|
+
));
|
|
1076
|
+
const seedCollectCount = index === 0
|
|
1077
|
+
? (seedCollectCountFlag > 0 ? seedCollectCountFlag : defaultSeedCollectCount)
|
|
1078
|
+
: 0;
|
|
1079
|
+
return {
|
|
1080
|
+
...item,
|
|
1081
|
+
runLabel,
|
|
1082
|
+
waveTag,
|
|
1083
|
+
outputRoot: shardOutputRoot,
|
|
1084
|
+
logPath: path.join(mergedDir, 'profiles', `${waveTag}.${shardId}.events.jsonl`),
|
|
1085
|
+
summaryPath: path.join(mergedDir, 'profiles', `${waveTag}.${shardId}.summary.json`),
|
|
1086
|
+
sharedHarvestPath,
|
|
1087
|
+
searchSerialKey,
|
|
1088
|
+
seedCollectCount,
|
|
1089
|
+
seedCollectMaxRounds: index === 0 ? seedCollectRoundsFlag : 0,
|
|
1090
|
+
};
|
|
1091
|
+
});
|
|
1092
|
+
|
|
1093
|
+
wavePlans.push({
|
|
1094
|
+
wave,
|
|
1095
|
+
waveTag,
|
|
1096
|
+
remainingBefore: remainingNotes,
|
|
1097
|
+
parallel,
|
|
1098
|
+
concurrency,
|
|
1099
|
+
specs: specs.map((item) => ({
|
|
1100
|
+
profileId: item.profileId,
|
|
1101
|
+
assignedNotes: item.assignedNotes,
|
|
1102
|
+
outputRoot: item.outputRoot,
|
|
1103
|
+
logPath: item.logPath,
|
|
1104
|
+
sharedHarvestPath: item.sharedHarvestPath || null,
|
|
1105
|
+
seedCollectCount: item.seedCollectCount || 0,
|
|
1106
|
+
seedCollectMaxRounds: item.seedCollectMaxRounds || 0,
|
|
1107
|
+
})),
|
|
1108
|
+
});
|
|
1109
|
+
|
|
1110
|
+
if (planOnly) break;
|
|
1111
|
+
|
|
1112
|
+
const waveResults = parallel
|
|
1113
|
+
? await runWithConcurrency(specs, concurrency, execute)
|
|
1114
|
+
: await runWithConcurrency(specs, 1, execute);
|
|
1115
|
+
allResults.push(...waveResults);
|
|
1116
|
+
|
|
1117
|
+
if (hasTotalTarget) {
|
|
1118
|
+
const openedInWave = waveResults.reduce((sum, item) => sum + toNumber(item?.stats?.openedNotes, 0), 0);
|
|
1119
|
+
remainingNotes = Math.max(0, remainingNotes - openedInWave);
|
|
1120
|
+
const waveRecord = wavePlans[wavePlans.length - 1];
|
|
1121
|
+
waveRecord.openedInWave = openedInWave;
|
|
1122
|
+
waveRecord.remainingAfter = remainingNotes;
|
|
1123
|
+
if (openedInWave <= 0) {
|
|
1124
|
+
console.error(JSON.stringify({
|
|
1125
|
+
event: 'xhs.unified.wave_stalled',
|
|
1126
|
+
wave,
|
|
1127
|
+
remainingNotes,
|
|
1128
|
+
}));
|
|
1129
|
+
break;
|
|
1130
|
+
}
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
const skippedProfiles = Array.from(skippedProfileMap.values());
|
|
1135
|
+
|
|
1136
|
+
const planPayload = {
|
|
1137
|
+
event: 'xhs.unified.plan',
|
|
1138
|
+
planPath,
|
|
1139
|
+
keyword,
|
|
1140
|
+
env,
|
|
1141
|
+
totalNotes: totalNotes > 0 ? totalNotes : null,
|
|
1142
|
+
defaultMaxNotes,
|
|
1143
|
+
maxWaves,
|
|
1144
|
+
runLabel,
|
|
1145
|
+
hasTotalTarget,
|
|
1146
|
+
completedAtStart: completedAtStart.count,
|
|
1147
|
+
remainingAtPlan: remainingNotes,
|
|
1148
|
+
accountStates: finalAccountStates,
|
|
1149
|
+
skippedProfiles,
|
|
1150
|
+
waves: wavePlans,
|
|
1151
|
+
};
|
|
1152
|
+
console.log(JSON.stringify(planPayload));
|
|
1153
|
+
|
|
1154
|
+
await writeJson(planPath, planPayload);
|
|
1155
|
+
|
|
1156
|
+
if (planOnly) {
|
|
1157
|
+
return {
|
|
1158
|
+
ok: true,
|
|
1159
|
+
planOnly: true,
|
|
1160
|
+
planPath,
|
|
1161
|
+
waves: wavePlans,
|
|
1162
|
+
};
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
const results = allResults;
|
|
1166
|
+
if (results.length === 0) {
|
|
1167
|
+
throw new Error(`no executable waves generated, see ${planPath}`);
|
|
1168
|
+
}
|
|
673
1169
|
|
|
674
1170
|
const merged = await mergeProfileOutputs({
|
|
675
1171
|
results,
|
|
@@ -677,26 +1173,55 @@ export async function runUnified(argv, overrides = {}) {
|
|
|
677
1173
|
keyword,
|
|
678
1174
|
env,
|
|
679
1175
|
totalNotes,
|
|
680
|
-
parallel,
|
|
681
|
-
concurrency,
|
|
682
|
-
skippedProfiles
|
|
683
|
-
profileId: item?.profileId || null,
|
|
684
|
-
status: item?.status || 'invalid',
|
|
685
|
-
reason: item?.reason || 'invalid',
|
|
686
|
-
accountId: item?.accountId || null,
|
|
687
|
-
})),
|
|
1176
|
+
parallel: parallelRequested,
|
|
1177
|
+
concurrency: configuredConcurrency,
|
|
1178
|
+
skippedProfiles,
|
|
688
1179
|
});
|
|
689
1180
|
|
|
690
|
-
|
|
1181
|
+
const mergedSummary = {
|
|
1182
|
+
...merged.mergedSummary,
|
|
1183
|
+
progress: {
|
|
1184
|
+
completedAtStart: completedAtStart.count,
|
|
1185
|
+
completedDuringRun: toNumber(merged.mergedSummary?.totals?.openedNotes, 0),
|
|
1186
|
+
targetTotal: hasTotalTarget ? totalNotes : null,
|
|
1187
|
+
remainingAfterRun: hasTotalTarget ? Math.max(0, remainingNotes) : null,
|
|
1188
|
+
reachedTarget: hasTotalTarget ? remainingNotes <= 0 : null,
|
|
1189
|
+
},
|
|
1190
|
+
waves: wavePlans,
|
|
1191
|
+
};
|
|
1192
|
+
await writeJson(merged.summaryPath, mergedSummary);
|
|
1193
|
+
|
|
1194
|
+
const mergedEvent = {
|
|
691
1195
|
event: 'xhs.unified.merged',
|
|
692
1196
|
summaryPath: merged.summaryPath,
|
|
1197
|
+
waves: wavePlans.length,
|
|
693
1198
|
profilesTotal: results.length,
|
|
694
1199
|
profilesSucceeded: results.filter((item) => item.ok).length,
|
|
695
1200
|
profilesFailed: results.filter((item) => !item.ok).length,
|
|
696
|
-
|
|
1201
|
+
remainingNotes: hasTotalTarget ? remainingNotes : null,
|
|
1202
|
+
};
|
|
1203
|
+
console.log(JSON.stringify(mergedEvent));
|
|
1204
|
+
if (busEnabled) {
|
|
1205
|
+
void publishBusEvent(mergedEvent);
|
|
1206
|
+
}
|
|
697
1207
|
|
|
698
|
-
|
|
699
|
-
|
|
1208
|
+
const failedResults = results.filter((item) => !item.ok);
|
|
1209
|
+
if (hasTotalTarget && remainingNotes > 0) {
|
|
1210
|
+
throw new Error(`target not reached, remaining=${remainingNotes}, see ${merged.summaryPath}`);
|
|
1211
|
+
}
|
|
1212
|
+
if (failedResults.length > 0) {
|
|
1213
|
+
if (hasTotalTarget && remainingNotes <= 0) {
|
|
1214
|
+
console.warn(JSON.stringify({
|
|
1215
|
+
event: 'xhs.unified.partial_failures_tolerated',
|
|
1216
|
+
summaryPath: merged.summaryPath,
|
|
1217
|
+
failedProfiles: failedResults.map((item) => ({
|
|
1218
|
+
profileId: item.profileId,
|
|
1219
|
+
reason: item.reason || null,
|
|
1220
|
+
})),
|
|
1221
|
+
}));
|
|
1222
|
+
} else {
|
|
1223
|
+
throw new Error(`unified finished with failures, see ${merged.summaryPath}`);
|
|
1224
|
+
}
|
|
700
1225
|
}
|
|
701
1226
|
|
|
702
1227
|
return {
|
|
@@ -717,10 +1242,18 @@ async function main() {
|
|
|
717
1242
|
' --max-notes <n> 单账号目标(未启用 total-notes 时)',
|
|
718
1243
|
' --total-notes <n> 总目标数(自动分片到账号)',
|
|
719
1244
|
' --total-target <n> total-notes 别名',
|
|
1245
|
+
' --max-waves <n> 动态分片最大波次(默认40)',
|
|
720
1246
|
' --parallel 启用并行执行',
|
|
1247
|
+
' --bus-events <bool> 启用 UI 事件总线推送(默认 false)',
|
|
721
1248
|
' --concurrency <n> 并行度(默认=账号数)',
|
|
1249
|
+
' --resume <bool> 断点续传(默认 false)',
|
|
1250
|
+
' --incremental-max <bool> max-notes 作为增量配额(默认 true)',
|
|
722
1251
|
' --plan-only 只生成分片计划,不执行',
|
|
723
1252
|
' --output-root <path> 输出根目录(并行时自动分 profile shard)',
|
|
1253
|
+
' --seed-collect-count <n> 首账号预采样去重ID数量(默认按分片自动)',
|
|
1254
|
+
' --seed-collect-rounds <n> 首账号预采样滚动轮数(默认6)',
|
|
1255
|
+
' --search-serial-key <key> 搜索阶段串行锁key(默认自动生成)',
|
|
1256
|
+
' --shared-harvest-path <path> 共享harvest去重列表路径(默认自动生成)',
|
|
724
1257
|
].join('\n'));
|
|
725
1258
|
return;
|
|
726
1259
|
}
|