@web-auto/webauto 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/apps/desktop-console/default-settings.json +2 -2
  2. package/apps/desktop-console/dist/main/index.mjs +915 -85
  3. package/apps/desktop-console/dist/main/preload.mjs +7 -0
  4. package/apps/desktop-console/dist/renderer/index.html +622 -50
  5. package/apps/desktop-console/dist/renderer/index.js +2415 -470
  6. package/apps/desktop-console/dist/renderer/run.mts +6 -5
  7. package/apps/desktop-console/entry/ui-cli.mjs +672 -0
  8. package/apps/desktop-console/entry/ui-console.mjs +416 -29
  9. package/apps/webauto/entry/account.mjs +89 -53
  10. package/apps/webauto/entry/browser-status.mjs +7 -10
  11. package/apps/webauto/entry/lib/account-detect.mjs +254 -28
  12. package/apps/webauto/entry/lib/account-store.mjs +219 -30
  13. package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
  14. package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
  15. package/apps/webauto/entry/lib/profilepool.mjs +14 -5
  16. package/apps/webauto/entry/lib/quota-status.mjs +23 -0
  17. package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
  18. package/apps/webauto/entry/profilepool.mjs +106 -17
  19. package/apps/webauto/entry/schedule.mjs +612 -0
  20. package/apps/webauto/entry/weibo-unified.mjs +134 -0
  21. package/apps/webauto/entry/xhs-install.mjs +236 -29
  22. package/apps/webauto/entry/xhs-status.mjs +5 -2
  23. package/apps/webauto/entry/xhs-unified.mjs +631 -98
  24. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
  25. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
  26. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
  27. package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
  28. package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
  29. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
  30. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
  31. package/bin/camoufox-cli.mjs +61 -0
  32. package/bin/webauto.mjs +301 -54
  33. package/dist/modules/camo-backend/src/index.js +49 -1
  34. package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
  35. package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
  36. package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
  37. package/dist/modules/collection-manager/bloom-filter.js +91 -0
  38. package/dist/modules/collection-manager/date-utils.js +275 -0
  39. package/dist/modules/collection-manager/index.js +258 -0
  40. package/dist/modules/collection-manager/storage.js +195 -0
  41. package/dist/modules/collection-manager/types.js +47 -0
  42. package/dist/modules/logging/src/index.js +1 -1
  43. package/dist/modules/process-registry/index.js +230 -0
  44. package/dist/modules/rate-limiter/index.js +242 -0
  45. package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
  46. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
  47. package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
  48. package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
  49. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
  50. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
  51. package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
  52. package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
  53. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
  54. package/dist/modules/workflow/config/workflowRegistry.js +2 -0
  55. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
  56. package/dist/modules/workflow/src/runner.js +6 -0
  57. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
  58. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
  59. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
  60. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
  61. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
  62. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
  63. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
  64. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
  65. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
  66. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
  67. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  68. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
  69. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  70. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
  71. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
  72. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
  73. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
  74. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
  75. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
  76. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
  77. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  78. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
  79. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  80. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
  81. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  82. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
  83. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  84. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
  85. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  86. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
  87. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  88. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
  89. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  90. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
  91. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  92. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
  93. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  94. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
  95. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  96. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
  97. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  98. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
  99. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  100. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
  101. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  102. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
  103. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  104. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
  105. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  106. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
  107. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  108. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
  109. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  110. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
  111. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  112. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
  113. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  114. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
  115. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  116. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
  117. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  118. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
  119. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  120. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
  121. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
  122. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
  123. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  124. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
  125. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
  126. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
  127. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  128. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
  129. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  130. package/dist/services/shared/serviceProcessLogger.js +1 -1
  131. package/dist/services/unified-api/server.js +105 -11
  132. package/modules/camo-backend/src/index.ts +46 -1
  133. package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
  134. package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
  135. package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
  136. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
  137. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
  138. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
  139. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
  140. package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
  141. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
  142. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
  143. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
  144. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
  145. package/modules/collection-manager/bloom-filter.ts +112 -0
  146. package/modules/collection-manager/date-utils.ts +316 -0
  147. package/modules/collection-manager/index.ts +309 -0
  148. package/modules/collection-manager/package.json +10 -0
  149. package/modules/collection-manager/storage.ts +174 -0
  150. package/modules/collection-manager/types.ts +156 -0
  151. package/modules/logging/src/index.ts +1 -1
  152. package/modules/process-registry/index.ts +284 -0
  153. package/modules/rate-limiter/index.ts +322 -0
  154. package/modules/state/src/paths.ts +9 -1
  155. package/modules/task-scheduler/index.ts +293 -0
  156. package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
  157. package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
  158. package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
  159. package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
  160. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
  161. package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
  162. package/modules/workflow/config/workflowRegistry.ts +2 -0
  163. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
  164. package/modules/workflow/src/runner.ts +6 -0
  165. package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
  166. package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
  167. package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
  168. package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
  169. package/package.json +14 -5
  170. package/scripts/postinstall-resources.mjs +62 -0
  171. package/scripts/test/run-coverage.mjs +76 -0
  172. package/scripts/weibo/search.ts +49 -0
  173. package/services/shared/serviceProcessLogger.ts +1 -1
  174. package/services/unified-api/server.ts +98 -12
@@ -0,0 +1,339 @@
1
+ /**
2
+ * Workflow Block: WeiboCollectCommentsBlock
3
+ *
4
+ * 微博评论采集 - 类似小红书结构
5
+ * - 点击评论图标展开评论区
6
+ * - 滚动加载主评论
7
+ * - 展开回复(支持多级)
8
+ * - 触底检测和统计
9
+ */
10
+
11
+ import os from 'node:os';
12
+ import path from 'node:path';
13
+ import { promises as fs } from 'node:fs';
14
+
15
/**
 * Input accepted by the Weibo comment-collection block.
 */
export interface WeiboCollectCommentsInput {
  /** Session identifier; it is sent to the controller as `profileId`. */
  sessionId: string;
  /** Base URL of the controller service. Defaults to `http://127.0.0.1:7704`. */
  serviceUrl?: string;
  /** Comment limit; `0` (the default) means no limit. */
  maxComments?: number;
  /** Maximum number of scroll rounds. Defaults to 30. */
  maxRounds?: number;
  /** Whether reply-expansion buttons are clicked in each round. Defaults to `true`. */
  expandReplies?: boolean;
}
22
+
23
/**
 * A single comment (or reply) scraped from a Weibo detail page.
 */
export interface WeiboComment {
  /** The element's `data-id` attribute, its DOM id, or a positional `item_<index>` fallback. */
  id?: string;
  /** Author display name; the extraction script falls back to '匿名' when none is found. */
  author: string;
  /** Trimmed text content of the comment; empty when no content element is found. */
  content: string;
  /** Text of the time element, or its `datetime` attribute; empty when absent. */
  timestamp?: string;
  /** First integer found in the like element's text, or 0. */
  likeCount?: number;
  /** Number of replies. Not populated by the extraction script in this file. */
  replyCount?: number;
  /** True when the element, or one of its ancestors, has a class name containing "reply". */
  isReply?: boolean;
  /** Identifier of the parent comment. Not populated by the extraction script in this file. */
  parentId?: string;
  /** Nesting depth: 1 for main comments, 2 for replies. */
  level: number;
}
34
+
35
/**
 * Result returned by the Weibo comment-collection block.
 */
export interface WeiboCollectCommentsOutput {
  /** False when the comment icon could not be clicked or an error was thrown while collecting. */
  success: boolean;
  /** De-duplicated comments, in the order they were first seen. */
  comments: WeiboComment[];
  /** Length of `comments`. */
  totalCollected: number;
  /** True when the "没有更多" (no more) marker was found on the page. */
  reachedEnd: boolean;
  /** True when no comment elements were detected, or when the comment icon could not be clicked. */
  emptyState: boolean;
  /** Aggregate counters for the run. */
  stats: {
    /** Number of collected comments whose `level` is 1. */
    mainComments: number;
    /** Number of collected comments whose `level` is greater than 1. */
    replies: number;
    /** Total number of expansion-button clicks reported by the page script. */
    expandedCount: number;
    /** Number of scroll rounds performed. */
    scrollRounds: number;
  };
  /** Failure description; present only when `success` is false. */
  error?: string;
}
49
+
50
/**
 * Reports whether debug artifacts were requested through the environment.
 *
 * @returns `true` when `WEBAUTO_DEBUG` or `WEBAUTO_DEBUG_ARTIFACTS` is exactly `'1'`.
 */
function isDebugArtifactsEnabled(): boolean {
  const { WEBAUTO_DEBUG, WEBAUTO_DEBUG_ARTIFACTS } = process.env;
  if (WEBAUTO_DEBUG === '1') {
    return true;
  }
  return WEBAUTO_DEBUG_ARTIFACTS === '1';
}
56
+
57
/**
 * Collects comments from the Weibo post that is currently open in the given session.
 *
 * Every browser interaction is an `evaluate` command POSTed to `${serviceUrl}/command`.
 * The flow is:
 *   1. click the comment icon to open the comment area (a failure returns early);
 *   2. for up to `maxRounds` rounds: optionally click reply-expansion buttons, extract the
 *      comments present in the DOM, de-duplicate them, look for the end marker, then scroll;
 *   3. return the comments together with aggregate statistics.
 *
 * Errors thrown while collecting are not propagated. They are reported through
 * `success: false` and `error`, along with whatever was collected before the failure.
 *
 * @param input - Session identifier and limits; `maxComments` of 0 means no limit.
 * @returns The collected comments (never more than `maxComments` when it is positive),
 *          the end/empty flags and the run statistics.
 */
export async function execute(input: WeiboCollectCommentsInput): Promise<WeiboCollectCommentsOutput> {
  const {
    sessionId,
    serviceUrl = 'http://127.0.0.1:7704',
    maxComments = 0, // 0 = no limit
    maxRounds = 30,
    expandReplies = true,
  } = input;

  const profile = sessionId;
  const controllerUrl = `${serviceUrl}/command`;
  const comments: WeiboComment[] = [];
  const seenKeys = new Set<string>();
  let scrollRounds = 0;
  let expandedCount = 0;
  let reachedEnd = false;
  let emptyState = false;

  const sleep = (ms: number): Promise<void> => new Promise<void>(resolve => setTimeout(resolve, ms));

  // True once a positive comment limit has been reached.
  const limitReached = (): boolean => maxComments > 0 && comments.length >= maxComments;

  // Builds the statistics object from the current state (shared by success and failure paths).
  const buildStats = (): WeiboCollectCommentsOutput['stats'] => ({
    mainComments: comments.filter(c => c.level === 1).length,
    replies: comments.filter(c => c.level > 1).length,
    expandedCount,
    scrollRounds,
  });

  // Sends one command to the controller and returns the parsed JSON body
  // (or `{ raw }` when the body is not valid JSON). Throws on a non-2xx status.
  async function controllerAction(action: string, args: any = {}): Promise<any> {
    const res = await fetch(controllerUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ action, args: { profileId: profile, ...args } }),
      // AbortSignal.timeout is missing on older runtimes; fall back to no timeout there.
      signal: (AbortSignal as any).timeout ? (AbortSignal as any).timeout(30000) : undefined,
    });
    const raw = await res.text();
    if (!res.ok) throw new Error(`HTTP ${res.status}: ${raw}`);
    let data: any = {};
    try { data = raw ? JSON.parse(raw) : {}; } catch { data = { raw }; }
    return data;
  }

  // Extracts the script result from the response envelopes handled here:
  // `{ result }`, `{ data: { result } }` or `{ data }`.
  function unwrapResult(response: any): any {
    if (response && typeof response === 'object') {
      if ('result' in response) return response.result;
      if (response.data && typeof response.data === 'object' && 'result' in response.data) {
        return response.data.result;
      }
      if ('data' in response) return response.data;
    }
    return response;
  }

  // Clicks the comment icon to open the comment area.
  async function clickCommentIcon(): Promise<boolean> {
    const script = `
      (() => {
        const icon = document.querySelector('.woo-font--comment');
        if (!icon) return { success: false, error: 'comment_icon_not_found' };
        const btn = icon.closest('div[role=button], .woo-box-flex, button') || icon.parentElement;
        if (!btn) return { success: false, error: 'comment_button_not_found' };
        btn.click();
        return { success: true, rect: btn.getBoundingClientRect().toJSON() };
      })()
    `;
    const res = await controllerAction('evaluate', { script });
    const result = unwrapResult(res);
    return result?.success === true;
  }

  // Extracts the comments currently present in the DOM.
  async function extractComments(): Promise<WeiboComment[]> {
    const script = `
      (() => {
        const results = [];
        const items = document.querySelectorAll('[class*="Comment_item"], [class*="comment-item"], .list_li, [class*="reply"]');
        items.forEach((item, index) => {
          const authorEl = item.querySelector('[class*="author"], .user-name, .name, [class*="nick"]');
          const contentEl = item.querySelector('[class*="content"], .WB_text, .txt, [class*="text"]');
          const likeEl = item.querySelector('[class*="like"], .praised, [class*="agree"]');
          const timeEl = item.querySelector('time, [class*="time"], [class*="date"]');

          // 检测是否为回复
          const isReply = item.className.toLowerCase().includes('reply') ||
            item.closest('[class*="reply"]') !== null;

          const id = item.getAttribute('data-id') || item.id || 'item_' + index;

          results.push({
            id,
            author: authorEl?.textContent?.trim() || '匿名',
            content: contentEl?.textContent?.trim() || '',
            timestamp: timeEl?.textContent?.trim() || timeEl?.getAttribute('datetime') || '',
            likeCount: parseInt(likeEl?.textContent?.match(/\\d+/)?.[0] || '0', 10),
            isReply,
            level: isReply ? 2 : 1,
          });
        });
        return results;
      })()
    `;
    const res = await controllerAction('evaluate', { script });
    const result = unwrapResult(res);
    if (!Array.isArray(result)) return [];

    // The controller payload is untyped: drop non-objects and force the two fields used for
    // the de-duplication key to strings so a malformed entry cannot abort the whole run.
    const extracted: WeiboComment[] = [];
    for (const raw of result) {
      if (!raw || typeof raw !== 'object') continue;
      extracted.push({ ...raw, author: String(raw.author ?? ''), content: String(raw.content ?? '') });
    }
    return extracted;
  }

  // Clicks up to three visible reply-expansion buttons and returns how many were clicked.
  async function expandReplyButtons(): Promise<number> {
    const script = `
      (() => {
        const buttons = Array.from(document.querySelectorAll('a, button, span'))
          .filter(el => {
            const text = el.textContent?.trim() || '';
            // Skip "collapse" controls: clicking them would hide replies that are already expanded.
            if (text.includes('收起')) return false;
            return text.includes('回复') || text.includes('展开');
          })
          .filter(el => {
            const rect = el.getBoundingClientRect();
            return rect.top > 0 && rect.bottom < window.innerHeight && rect.width > 0;
          });

        let clicked = 0;
        buttons.slice(0, 3).forEach(btn => {
          btn.click();
          clicked++;
        });
        return clicked;
      })()
    `;
    const res = await controllerAction('evaluate', { script });
    const result = unwrapResult(res);
    return typeof result === 'number' ? result : 0;
  }

  // Scrolls the comment list (or the page) down by 500px, then waits for content to load.
  // Returns the script's `hasMore` flag, defaulting to true when it is not reported.
  async function scrollComments(): Promise<boolean> {
    const script = `
      (() => {
        const container = document.querySelector('[class*="comment_list"], [class*="Comment_list"]') ||
          document.scrollingElement;
        if (!container) return { hasMore: false };

        const beforeScroll = container.scrollTop;
        container.scrollTop += 500;

        // 检查是否还能滚动
        const hasMore = container.scrollTop > beforeScroll ||
          container.scrollHeight - container.scrollTop > container.clientHeight + 100;

        return { hasMore, scrollTop: container.scrollTop, scrollHeight: container.scrollHeight };
      })()
    `;
    const res = await controllerAction('evaluate', { script });
    await sleep(1000);
    const result = unwrapResult(res);
    return !(result && typeof result === 'object' && result.hasMore === false);
  }

  // Checks for the "no more" marker and for the absence of any comment element.
  async function checkEndState(): Promise<{ reachedEnd: boolean; emptyState: boolean }> {
    const script = `
      (() => {
        const noMoreText = Array.from(document.querySelectorAll('*'))
          .find(el => el.textContent?.includes('没有更多'));
        return {
          reachedEnd: !!noMoreText,
          emptyState: document.querySelectorAll('[class*="comment"]').length === 0
        };
      })()
    `;
    const res = await controllerAction('evaluate', { script });
    const result = unwrapResult(res);
    if (typeof result !== 'object' || result === null) {
      return { reachedEnd: false, emptyState: false };
    }
    // Coerce to real booleans so a malformed payload cannot leak into the output flags.
    return { reachedEnd: Boolean(result.reachedEnd), emptyState: Boolean(result.emptyState) };
  }

  try {
    // 1. Open the comment area.
    const clicked = await clickCommentIcon();
    if (!clicked) {
      return {
        success: false,
        comments: [],
        totalCollected: 0,
        reachedEnd: false,
        emptyState: true,
        stats: { mainComments: 0, replies: 0, expandedCount: 0, scrollRounds: 0 },
        error: 'Failed to click comment icon',
      };
    }

    // Give the comment area time to load.
    await sleep(2000);

    // 2. Scroll-and-collect loop.
    while (scrollRounds < maxRounds) {
      if (limitReached()) break;

      if (expandReplies) {
        const expanded = await expandReplyButtons();
        expandedCount += expanded;
        if (expanded > 0) await sleep(800);
      }

      const newComments = await extractComments();
      let addedCount = 0;

      for (const comment of newComments) {
        // Enforce the limit per comment so one large batch cannot overshoot `maxComments`.
        if (limitReached()) break;
        // Keyed by author + content prefix rather than id: ids fall back to positional
        // `item_<index>` values, which are not stable between extraction rounds.
        const key = `${comment.author}:${comment.content.slice(0, 30)}`;
        if (!seenKeys.has(key)) {
          seenKeys.add(key);
          comments.push(comment);
          addedCount++;
        }
      }

      console.log(`[WeiboComments] Round ${scrollRounds + 1}: collected ${comments.length} comments, new: ${addedCount}`);

      const endState = await checkEndState();
      if (endState.reachedEnd || endState.emptyState) {
        reachedEnd = endState.reachedEnd;
        emptyState = endState.emptyState;
        break;
      }

      await scrollComments();
      scrollRounds++;

      // After a round that added nothing, re-check the end marker once the scroll has settled.
      // NOTE(review): a stalled page without the marker still runs until `maxRounds`.
      if (addedCount === 0 && scrollRounds > 3) {
        const endCheck = await checkEndState();
        if (endCheck.reachedEnd) {
          reachedEnd = true;
          break;
        }
      }
    }

    return {
      success: true,
      comments,
      totalCollected: comments.length,
      reachedEnd,
      emptyState,
      stats: buildStats(),
    };
  } catch (error: unknown) {
    // Report the failure with the partial results instead of throwing.
    return {
      success: false,
      comments,
      totalCollected: comments.length,
      reachedEnd,
      emptyState,
      stats: buildStats(),
      error: error instanceof Error ? error.message : String(error),
    };
  }
}
@@ -0,0 +1,338 @@
1
+ /**
2
+ * Workflow Block: WeiboCollectFromLinksBlock
3
+ *
4
+ * Phase3/4: 从 phase2-links.jsonl 读取链接,逐个打开详情页采集内容和评论
5
+ * 使用 WeiboCollectCommentsBlock 进行评论采集(触底检测 + 展开回复)
6
+ */
7
+
8
+ import os from 'node:os';
9
+ import path from 'node:path';
10
+ import { promises as fs } from 'node:fs';
11
+ import { parsePlatformDate, getCurrentTimestamp } from '../../collection-manager/date-utils.js';
12
+
13
/**
 * Input accepted by the block that processes previously collected Weibo links.
 */
export interface WeiboCollectFromLinksInput {
  /** Session identifier; it is sent to the controller as `profileId`. */
  sessionId: string;
  /** Search keyword; its sanitized form names the download sub-directory. */
  keyword: string;
  /** Environment segment of the download path. Defaults to `'debug'`. */
  env?: string;
  /** Number of posts to process (presumably an upper bound; confirm against the block body). */
  targetCount: number;
  /** Comment limit per post. Defaults to 0. */
  maxComments?: number;
  /** Whether comments are collected for each post. Defaults to `false`. */
  collectComments?: boolean;
  /** Base URL of the controller service. Defaults to `http://127.0.0.1:7704`. */
  serviceUrl?: string;
}
22
+
23
/**
 * Result returned by the block that processes previously collected Weibo links.
 */
export interface WeiboCollectFromLinksOutput {
  /** False when, for example, the links file holds no entries. */
  success: boolean;
  /** Directory for this keyword: `<download root>/weibo/<env>/<sanitized keyword>`. */
  keywordDir: string;
  /** Path of `phase2-links.jsonl` inside `keywordDir`. */
  linksPath: string;
  /** Count of processed links. */
  processedCount: number;
  /** Count of persisted posts. */
  persistedCount: number;
  /** Aggregate counters for the run. */
  stats: {
    /** Count of posts processed. */
    postsProcessed: number;
    /** Count of comments collected across posts. */
    totalComments: number;
    /** Count of errors encountered. */
    errors: number;
  };
  /** Failure description, when one is reported. */
  error?: string;
}
36
+
37
/**
 * One record read from `phase2-links.jsonl`.
 */
interface WeiboLinkEntry {
  /** Post identifier; its sanitized form is the Markdown file name. */
  statusId: string;
  /** Identifier of the post's author. */
  userId: string;
  /** URL of the post; it is written to the Markdown output as the link. */
  safeUrl: string;
  /** Search URL recorded with the entry. */
  searchUrl: string;
  /** Author display name, when recorded. */
  authorName?: string;
  /** Preview of the post content, when recorded. */
  contentPreview?: string;
  /** Timestamp string recorded with the entry. */
  ts: string;
}
46
+
47
/**
 * Turns an arbitrary string into a fragment usable inside a file or directory name.
 *
 * The value is trimmed, each run of the characters `\ / : " * ? < > |` becomes a single `_`,
 * each run of whitespace becomes a single `_`, and the result is cut to 80 UTF-16 code units.
 * Falsy input yields an empty string.
 *
 * @param value - Raw text (for example a keyword or status id).
 * @returns The sanitized fragment, at most 80 code units long.
 */
function sanitizeFilenamePart(value: string): string {
  const trimmed = String(value || '').trim();
  const withoutReserved = trimmed.replace(/[\\/:"*?<>|]+/g, '_');
  const withoutWhitespace = withoutReserved.replace(/\s+/g, '_');
  return withoutWhitespace.slice(0, 80);
}
54
+
55
/**
 * Determines the root directory for downloaded content.
 *
 * `WEBAUTO_DOWNLOAD_ROOT` takes precedence over `WEBAUTO_DOWNLOAD_DIR`; the chosen value is
 * used only when it is not blank, and is returned as-is (not trimmed). Otherwise the result is
 * `<home>/.webauto/download`, where home comes from `HOME`, then `USERPROFILE`, then `os.homedir()`.
 *
 * @returns The download root path.
 */
function resolveDownloadRoot(): string {
  const { env } = process;
  const override = env.WEBAUTO_DOWNLOAD_ROOT || env.WEBAUTO_DOWNLOAD_DIR;
  if (override && override.trim()) {
    return override;
  }
  const homeDir = env.HOME || env.USERPROFILE || os.homedir();
  return path.join(homeDir, '.webauto', 'download');
}
61
+
62
/**
 * Reads a JSON Lines file into an array.
 *
 * Lines are split on `\n` and trimmed; blank lines, lines that are not valid JSON, and lines
 * whose parsed value is falsy (`null`, `0`, `false`, `""`) are skipped.
 *
 * @param filePath - Path of the `.jsonl` file.
 * @returns The parsed records in file order; an empty array when the file does not exist.
 * @throws Any read error other than `ENOENT`.
 */
async function readJsonl(filePath: string): Promise<any[]> {
  let text: string;
  try {
    text = await fs.readFile(filePath, 'utf-8');
  } catch (e: any) {
    // A missing file is treated as "no records"; every other failure is surfaced.
    if (e?.code === 'ENOENT') return [];
    throw e;
  }

  const records: any[] = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (!line) continue;

    let parsed: any;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // tolerate a corrupt line without losing the rest of the file
    }
    if (parsed) records.push(parsed);
  }
  return records;
}
78
+
79
/**
 * Writes one post, and optionally its comments, to `<keywordDir>/<sanitized statusId>.md`.
 *
 * The document has a header block (author, link, collection timestamps, and the publish
 * time when `publishedAtInfo` is given), the post content, and, when `comments` is not
 * empty, a comment section with optional collection statistics.
 *
 * @param keywordDir - Directory that receives the Markdown file.
 * @param entry - Link record describing the post.
 * @param content - Post text.
 * @param comments - Collected comments; each is read for `level`, `author`, `isReply`,
 *                   `content` and `likeCount`.
 * @param commentStats - Optional statistics; `scrollRounds`, `expandedCount` and `reachedEnd` are read.
 * @param publishedAtInfo - Optional parsed publish time.
 */
async function saveMarkdown(keywordDir: string, entry: WeiboLinkEntry, content: string, comments: any[], commentStats?: any, publishedAtInfo?: { date: string; time: string; fullText: string } | null): Promise<void> {
  const fileName = `${sanitizeFilenamePart(entry.statusId)}.md`;
  const targetPath = path.join(keywordDir, fileName);
  const stamp = getCurrentTimestamp();

  const out: string[] = [];

  // Header block.
  out.push(`# ${entry.authorName || '未知作者'}的微博`);
  out.push('');
  out.push(`**作者**: ${entry.authorName || '未知'}`);
  out.push(`**链接**: ${entry.safeUrl}`);
  out.push(`**采集时间**: ${stamp.collectedAt}`);
  out.push(`**采集时间(本地)**: ${stamp.collectedAtLocal}`);
  if (publishedAtInfo) {
    out.push(`**发布时间**: ${publishedAtInfo.fullText}`);
    out.push(`**发布日期**: ${publishedAtInfo.date}`);
    if (publishedAtInfo.time) {
      out.push(`**发布时间(时分)**: ${publishedAtInfo.time}`);
    }
  }

  // Post body, fenced by horizontal rules.
  out.push('', '---', '', '## 内容', '', content, '', '---', '');

  // Comment section; omitted entirely when nothing was collected.
  if (comments.length > 0) {
    let mainCount = 0;
    let replyCount = 0;
    for (const item of comments) {
      if (item.level === 1) mainCount++;
      if (item.level > 1) replyCount++;
    }

    out.push(`## 评论 (主评论: ${mainCount}, 回复: ${replyCount})`);

    if (commentStats) {
      out.push('');
      out.push(`**统计**: 滚动轮数 ${commentStats.scrollRounds}, 展开回复 ${commentStats.expandedCount} 次`);
      out.push(`**状态**: ${commentStats.reachedEnd ? '已触底' : '未触底'}`);
    }

    out.push('');

    for (const [position, item] of comments.entries()) {
      // Replies are indented by one space per nesting level below the first.
      const pad = item.level > 1 ? ' '.repeat(item.level - 1) : '';
      out.push(`${pad}### ${position + 1}. ${item.author || '匿名'} ${item.isReply ? '(回复)' : ''}`);
      out.push('');
      out.push(`${pad}${item.content || ''}`);
      out.push('');
      if (item.likeCount > 0) {
        out.push(`${pad}👍 ${item.likeCount}`);
      }
      out.push('');
    }
  }

  await fs.writeFile(targetPath, out.join('\n'), 'utf-8');
}
134
+
135
+ export async function execute(input: WeiboCollectFromLinksInput): Promise<WeiboCollectFromLinksOutput> {
136
+ const {
137
+ sessionId,
138
+ keyword,
139
+ env = 'debug',
140
+ targetCount,
141
+ maxComments = 0,
142
+ collectComments: enableComments = false, // 默认不采集评论,加快速度
143
+ serviceUrl = 'http://127.0.0.1:7704',
144
+ } = input;
145
+
146
+ const profile = sessionId;
147
+ const controllerUrl = `${serviceUrl}/command`;
148
+
149
+ const keywordDir = path.join(resolveDownloadRoot(), 'weibo', env, sanitizeFilenamePart(keyword));
150
+ const linksPath = path.join(keywordDir, 'phase2-links.jsonl');
151
+
152
+ const links: WeiboLinkEntry[] = await readJsonl(linksPath);
153
+ if (links.length === 0) {
154
+ return {
155
+ success: false,
156
+ keywordDir,
157
+ linksPath,
158
+ processedCount: 0,
159
+ persistedCount: 0,
160
+ stats: { postsProcessed: 0, totalComments: 0, errors: 0 },
161
+ error: 'No links found in phase2-links.jsonl',
162
+ };
163
+ }
164
+
165
+ async function controllerAction(action: string, args: any = {}): Promise<any> {
166
+ const res = await fetch(controllerUrl, {
167
+ method: 'POST',
168
+ headers: { 'Content-Type': 'application/json' },
169
+ body: JSON.stringify({ action, args: { profileId: profile, ...args } }),
170
+ signal: (AbortSignal as any).timeout ? (AbortSignal as any).timeout(30000) : undefined,
171
+ });
172
+ const raw = await res.text();
173
+ if (!res.ok) throw new Error(`HTTP ${res.status}: ${raw}`);
174
+ let data: any = {};
175
+ try { data = raw ? JSON.parse(raw) : {}; } catch { data = { raw }; }
176
+ return data;
177
+ }
178
+
179
+ function unwrapResult(response: any): any {
180
+ if (response && typeof response === 'object') {
181
+ if ('result' in response) return response.result;
182
+ if (response.data && typeof response.data === 'object' && 'result' in response.data) {
183
+ return response.data.result;
184
+ }
185
+ if ('data' in response) return response.data;
186
+ }
187
+ return response;
188
+ }
189
+
190
/**
 * Asks the controller to evaluate `window.location.href` in the session page.
 *
 * @returns The reported URL, or an empty string when the unwrapped reply is
 *   not a string.
 */
async function getCurrentUrl(): Promise<string> {
  const reply = await controllerAction('evaluate', { script: 'window.location.href' });
  const href = unwrapResult(reply);
  if (typeof href !== 'string') {
    return '';
  }
  return href;
}
195
+
196
/**
 * Navigates the session page to `url` through the controller `goto` command,
 * then pauses briefly before returning.
 *
 * @param url - Address to open.
 */
async function gotoUrl(url: string): Promise<void> {
  await controllerAction('goto', { url });
  // Fixed 500 ms pause after navigation (deliberately kept short).
  await new Promise((resolve) => {
    setTimeout(resolve, 500);
  });
}
200
+
201
/**
 * Reads the post body text from the currently loaded page.
 *
 * The in-page script takes the first element matching any of the known
 * post-text selectors and returns its trimmed text content.
 *
 * @returns The trimmed text, or an empty string when no element matches or
 *   the unwrapped reply is not a string.
 */
async function extractPostContent(): Promise<string> {
  const script = `
    (() => {
      const el = document.querySelector('[class*="wbtext"], .detail_wbtext, article [class*="text"]');
      return el?.textContent?.trim() || '';
    })()
  `;
  const text = unwrapResult(await controllerAction('evaluate', { script }));
  if (typeof text !== 'string') {
    return '';
  }
  return text;
}
212
+
213
+
214
+
215
/**
 * Extracts the post's publish time from the currently loaded page.
 *
 * The in-page script tries a list of candidate selectors in order. For each
 * one it takes the first matching element and reads its trimmed text, falling
 * back to the `datetime` attribute. A selector whose element yields nothing
 * usable is skipped so that later selectors still get a chance; previously the
 * first matching element ended the search even when it was empty.
 *
 * @returns The value produced by `parsePlatformDate` for the extracted text,
 *   or `null` when no non-empty text is found or the controller call fails
 *   (failures are logged, not thrown).
 */
async function extractPostTime(): Promise<{ date: string; time: string; fullText: string } | null> {
  const script = `
    (() => {
      const selectors = [
        'time',
        '[class*="time"]',
        '[class*="date"]',
        '.from a',
        '[class*="head-info_time"]'
      ];
      for (const sel of selectors) {
        const el = document.querySelector(sel);
        if (!el) continue;
        const text = el.textContent?.trim() || el.getAttribute('datetime') || '';
        // Keep looking when this element carries no usable text.
        if (text) return text;
      }
      return '';
    })()
  `;
  try {
    const res = await controllerAction('evaluate', { script });
    const value = unwrapResult(res);
    if (typeof value === 'string' && value) {
      return parsePlatformDate(value);
    }
  } catch (e) {
    // Best effort: a missing timestamp must not abort processing of the post.
    console.log('[WeiboCollectFromLinks] Could not extract post time:', e);
  }
  return null;
}
246
+
247
/**
 * Collects comments for the currently open post.
 *
 * Returns an empty result straight away when comment collection is disabled.
 * Otherwise `WeiboCollectCommentsBlock` is imported dynamically and run with
 * the session's settings. Any failure is logged and turned into an empty
 * result.
 *
 * @returns The collected comments plus the collector's stats, or an empty
 *   list with `stats: undefined` when disabled or on error.
 */
async function collectPostComments(): Promise<{ comments: any[], stats?: any }> {
  if (enableComments) {
    try {
      // Dynamic import: the module is only loaded when comments are requested.
      const { execute: runCommentCollector } = await import('./WeiboCollectCommentsBlock.js');
      const collected = await runCommentCollector({
        sessionId: profile,
        serviceUrl,
        maxComments,
        maxRounds: 20,
        expandReplies: true,
      });
      const comments = collected.comments || [];
      return { comments, stats: collected.stats };
    } catch (e: any) {
      console.error(`[WeiboCollectFromLinks] Comment collection error: ${e.message}`);
    }
  }
  return { comments: [], stats: undefined };
}
270
+
271
+ let processedCount = 0;
272
+ let persistedCount = 0;
273
+ let totalComments = 0;
274
+ let errors = 0;
275
+
276
+ try {
277
+ const targetLinks = links.slice(0, targetCount);
278
+
279
+ for (const link of targetLinks) {
280
+ processedCount++;
281
+ console.log(`[WeiboCollectFromLinks] Processing: ${link.statusId}`);
282
+
283
+ try {
284
+ await gotoUrl(link.safeUrl);
285
+ let currentUrl = await getCurrentUrl();
286
+ if (!currentUrl) {
287
+ await new Promise(r => setTimeout(r, 1200));
288
+ currentUrl = await getCurrentUrl();
289
+ }
290
+ console.log(`[WeiboCollectFromLinks] currentUrl=${currentUrl || '<empty>'}, target=${link.statusId}`);
291
+
292
+ if (currentUrl && !currentUrl.includes(link.statusId)) {
293
+ console.warn(`[WeiboCollectFromLinks] URL mismatch: ${currentUrl}`);
294
+ errors++;
295
+ continue;
296
+ }
297
+
298
+ const content = await extractPostContent();
299
+ const publishedAtInfo = await extractPostTime();
300
+ const { comments, stats } = await collectPostComments();
301
+
302
+ await saveMarkdown(keywordDir, link, content, comments, stats, publishedAtInfo);
303
+ persistedCount++;
304
+ totalComments += comments.length;
305
+
306
+ console.log(`[WeiboCollectFromLinks] Saved: ${link.statusId}, content: ${content.length} chars, comments: ${comments.length}`);
307
+ } catch (e: any) {
308
+ console.error(`[WeiboCollectFromLinks] Error processing ${link.statusId}: ${e.message}`);
309
+ errors++;
310
+ }
311
+
312
+ await new Promise(r => setTimeout(r, 500)); // 减少间隔
313
+ }
314
+
315
+ return {
316
+ success: true,
317
+ keywordDir,
318
+ linksPath,
319
+ processedCount,
320
+ persistedCount,
321
+ stats: {
322
+ postsProcessed: processedCount,
323
+ totalComments,
324
+ errors,
325
+ },
326
+ };
327
+ } catch (error: any) {
328
+ return {
329
+ success: false,
330
+ keywordDir,
331
+ linksPath,
332
+ processedCount,
333
+ persistedCount,
334
+ stats: { postsProcessed: processedCount, totalComments, errors },
335
+ error: error.message,
336
+ };
337
+ }
338
+ }
@@ -1,5 +1,6 @@
1
1
  import os from 'node:os';
2
2
  import path from 'node:path';
3
+ import { existsSync } from 'node:fs';
3
4
 
4
5
  export interface DownloadPathInput {
5
6
  platform: string;
@@ -16,6 +17,21 @@ export function sanitizeForPath(name: string): string {
16
17
 
17
18
  export function resolveDownloadRoot(custom?: string, homeDir?: string): string {
18
19
  if (custom && custom.trim()) return custom;
20
+ if (process.platform === 'win32') {
21
+ try {
22
+ if (existsSync('D:\\')) return 'D:\\webauto';
23
+ } catch {
24
+ // ignore
25
+ }
26
+ if (homeDir && homeDir.trim()) return path.join(homeDir, '.webauto');
27
+ const envHome = process.env.HOME || process.env.USERPROFILE;
28
+ if (envHome && envHome.trim()) return path.join(envHome, '.webauto');
29
+ try {
30
+ return path.join(os.homedir(), '.webauto');
31
+ } catch {
32
+ return path.join(process.cwd(), '.webauto');
33
+ }
34
+ }
19
35
  if (homeDir && homeDir.trim()) return path.join(homeDir, '.webauto', 'download');
20
36
  const envHome = process.env.HOME || process.env.USERPROFILE;
21
37
  if (envHome && envHome.trim()) return path.join(envHome, '.webauto', 'download');