@web-auto/webauto 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/apps/desktop-console/default-settings.json +2 -2
  2. package/apps/desktop-console/dist/main/index.mjs +915 -85
  3. package/apps/desktop-console/dist/main/preload.mjs +7 -0
  4. package/apps/desktop-console/dist/renderer/index.html +622 -50
  5. package/apps/desktop-console/dist/renderer/index.js +2415 -470
  6. package/apps/desktop-console/dist/renderer/run.mts +6 -5
  7. package/apps/desktop-console/entry/ui-cli.mjs +672 -0
  8. package/apps/desktop-console/entry/ui-console.mjs +416 -29
  9. package/apps/webauto/entry/account.mjs +89 -53
  10. package/apps/webauto/entry/browser-status.mjs +7 -10
  11. package/apps/webauto/entry/lib/account-detect.mjs +254 -28
  12. package/apps/webauto/entry/lib/account-store.mjs +219 -30
  13. package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
  14. package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
  15. package/apps/webauto/entry/lib/profilepool.mjs +14 -5
  16. package/apps/webauto/entry/lib/quota-status.mjs +23 -0
  17. package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
  18. package/apps/webauto/entry/profilepool.mjs +106 -17
  19. package/apps/webauto/entry/schedule.mjs +612 -0
  20. package/apps/webauto/entry/weibo-unified.mjs +134 -0
  21. package/apps/webauto/entry/xhs-install.mjs +236 -29
  22. package/apps/webauto/entry/xhs-status.mjs +5 -2
  23. package/apps/webauto/entry/xhs-unified.mjs +631 -98
  24. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
  25. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
  26. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
  27. package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
  28. package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
  29. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
  30. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
  31. package/bin/camoufox-cli.mjs +61 -0
  32. package/bin/webauto.mjs +301 -54
  33. package/dist/modules/camo-backend/src/index.js +49 -1
  34. package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
  35. package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
  36. package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
  37. package/dist/modules/collection-manager/bloom-filter.js +91 -0
  38. package/dist/modules/collection-manager/date-utils.js +275 -0
  39. package/dist/modules/collection-manager/index.js +258 -0
  40. package/dist/modules/collection-manager/storage.js +195 -0
  41. package/dist/modules/collection-manager/types.js +47 -0
  42. package/dist/modules/logging/src/index.js +1 -1
  43. package/dist/modules/process-registry/index.js +230 -0
  44. package/dist/modules/rate-limiter/index.js +242 -0
  45. package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
  46. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
  47. package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
  48. package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
  49. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
  50. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
  51. package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
  52. package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
  53. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
  54. package/dist/modules/workflow/config/workflowRegistry.js +2 -0
  55. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
  56. package/dist/modules/workflow/src/runner.js +6 -0
  57. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
  58. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
  59. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
  60. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
  61. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
  62. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
  63. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
  64. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
  65. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
  66. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
  67. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  68. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
  69. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  70. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
  71. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
  72. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
  73. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
  74. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
  75. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
  76. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
  77. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  78. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
  79. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  80. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
  81. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  82. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
  83. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  84. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
  85. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  86. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
  87. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  88. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
  89. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  90. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
  91. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  92. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
  93. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  94. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
  95. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  96. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
  97. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  98. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
  99. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  100. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
  101. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  102. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
  103. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  104. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
  105. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  106. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
  107. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  108. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
  109. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  110. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
  111. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  112. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
  113. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  114. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
  115. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  116. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
  117. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  118. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
  119. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  120. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
  121. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
  122. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
  123. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  124. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
  125. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
  126. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
  127. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  128. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
  129. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  130. package/dist/services/shared/serviceProcessLogger.js +1 -1
  131. package/dist/services/unified-api/server.js +105 -11
  132. package/modules/camo-backend/src/index.ts +46 -1
  133. package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
  134. package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
  135. package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
  136. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
  137. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
  138. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
  139. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
  140. package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
  141. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
  142. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
  143. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
  144. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
  145. package/modules/collection-manager/bloom-filter.ts +112 -0
  146. package/modules/collection-manager/date-utils.ts +316 -0
  147. package/modules/collection-manager/index.ts +309 -0
  148. package/modules/collection-manager/package.json +10 -0
  149. package/modules/collection-manager/storage.ts +174 -0
  150. package/modules/collection-manager/types.ts +156 -0
  151. package/modules/logging/src/index.ts +1 -1
  152. package/modules/process-registry/index.ts +284 -0
  153. package/modules/rate-limiter/index.ts +322 -0
  154. package/modules/state/src/paths.ts +9 -1
  155. package/modules/task-scheduler/index.ts +293 -0
  156. package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
  157. package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
  158. package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
  159. package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
  160. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
  161. package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
  162. package/modules/workflow/config/workflowRegistry.ts +2 -0
  163. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
  164. package/modules/workflow/src/runner.ts +6 -0
  165. package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
  166. package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
  167. package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
  168. package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
  169. package/package.json +14 -5
  170. package/scripts/postinstall-resources.mjs +62 -0
  171. package/scripts/test/run-coverage.mjs +76 -0
  172. package/scripts/weibo/search.ts +49 -0
  173. package/services/shared/serviceProcessLogger.ts +1 -1
  174. package/services/unified-api/server.ts +98 -12
@@ -0,0 +1,208 @@
1
+ /**
2
+ * Weibo Search Links Collection Block
3
+ * Collects post links from Weibo search results
4
+ *
5
+ * Collection ID format: search:<keyword>
6
+ *
7
+ * Modes:
8
+ * - fresh: Clear existing data and recollect
9
+ * - incremental: Keep existing data and add new posts
10
+ */
11
+ import { BaseBlock } from '../base';
12
+ import { CollectionDataManager } from '../../collection-manager/index.js';
13
+ import { ProcessRegistry } from '../../process-registry/index.js';
14
+ import { RateLimiter } from '../../rate-limiter/index.js';
15
/**
 * Workflow block that walks Weibo search result pages for a keyword and
 * records every post link the collection data manager does not already hold.
 *
 * Result shape (success and failure alike):
 *   { success, error?, posts, linksFile, collectionId, stats }
 */
export class WeiboCollectSearchLinksBlock extends BaseBlock {
    name = 'weibo-collect-search-links';
    description = 'Collect Weibo post links from search results';

    /**
     * Run the collection.
     *
     * @param {object} config
     * @param {string} config.profile - Forwarded to the process registry entry.
     * @param {string} config.keyword - Search keyword; also part of the collection spec.
     * @param {number} [config.target] - Number of new posts to collect.
     * @param {number} [config.targetCount] - Alias for `target` (the weibo-search
     *   workflow definition supplies this key); used only when `target` is nullish.
     * @param {string} [config.mode='incremental'] - Passed to the data manager.
     * @param {number} [config.maxPages=0] - Page cap; 0 disables the cap.
     * @param {object} context - Must provide `page`; `env` defaults to 'debug'.
     * @returns {Promise<object>} Result object described on the class.
     */
    async execute(config, context) {
        const { profile, keyword, mode = 'incremental', maxPages = 0 } = config;
        // Without a numeric target the loop condition below is always false and the
        // block would report success while collecting nothing, so normalise it here.
        const target = Number(config.target ?? config.targetCount);

        const dataManager = new CollectionDataManager({
            platform: 'weibo',
            env: context.env || 'debug',
            spec: { source: 'search', keyword },
            mode
        });
        await dataManager.init();
        const paths = dataManager.getPaths();

        // Single place that builds the failure result so every early exit agrees.
        const fail = (error) => ({
            success: false,
            error,
            posts: 0,
            linksFile: paths.linksPath,
            collectionId: dataManager.getCollectionId(),
            stats: {
                totalPosts: 0,
                newPosts: 0,
                duplicatesSkipped: 0
            }
        });

        // Validate inputs BEFORE registering the process: these returns happen
        // outside the try/finally, so a registration made earlier would never
        // be unregistered.
        const page = context.page;
        if (!page) {
            return fail('Page not available');
        }
        if (Number.isNaN(target)) {
            return fail('Invalid target: expected a number in config.target or config.targetCount');
        }

        const processRegistry = ProcessRegistry.getInstance();
        const processId = processRegistry.register({
            name: `weibo-search-${keyword}`,
            platform: 'weibo',
            profile,
            metadata: { collectionId: dataManager.getCollectionId() }
        });
        const rateLimiter = RateLimiter.getInstance();

        try {
            // Ask the rate limiter for permission before hitting the search page.
            const searchQuota = await rateLimiter.requestQuota('search', keyword);
            if (!searchQuota.allowed) {
                return fail(`Search quota exceeded: ${searchQuota.reason}`);
            }

            const searchUrl = `https://s.weibo.com/weibo?q=${encodeURIComponent(keyword)}`;
            await page.goto(searchUrl, { waitUntil: 'networkidle' });
            await page.waitForTimeout(2000);

            // A URL containing "login" or "passport" after navigation is treated as a login wall.
            const currentUrl = page.url();
            if (currentUrl.includes('login') || currentUrl.includes('passport')) {
                return fail('Login required');
            }

            const collectedPosts = [];
            const maxNoNewPosts = 3;
            let currentPage = 1;
            let noNewPostsCount = 0;

            while (collectedPosts.length < target && noNewPostsCount < maxNoNewPosts) {
                // A falsy heartbeat result stops the collection.
                if (!processRegistry.heartbeat(processId)) {
                    break;
                }

                const visiblePosts = await this.collectVisiblePosts(page);
                let newPostsThisRound = 0;
                for (const post of visiblePosts) {
                    if (dataManager.hasPost(post.id)) {
                        continue;
                    }
                    await dataManager.addPost({
                        id: post.id,
                        url: post.url,
                        collectedAt: new Date().toISOString()
                    });
                    collectedPosts.push(post);
                    newPostsThisRound++;
                    if (collectedPosts.length >= target) {
                        break;
                    }
                }

                // Give up after maxNoNewPosts consecutive rounds without anything new.
                noNewPostsCount = newPostsThisRound === 0 ? noNewPostsCount + 1 : 0;

                if (maxPages > 0 && currentPage >= maxPages) {
                    break;
                }

                if (collectedPosts.length < target && noNewPostsCount < maxNoNewPosts) {
                    const hasNextPage = await this.goToNextPage(page, currentPage);
                    if (!hasNextPage) {
                        break;
                    }
                    currentPage++;
                    await page.waitForTimeout(2000);
                }
            }

            await dataManager.persist();
            const stats = dataManager.getStats();
            return {
                success: true,
                posts: collectedPosts.length,
                linksFile: paths.linksPath,
                collectionId: dataManager.getCollectionId(),
                stats: {
                    totalPosts: stats.totalPosts,
                    newPosts: stats.newPosts,
                    duplicatesSkipped: stats.duplicatesSkipped
                }
            };
        }
        finally {
            processRegistry.unregister(processId);
        }
    }

    /**
     * Extract `{ id, url }` for each `.card-wrap` element on the current page
     * whose first `/status/` link carries a numeric status id.
     *
     * @param {object} page - Browser page exposing `evaluate`.
     * @returns {Promise<Array<{id: string, url: string}>>}
     */
    async collectVisiblePosts(page) {
        return page.evaluate(() => {
            const results = [];
            for (const card of document.querySelectorAll('.card-wrap')) {
                const link = card.querySelector('a[href*="/status/"]');
                if (!link) {
                    continue;
                }
                const href = link.href;
                const match = href.match(/status\/(\d+)/);
                if (match) {
                    results.push({ id: match[1], url: href });
                }
            }
            return results;
        });
    }

    /**
     * Try to advance to the page after `currentPage`.
     *
     * @param {object} page - Browser page exposing `$` and `waitForLoadState`.
     * @param {number} currentPage - 1-based number of the page currently shown.
     * @returns {Promise<boolean>} true when a navigation control was clicked;
     *   false when none was found or the click/wait threw (best effort).
     */
    async goToNextPage(page, currentPage) {
        const nextPage = currentPage + 1;
        try {
            // Match the page parameter exactly (end of href, or followed by "&").
            // A plain substring test for "page=2" would also match "page=20".
            const nextLink = await page.$(`a[href$="page=${nextPage}"], a[href*="page=${nextPage}&"]`);
            if (nextLink) {
                await nextLink.click();
                await page.waitForLoadState('networkidle');
                return true;
            }
            // Fall back to a generic "next" control.
            const nextBtn = await page.$('.next');
            if (nextBtn) {
                await nextBtn.click();
                await page.waitForLoadState('networkidle');
                return true;
            }
            return false;
        }
        catch {
            return false;
        }
    }
}
207
+ export default WeiboCollectSearchLinksBlock;
208
+ //# sourceMappingURL=WeiboCollectSearchLinksBlock.js.map
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Weibo Timeline List Collection Block
3
+ * Collects posts from homepage timeline feed
4
+ *
5
+ * Collection ID format: timeline:YYYY-MM-DD
6
+ */
7
+ import { BaseBlock } from '../base';
8
+ import { CollectionDataManager } from '../../collection-manager';
9
+ import { ProcessRegistry } from '../../process-registry';
10
+ import { RateLimiter } from '../../rate-limiter';
11
/**
 * Workflow block that scrolls the Weibo homepage feed and records every post
 * link the collection data manager does not already hold.
 *
 * Result shape: { success, error?, posts, linksFile, collectionId }
 */
export class WeiboCollectTimelineListBlock extends BaseBlock {
    name = 'weibo-collect-timeline-list';
    description = 'Collect Weibo timeline posts from homepage feed';

    /**
     * Run the collection.
     *
     * @param {object} config
     * @param {string} config.profile - Forwarded to the process registry entry.
     * @param {number} config.target - Number of new posts to collect.
     * @param {string} [config.mode='incremental'] - Passed to the data manager.
     * @param {string} [config.date] - Collection date; defaults to the date part
     *   of the current ISO timestamp.
     * @param {object} context - Must provide `page`; `env` defaults to 'debug'.
     * @returns {Promise<object>} Result object described on the class.
     */
    async execute(config, context) {
        const { profile, mode = 'incremental', date } = config;
        // Without a numeric target the loop condition below is always false and the
        // block would report success while collecting nothing, so normalise it here.
        const target = Number(config.target ?? NaN);
        const today = date || new Date().toISOString().split('T')[0];

        const dataManager = new CollectionDataManager({
            platform: 'weibo',
            env: context.env || 'debug',
            spec: { source: 'timeline', date: today },
            mode
        });
        await dataManager.init();
        const paths = dataManager.getPaths();

        // Single place that builds the failure result so every early exit agrees.
        const fail = (error) => ({
            success: false,
            error,
            posts: 0,
            linksFile: paths.linksPath,
            collectionId: dataManager.getCollectionId()
        });

        // Validate inputs BEFORE registering the process: these returns happen
        // outside the try/finally, so a registration made earlier would never
        // be unregistered.
        const page = context.page;
        if (!page) {
            return fail('Page not available');
        }
        if (Number.isNaN(target)) {
            return fail('Invalid target: expected a number in config.target');
        }

        const processRegistry = ProcessRegistry.getInstance();
        const processId = processRegistry.register({
            name: `weibo-timeline-${today}`,
            platform: 'weibo',
            profile,
            metadata: { collectionId: dataManager.getCollectionId() }
        });
        // NOTE(review): the limiter instance is obtained but no quota is requested
        // in this block; the call is kept so any effect of getInstance() is unchanged.
        RateLimiter.getInstance();

        try {
            await page.goto('https://weibo.com', { waitUntil: 'networkidle' });
            await page.waitForTimeout(2000);

            const collectedPosts = [];
            const maxNoNewPosts = 3;
            let noNewPostsCount = 0;

            while (collectedPosts.length < target && noNewPostsCount < maxNoNewPosts) {
                // A falsy heartbeat result stops the collection.
                if (!processRegistry.heartbeat(processId)) {
                    break;
                }

                const posts = await this.collectVisiblePosts(page);
                let newPostsThisRound = 0;
                for (const post of posts) {
                    if (dataManager.hasPost(post.id)) {
                        continue;
                    }
                    await dataManager.addPost({
                        id: post.id,
                        url: post.url,
                        collectedAt: new Date().toISOString()
                    });
                    collectedPosts.push(post);
                    newPostsThisRound++;
                    if (collectedPosts.length >= target) {
                        break;
                    }
                }

                // Give up after maxNoNewPosts consecutive rounds without anything new.
                noNewPostsCount = newPostsThisRound === 0 ? noNewPostsCount + 1 : 0;

                // Scroll down so the feed can load further posts.
                if (collectedPosts.length < target) {
                    await page.mouse.wheel(0, 800);
                    await page.waitForTimeout(1500);
                }
            }

            await dataManager.persist();
            return {
                success: true,
                posts: collectedPosts.length,
                linksFile: paths.linksPath,
                collectionId: dataManager.getCollectionId()
            };
        }
        finally {
            processRegistry.unregister(processId);
        }
    }

    /**
     * Extract `{ id, url }` for each `div[class*="_feed_"]` or `article` element
     * on the current page whose first `/status/` link carries a numeric status id.
     *
     * @param {object} page - Browser page exposing `evaluate`.
     * @returns {Promise<Array<{id: string, url: string}>>}
     */
    async collectVisiblePosts(page) {
        return page.evaluate(() => {
            const results = [];
            for (const el of document.querySelectorAll('div[class*="_feed_"], article')) {
                const link = el.querySelector('a[href*="/status/"]');
                if (!link) {
                    continue;
                }
                const href = link.href;
                const match = href.match(/status\/(\d+)/);
                if (match) {
                    results.push({ id: match[1], url: href });
                }
            }
            return results;
        });
    }
}
127
+ export default WeiboCollectTimelineListBlock;
128
+ //# sourceMappingURL=WeiboCollectTimelineListBlock.js.map
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Weibo User Posts List Collection Block
3
+ * Collects posts from a specific user's profile page
4
+ *
5
+ * Collection ID format: user:<userId>:<userName>
6
+ */
7
+ import { BaseBlock } from '../base';
8
+ import { CollectionDataManager } from '../../collection-manager';
9
+ import { ProcessRegistry } from '../../process-registry';
10
/**
 * Workflow block that scrolls a Weibo user's profile page and records every
 * post link the collection data manager does not already hold.
 *
 * Result shape: { success, error?, posts, linksFile, collectionId }
 */
export class WeiboCollectUserPostsListBlock extends BaseBlock {
    name = 'weibo-collect-user-posts-list';
    description = 'Collect Weibo posts from a specific user profile';

    /**
     * Run the collection.
     *
     * @param {object} config
     * @param {string} config.profile - Forwarded to the process registry entry.
     * @param {string} config.userId - Used to build `https://weibo.com/u/<userId>`.
     * @param {string} config.userName - Stored in the collection spec.
     * @param {number} config.target - Number of new posts to collect.
     * @param {string} [config.mode='incremental'] - Passed to the data manager.
     * @param {object} context - Must provide `page`; `env` defaults to 'debug'.
     * @returns {Promise<object>} Result object described on the class.
     */
    async execute(config, context) {
        const { profile, userId, userName, mode = 'incremental' } = config;
        // Without a numeric target the loop condition below is always false and the
        // block would report success while collecting nothing, so normalise it here.
        const target = Number(config.target ?? NaN);

        const dataManager = new CollectionDataManager({
            platform: 'weibo',
            env: context.env || 'debug',
            spec: { source: 'user', userId, userName },
            mode
        });
        await dataManager.init();
        const paths = dataManager.getPaths();

        // Single place that builds the failure result so every early exit agrees.
        const fail = (error) => ({
            success: false,
            error,
            posts: 0,
            linksFile: paths.linksPath,
            collectionId: dataManager.getCollectionId()
        });

        // Validate inputs BEFORE registering the process: these returns happen
        // outside the try/finally, so a registration made earlier would never
        // be unregistered.
        const page = context.page;
        if (!page) {
            return fail('Page not available');
        }
        if (Number.isNaN(target)) {
            return fail('Invalid target: expected a number in config.target');
        }

        const processRegistry = ProcessRegistry.getInstance();
        const processId = processRegistry.register({
            name: `weibo-user-${userId}`,
            platform: 'weibo',
            profile,
            metadata: { collectionId: dataManager.getCollectionId() }
        });

        try {
            const userUrl = `https://weibo.com/u/${userId}`;
            await page.goto(userUrl, { waitUntil: 'networkidle' });
            await page.waitForTimeout(2000);

            const collectedPosts = [];
            const maxNoNewPosts = 3;
            let noNewPostsCount = 0;

            while (collectedPosts.length < target && noNewPostsCount < maxNoNewPosts) {
                // A falsy heartbeat result stops the collection.
                if (!processRegistry.heartbeat(processId)) {
                    break;
                }

                const posts = await this.collectVisiblePosts(page);
                let newPostsThisRound = 0;
                for (const post of posts) {
                    if (dataManager.hasPost(post.id)) {
                        continue;
                    }
                    await dataManager.addPost({
                        id: post.id,
                        url: post.url,
                        collectedAt: new Date().toISOString()
                    });
                    collectedPosts.push(post);
                    newPostsThisRound++;
                    if (collectedPosts.length >= target) {
                        break;
                    }
                }

                // Give up after maxNoNewPosts consecutive rounds without anything new.
                noNewPostsCount = newPostsThisRound === 0 ? noNewPostsCount + 1 : 0;

                // Scroll down so the profile can load further posts.
                if (collectedPosts.length < target) {
                    await page.mouse.wheel(0, 800);
                    await page.waitForTimeout(1500);
                }
            }

            await dataManager.persist();
            return {
                success: true,
                posts: collectedPosts.length,
                linksFile: paths.linksPath,
                collectionId: dataManager.getCollectionId()
            };
        }
        finally {
            processRegistry.unregister(processId);
        }
    }

    /**
     * Extract `{ id, url }` for each `div[class*="feed"]` or `article` element
     * on the current page whose first `/status/` link carries a numeric status id.
     *
     * @param {object} page - Browser page exposing `evaluate`.
     * @returns {Promise<Array<{id: string, url: string}>>}
     */
    async collectVisiblePosts(page) {
        return page.evaluate(() => {
            const results = [];
            for (const el of document.querySelectorAll('div[class*="feed"], article')) {
                const link = el.querySelector('a[href*="/status/"]');
                if (!link) {
                    continue;
                }
                const href = link.href;
                const match = href.match(/status\/(\d+)/);
                if (match) {
                    results.push({ id: match[1], url: href });
                }
            }
            return results;
        });
    }
}
126
+ export default WeiboCollectUserPostsListBlock;
127
+ //# sourceMappingURL=WeiboCollectUserPostsListBlock.js.map
@@ -1,5 +1,6 @@
1
1
  import os from 'node:os';
2
2
  import path from 'node:path';
3
+ import { existsSync } from 'node:fs';
3
4
  export function sanitizeForPath(name) {
4
5
  if (!name)
5
6
  return '';
@@ -8,6 +9,26 @@ export function sanitizeForPath(name) {
8
9
  export function resolveDownloadRoot(custom, homeDir) {
9
10
  if (custom && custom.trim())
10
11
  return custom;
12
+ if (process.platform === 'win32') {
13
+ try {
14
+ if (existsSync('D:\\'))
15
+ return 'D:\\webauto';
16
+ }
17
+ catch {
18
+ // ignore
19
+ }
20
+ if (homeDir && homeDir.trim())
21
+ return path.join(homeDir, '.webauto');
22
+ const envHome = process.env.HOME || process.env.USERPROFILE;
23
+ if (envHome && envHome.trim())
24
+ return path.join(envHome, '.webauto');
25
+ try {
26
+ return path.join(os.homedir(), '.webauto');
27
+ }
28
+ catch {
29
+ return path.join(process.cwd(), '.webauto');
30
+ }
31
+ }
11
32
  if (homeDir && homeDir.trim())
12
33
  return path.join(homeDir, '.webauto', 'download');
13
34
  const envHome = process.env.HOME || process.env.USERPROFILE;
@@ -6,6 +6,7 @@ import { xiaohongshuPhase3CollectWorkflowV1 } from '../definitions/xiaohongshu-p
6
6
  import { xiaohongshuPhase1WorkflowV3 } from '../definitions/xiaohongshu-phase1-workflow-v3.js';
7
7
  import { xiaohongshuPhase2LinksWorkflowV3 } from '../definitions/xiaohongshu-phase2-links-workflow-v3.js';
8
8
  import { xiaohongshuPhase34FromLinksWorkflowV3 } from '../definitions/xiaohongshu-phase34-from-links-workflow-v3.js';
9
+ import { weiboSearchWorkflowV1 } from '../definitions/weibo-search-workflow-v1.js'; // 导入微博搜索工作流
9
10
  const registry = new Map();
10
11
  function register(def, idOverride) {
11
12
  const id = idOverride || def.id;
@@ -23,6 +24,7 @@ register(xiaohongshuPhase3CollectWorkflowV1, 'xiaohongshu-collect-phase3-v1');
23
24
  register(xiaohongshuPhase1WorkflowV3, 'xiaohongshu-phase1-v3');
24
25
  register(xiaohongshuPhase2LinksWorkflowV3, 'xiaohongshu-phase2-links-v3');
25
26
  register(xiaohongshuPhase34FromLinksWorkflowV3, 'xiaohongshu-phase34-from-links-v3');
27
+ register(weiboSearchWorkflowV1, 'weibo-search-v1'); // 注册微博搜索工作流
26
28
  export function getWorkflowDefinition(id) {
27
29
  return registry.get(id);
28
30
  }
@@ -0,0 +1,47 @@
1
/**
 * Weibo search workflow, v1.
 *
 * Steps:
 *  1) Run the search
 *  2) Collect links from the search results (pagination supported)
 *  3) Collect detail content and comments from those links
 */

/**
 * Build one workflow step. Every step shares the session/keyword/env/serviceUrl
 * inputs; step-specific inputs are spliced in between `env` and `serviceUrl`
 * so the key order of each `input` object stays the same as when written out by hand.
 */
const buildWeiboSearchStep = (blockName, extraInput = {}) => ({
    blockName,
    input: {
        sessionId: '$sessionId',
        keyword: '$keyword',
        env: '$env',
        ...extraInput,
        serviceUrl: '$serviceUrl',
    },
});

export const weiboSearchWorkflowV1 = {
    id: 'weibo-search-v1',
    name: '微博搜索采集 v1',
    steps: [
        buildWeiboSearchStep('ExecuteWeiboSearchBlock'),
        buildWeiboSearchStep('WeiboCollectSearchLinksBlock', {
            targetCount: '$targetCount',
            maxPages: '$maxPages',
        }),
        buildWeiboSearchStep('WeiboCollectFromLinksBlock', {
            targetCount: '$targetCount',
            maxComments: '$maxComments',
            collectComments: '$collectComments',
        }),
    ],
};
47
+ //# sourceMappingURL=weibo-search-workflow-v1.js.map
@@ -16,6 +16,9 @@ import * as XiaohongshuFullCollectBlock from '../blocks/XiaohongshuFullCollectBl
16
16
  import * as XiaohongshuCollectLinksBlock from '../blocks/XiaohongshuCollectLinksBlock.js';
17
17
  import * as XiaohongshuCollectFromLinksBlock from '../blocks/XiaohongshuCollectFromLinksBlock.js';
18
18
  import * as ErrorRecoveryBlock from '../blocks/ErrorRecoveryBlock.js';
19
+ import * as ExecuteWeiboSearchBlock from '../blocks/ExecuteWeiboSearchBlock.js';
20
+ import * as WeiboCollectFromLinksBlock from '../blocks/WeiboCollectFromLinksBlock.js';
21
+ import * as WeiboCollectCommentsBlock from '../blocks/WeiboCollectCommentsBlock.js';
19
22
  import * as AnchorVerificationBlock from '../blocks/AnchorVerificationBlock.js';
20
23
  import * as CallWorkflowBlock from '../blocks/CallWorkflowBlock.js';
21
24
  import * as RecordFixtureBlock from '../blocks/RecordFixtureBlock.js';
@@ -42,6 +45,9 @@ export function createDefaultWorkflowExecutor() {
42
45
  executor.registerBlock('XiaohongshuCollectLinksBlock', { execute: XiaohongshuCollectLinksBlock.execute });
43
46
  executor.registerBlock('XiaohongshuCollectFromLinksBlock', { execute: XiaohongshuCollectFromLinksBlock.execute });
44
47
  executor.registerBlock('OrganizeXhsNotesBlock', { execute: OrganizeXhsNotesBlock.execute });
48
+ executor.registerBlock('ExecuteWeiboSearchBlock', { execute: ExecuteWeiboSearchBlock.execute });
49
+ executor.registerBlock('WeiboCollectFromLinksBlock', { execute: WeiboCollectFromLinksBlock.execute });
50
+ executor.registerBlock('WeiboCollectCommentsBlock', { execute: WeiboCollectCommentsBlock.execute });
45
51
  return executor;
46
52
  }
47
53
  export async function runWorkflowById(workflowId, initialContext) {
@@ -7,6 +7,7 @@
7
7
  * - 生成 README.md(含相对路径引用)
8
8
  */
9
9
  import { promises as fs } from 'node:fs';
10
+ import { getCurrentTimestamp } from '../../../../collection-manager/date-utils.js';
10
11
  import os from 'node:os';
11
12
  import path from 'node:path';
12
13
  function resolveDownloadRoot() {
@@ -79,6 +80,9 @@ export async function execute(input) {
79
80
  lines.push(`- **Note ID**: ${noteId}`);
80
81
  lines.push(`- **作者**: ${detail.authorName || '未知'} (${detail.authorId || 'N/A'})`);
81
82
  lines.push(`- **发布时间**: ${detail.publishTime || '未知'}`);
83
+ const ts = getCurrentTimestamp();
84
+ lines.push(`- **采集时间**: ${ts.collectedAt}`);
85
+ lines.push(`- **采集时间(本地)**: ${ts.collectedAtLocal}`);
82
86
  lines.push(`- **原始链接**: \`https://www.xiaohongshu.com/explore/${noteId}\``);
83
87
  lines.push('');
84
88
  lines.push(`## 正文`);
@@ -80,7 +80,7 @@ function parseLikeRuleToken(token) {
80
80
  const raw = String(token || '').trim();
81
81
  if (!raw)
82
82
  return null;
83
- const m = raw.match(/^\\{\\s*(.+?)\\s*([+\\-\\uFF0B\\uFF0D])\\s*(.+?)\\s*\\}$/);
83
+ const m = raw.match(/^\{\s*(.+?)\s*([+\-\uFF0B\uFF0D])\s*(.+?)\s*\}$/);
84
84
  if (!m) {
85
85
  return { kind: 'contains', include: raw, raw };
86
86
  }
@@ -88,7 +88,7 @@ function parseLikeRuleToken(token) {
88
88
  const right = normalizeText(m[3]);
89
89
  if (!left || !right)
90
90
  return null;
91
- const op = m[2] === '\\uFF0B' ? '+' : m[2] === '\\uFF0D' ? '-' : m[2];
91
+ const op = m[2] === '\uFF0B' ? '+' : m[2] === '\uFF0D' ? '-' : m[2];
92
92
  if (op === '+') {
93
93
  return { kind: 'and', includeA: left, includeB: right, raw: `{${left} + ${right}}` };
94
94
  }