@web-auto/webauto 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174):
  1. package/apps/desktop-console/default-settings.json +2 -2
  2. package/apps/desktop-console/dist/main/index.mjs +983 -128
  3. package/apps/desktop-console/dist/main/preload.mjs +7 -0
  4. package/apps/desktop-console/dist/renderer/index.html +622 -50
  5. package/apps/desktop-console/dist/renderer/index.js +2423 -469
  6. package/apps/desktop-console/dist/renderer/run.mts +6 -5
  7. package/apps/desktop-console/entry/ui-cli.mjs +672 -0
  8. package/apps/desktop-console/entry/ui-console.mjs +416 -29
  9. package/apps/webauto/entry/account.mjs +89 -53
  10. package/apps/webauto/entry/browser-status.mjs +7 -10
  11. package/apps/webauto/entry/lib/account-detect.mjs +254 -28
  12. package/apps/webauto/entry/lib/account-store.mjs +219 -30
  13. package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
  14. package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
  15. package/apps/webauto/entry/lib/profilepool.mjs +14 -5
  16. package/apps/webauto/entry/lib/quota-status.mjs +23 -0
  17. package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
  18. package/apps/webauto/entry/profilepool.mjs +106 -17
  19. package/apps/webauto/entry/schedule.mjs +612 -0
  20. package/apps/webauto/entry/weibo-unified.mjs +134 -0
  21. package/apps/webauto/entry/xhs-install.mjs +256 -31
  22. package/apps/webauto/entry/xhs-status.mjs +5 -2
  23. package/apps/webauto/entry/xhs-unified.mjs +631 -98
  24. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
  25. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
  26. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
  27. package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
  28. package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
  29. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
  30. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
  31. package/bin/camoufox-cli.mjs +61 -0
  32. package/bin/webauto.mjs +301 -54
  33. package/dist/modules/camo-backend/src/index.js +49 -1
  34. package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
  35. package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
  36. package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
  37. package/dist/modules/collection-manager/bloom-filter.js +91 -0
  38. package/dist/modules/collection-manager/date-utils.js +275 -0
  39. package/dist/modules/collection-manager/index.js +258 -0
  40. package/dist/modules/collection-manager/storage.js +195 -0
  41. package/dist/modules/collection-manager/types.js +47 -0
  42. package/dist/modules/logging/src/index.js +1 -1
  43. package/dist/modules/process-registry/index.js +230 -0
  44. package/dist/modules/rate-limiter/index.js +242 -0
  45. package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
  46. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
  47. package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
  48. package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
  49. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
  50. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
  51. package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
  52. package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
  53. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
  54. package/dist/modules/workflow/config/workflowRegistry.js +2 -0
  55. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
  56. package/dist/modules/workflow/src/runner.js +6 -0
  57. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
  58. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
  59. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
  60. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
  61. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
  62. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
  63. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
  64. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
  65. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
  66. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
  67. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  68. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
  69. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  70. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
  71. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
  72. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
  73. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
  74. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
  75. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
  76. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
  77. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  78. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
  79. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  80. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
  81. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  82. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
  83. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  84. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
  85. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  86. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
  87. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  88. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
  89. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  90. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
  91. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  92. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
  93. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  94. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
  95. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  96. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
  97. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  98. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
  99. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  100. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
  101. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  102. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
  103. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  104. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
  105. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  106. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
  107. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  108. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
  109. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  110. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
  111. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  112. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
  113. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  114. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
  115. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  116. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
  117. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  118. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
  119. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  120. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
  121. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
  122. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
  123. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  124. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
  125. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
  126. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
  127. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  128. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
  129. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  130. package/dist/services/shared/serviceProcessLogger.js +1 -1
  131. package/dist/services/unified-api/server.js +105 -11
  132. package/modules/camo-backend/src/index.ts +46 -1
  133. package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
  134. package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
  135. package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
  136. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
  137. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
  138. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
  139. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
  140. package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
  141. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
  142. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
  143. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
  144. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
  145. package/modules/collection-manager/bloom-filter.ts +112 -0
  146. package/modules/collection-manager/date-utils.ts +316 -0
  147. package/modules/collection-manager/index.ts +309 -0
  148. package/modules/collection-manager/package.json +10 -0
  149. package/modules/collection-manager/storage.ts +174 -0
  150. package/modules/collection-manager/types.ts +156 -0
  151. package/modules/logging/src/index.ts +1 -1
  152. package/modules/process-registry/index.ts +284 -0
  153. package/modules/rate-limiter/index.ts +322 -0
  154. package/modules/state/src/paths.ts +9 -1
  155. package/modules/task-scheduler/index.ts +293 -0
  156. package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
  157. package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
  158. package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
  159. package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
  160. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
  161. package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
  162. package/modules/workflow/config/workflowRegistry.ts +2 -0
  163. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
  164. package/modules/workflow/src/runner.ts +6 -0
  165. package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
  166. package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
  167. package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
  168. package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
  169. package/package.json +13 -4
  170. package/scripts/postinstall-resources.mjs +62 -0
  171. package/scripts/test/run-coverage.mjs +76 -0
  172. package/scripts/weibo/search.ts +49 -0
  173. package/services/shared/serviceProcessLogger.ts +1 -1
  174. package/services/unified-api/server.ts +98 -12
@@ -7,6 +7,7 @@ import { xiaohongshuPhase3CollectWorkflowV1 } from '../definitions/xiaohongshu-p
7
7
  import { xiaohongshuPhase1WorkflowV3 } from '../definitions/xiaohongshu-phase1-workflow-v3.js';
8
8
  import { xiaohongshuPhase2LinksWorkflowV3 } from '../definitions/xiaohongshu-phase2-links-workflow-v3.js';
9
9
  import { xiaohongshuPhase34FromLinksWorkflowV3 } from '../definitions/xiaohongshu-phase34-from-links-workflow-v3.js';
10
+ import { weiboSearchWorkflowV1 } from '../definitions/weibo-search-workflow-v1.js'; // 导入微博搜索工作流
10
11
 
11
12
  const registry = new Map<string, WorkflowDefinitionInput>();
12
13
 
@@ -27,6 +28,7 @@ register(xiaohongshuPhase3CollectWorkflowV1, 'xiaohongshu-collect-phase3-v1');
27
28
  register(xiaohongshuPhase1WorkflowV3, 'xiaohongshu-phase1-v3');
28
29
  register(xiaohongshuPhase2LinksWorkflowV3, 'xiaohongshu-phase2-links-v3');
29
30
  register(xiaohongshuPhase34FromLinksWorkflowV3, 'xiaohongshu-phase34-from-links-v3');
31
+ register(weiboSearchWorkflowV1, 'weibo-search-v1'); // 注册微博搜索工作流
30
32
 
31
33
  export function getWorkflowDefinition(id: string): WorkflowDefinitionInput | undefined {
32
34
  return registry.get(id);
@@ -0,0 +1,47 @@
1
+ /**
2
+ * 微博搜索工作流 v1
3
+ *
4
+ * 流程:
5
+ * 1) 执行搜索
6
+ * 2) 采集搜索结果链接(支持翻页)
7
+ * 3) 从链接采集详情内容和评论
8
+ */
9
+
10
+ export const weiboSearchWorkflowV1 = {
11
+ id: 'weibo-search-v1',
12
+ name: '微博搜索采集 v1',
13
+ steps: [
14
+ {
15
+ blockName: 'ExecuteWeiboSearchBlock',
16
+ input: {
17
+ sessionId: '$sessionId',
18
+ keyword: '$keyword',
19
+ env: '$env',
20
+ serviceUrl: '$serviceUrl',
21
+ },
22
+ },
23
+ {
24
+ blockName: 'WeiboCollectSearchLinksBlock',
25
+ input: {
26
+ sessionId: '$sessionId',
27
+ keyword: '$keyword',
28
+ env: '$env',
29
+ targetCount: '$targetCount',
30
+ maxPages: '$maxPages',
31
+ serviceUrl: '$serviceUrl',
32
+ },
33
+ },
34
+ {
35
+ blockName: 'WeiboCollectFromLinksBlock',
36
+ input: {
37
+ sessionId: '$sessionId',
38
+ keyword: '$keyword',
39
+ env: '$env',
40
+ targetCount: '$targetCount',
41
+ maxComments: '$maxComments',
42
+ collectComments: '$collectComments',
43
+ serviceUrl: '$serviceUrl',
44
+ },
45
+ },
46
+ ],
47
+ };
@@ -16,6 +16,9 @@ import * as XiaohongshuFullCollectBlock from '../blocks/XiaohongshuFullCollectBl
16
16
  import * as XiaohongshuCollectLinksBlock from '../blocks/XiaohongshuCollectLinksBlock.js';
17
17
  import * as XiaohongshuCollectFromLinksBlock from '../blocks/XiaohongshuCollectFromLinksBlock.js';
18
18
  import * as ErrorRecoveryBlock from '../blocks/ErrorRecoveryBlock.js';
19
+ import * as ExecuteWeiboSearchBlock from '../blocks/ExecuteWeiboSearchBlock.js';
20
+ import * as WeiboCollectFromLinksBlock from '../blocks/WeiboCollectFromLinksBlock.js';
21
+ import * as WeiboCollectCommentsBlock from '../blocks/WeiboCollectCommentsBlock.js';
19
22
  import * as AnchorVerificationBlock from '../blocks/AnchorVerificationBlock.js';
20
23
  import * as CallWorkflowBlock from '../blocks/CallWorkflowBlock.js';
21
24
  import * as RecordFixtureBlock from '../blocks/RecordFixtureBlock.js';
@@ -43,6 +46,9 @@ export function createDefaultWorkflowExecutor(): WorkflowExecutor {
43
46
  executor.registerBlock('XiaohongshuCollectLinksBlock', { execute: XiaohongshuCollectLinksBlock.execute });
44
47
  executor.registerBlock('XiaohongshuCollectFromLinksBlock', { execute: XiaohongshuCollectFromLinksBlock.execute });
45
48
  executor.registerBlock('OrganizeXhsNotesBlock', { execute: OrganizeXhsNotesBlock.execute });
49
+ executor.registerBlock('ExecuteWeiboSearchBlock', { execute: ExecuteWeiboSearchBlock.execute });
50
+ executor.registerBlock('WeiboCollectFromLinksBlock', { execute: WeiboCollectFromLinksBlock.execute });
51
+ executor.registerBlock('WeiboCollectCommentsBlock', { execute: WeiboCollectCommentsBlock.execute });
46
52
  return executor;
47
53
  }
48
54
 
@@ -152,7 +152,7 @@ export async function execute(input: StartProfileInput): Promise<StartProfileOut
152
152
  height: viewport.height,
153
153
  }, browserServiceUrl);
154
154
  console.log('[Phase1StartProfile] window positioned at (0,0)');
155
- } catch (err) {
155
+ } catch (err: any) {
156
156
  console.warn('[Phase1StartProfile] window move failed:', err?.message || String(err));
157
157
  }
158
158
  console.log(`[Phase1StartProfile] viewport set: ${viewport.width}x${viewport.height}`);
@@ -8,6 +8,7 @@
8
8
  */
9
9
 
10
10
  import { promises as fs } from 'node:fs';
11
+ import { getCurrentTimestamp } from '../../../../collection-manager/date-utils.js';
11
12
  import os from 'node:os';
12
13
  import path from 'node:path';
13
14
 
@@ -122,6 +123,9 @@ export async function execute(input: PersistDetailInput): Promise<PersistDetailO
122
123
  lines.push(`- **Note ID**: ${noteId}`);
123
124
  lines.push(`- **作者**: ${detail.authorName || '未知'} (${detail.authorId || 'N/A'})`);
124
125
  lines.push(`- **发布时间**: ${detail.publishTime || '未知'}`);
126
+ const ts = getCurrentTimestamp();
127
+ lines.push(`- **采集时间**: ${ts.collectedAt}`);
128
+ lines.push(`- **采集时间(本地)**: ${ts.collectedAtLocal}`);
125
129
  lines.push(`- **原始链接**: \`https://www.xiaohongshu.com/explore/${noteId}\``);
126
130
  lines.push('');
127
131
  lines.push(`## 正文`);
@@ -183,7 +183,7 @@ function parseLikeRuleToken(token: string): LikeRule | null {
183
183
  const raw = String(token || '').trim();
184
184
  if (!raw) return null;
185
185
 
186
- const m = raw.match(/^\\{\\s*(.+?)\\s*([+\\-\\uFF0B\\uFF0D])\\s*(.+?)\\s*\\}$/);
186
+ const m = raw.match(/^\{\s*(.+?)\s*([+\-\uFF0B\uFF0D])\s*(.+?)\s*\}$/);
187
187
  if (!m) {
188
188
  return { kind: 'contains', include: raw, raw };
189
189
  }
@@ -192,7 +192,7 @@ function parseLikeRuleToken(token: string): LikeRule | null {
192
192
  const right = normalizeText(m[3]);
193
193
  if (!left || !right) return null;
194
194
 
195
- const op = m[2] === '\\uFF0B' ? '+' : m[2] === '\\uFF0D' ? '-' : m[2];
195
+ const op = m[2] === '\uFF0B' ? '+' : m[2] === '\uFF0D' ? '-' : m[2];
196
196
  if (op === '+') {
197
197
  return { kind: 'and', includeA: left, includeB: right, raw: `{${left} + ${right}}` };
198
198
  }
@@ -1228,4 +1228,3 @@ export async function execute(input: InteractInput): Promise<InteractOutput> {
1228
1228
  stopReason: reachedBottom ? bottomReason : undefined,
1229
1229
  };
1230
1230
  }
1231
-
@@ -1,9 +1,106 @@
1
+ import path from 'node:path';
2
+ import os from 'node:os';
3
+ import fs from 'node:fs/promises';
4
+
5
+ // Simplified inline types to avoid module resolution issues
6
+ interface XhsCollectedUrl {
7
+ noteId: string;
8
+ safeUrl: string;
9
+ searchUrl?: string;
10
+ timestamp?: number;
11
+ }
12
+
13
+ function sanitizeForPath(name: string, fallback = 'unknown'): string {
14
+ const text = String(name || '').trim();
15
+ if (!text) return fallback;
16
+ const cleaned = text.replace(/[\\/:"*?<>|]+/g, '_').trim();
17
+ return cleaned || fallback;
18
+ }
19
+
20
+ function asStringArray(input: unknown): string[] {
21
+ if (!Array.isArray(input)) return [];
22
+ return input
23
+ .map((item) => String(item || '').trim())
24
+ .filter(Boolean);
25
+ }
26
+
27
+ function normalizeCollectedUrls(input: unknown, completedNoteIds: Set<string>): XhsCollectedUrl[] {
28
+ if (!Array.isArray(input)) return [];
29
+ const out: XhsCollectedUrl[] = [];
30
+ const seen = new Set<string>();
31
+ for (const row of input) {
32
+ if (!row || typeof row !== 'object') continue;
33
+ const noteId = String((row as Record<string, unknown>).noteId || '').trim();
34
+ if (!noteId || completedNoteIds.has(noteId) || seen.has(noteId)) continue;
35
+ seen.add(noteId);
36
+ const safeUrl = String((row as Record<string, unknown>).safeUrl || '').trim();
37
+ const searchUrl = String((row as Record<string, unknown>).searchUrl || '').trim();
38
+ const timestampRaw = Number((row as Record<string, unknown>).timestamp);
39
+ const item: XhsCollectedUrl = {
40
+ noteId,
41
+ safeUrl,
42
+ ...(searchUrl ? { searchUrl } : {}),
43
+ ...(Number.isFinite(timestampRaw) ? { timestamp: timestampRaw } : {}),
44
+ };
45
+ out.push(item);
46
+ }
47
+ return out;
48
+ }
49
+
50
+ async function resolveStatePath(input: { keyword: string; env: string; downloadRoot?: string }): Promise<string | null> {
51
+ const root = resolveDownloadRoot(input.downloadRoot);
52
+ const sanitizedEnv = sanitizeForPath(input.env, 'debug');
53
+ const sanitizedKeyword = sanitizeForPath(input.keyword, 'unknown');
54
+ const rawEnv = String(input.env || '').trim();
55
+ const rawKeyword = String(input.keyword || '').trim();
56
+
57
+ const candidates = [
58
+ path.join(root, 'xiaohongshu', sanitizedEnv, sanitizedKeyword, '.collect-state.json'),
59
+ ];
60
+ const legacyPath = path.join(root, 'xiaohongshu', rawEnv, rawKeyword, '.collect-state.json');
61
+ if (legacyPath !== candidates[0]) candidates.push(legacyPath);
62
+
63
+ for (const filePath of candidates) {
64
+ try {
65
+ await fs.access(filePath);
66
+ return filePath;
67
+ } catch {
68
+ // continue
69
+ }
70
+ }
71
+ return null;
72
+ }
73
+
74
+ async function getXhsPendingItems(input: { keyword: string; env: string; downloadRoot?: string }): Promise<XhsCollectedUrl[]> {
75
+ const statePath = await resolveStatePath(input);
76
+ if (!statePath) return [];
77
+ try {
78
+ const content = await fs.readFile(statePath, 'utf8');
79
+ const state = JSON.parse(content) as Record<string, any>;
80
+ const completed = new Set(asStringArray(state?.detailCollection?.completedNoteIds));
81
+ return normalizeCollectedUrls(state?.listCollection?.collectedUrls, completed);
82
+ } catch (error: any) {
83
+ const code = String(error?.code || '');
84
+ const kind = code === 'ENOENT' ? 'missing_state' : code === 'EACCES' ? 'access_denied' : 'invalid_state';
85
+ console.warn(
86
+ `[xhs.sharding] failed to load pending items (${kind}) from ${statePath}: ${error?.message || String(error)}`,
87
+ );
88
+ return [];
89
+ }
90
+ }
91
+
1
92
  export interface ShardSpec {
2
93
  index: number;
3
94
  count: number;
4
95
  by?: 'noteId-hash' | 'index-mod';
5
96
  }
6
97
 
98
+ export interface DynamicShardPlan {
99
+ profileId: string;
100
+ assignedNoteIds: string[];
101
+ totalPending: number;
102
+ }
103
+
7
104
  export function fnv1a32(input: string) {
8
105
  // FNV-1a 32-bit
9
106
  let hash = 0x811c9dc5;
@@ -41,3 +138,58 @@ export function shardFilterByIndexMod<T>(items: T[], shard: ShardSpec) {
41
138
  if (shard.count <= 1) return list;
42
139
  return list.filter((_, idx) => idx % shard.count === shard.index);
43
140
  }
141
+
142
+ export function resolveDownloadRoot(customRoot?: string): string {
143
+ const fromArg = String(customRoot || '').trim();
144
+ if (fromArg) return path.resolve(fromArg);
145
+ const fromEnv = String(process.env.WEBAUTO_DOWNLOAD_ROOT || process.env.WEBAUTO_DOWNLOAD_DIR || '').trim();
146
+ if (fromEnv) return path.resolve(fromEnv);
147
+ const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
148
+ return path.join(home!, '.webauto', 'download');
149
+ }
150
+
151
+ export async function buildDynamicShardPlan(input: {
152
+ keyword: string;
153
+ env: string;
154
+ downloadRoot?: string;
155
+ validProfiles: string[];
156
+ }): Promise<DynamicShardPlan[]> {
157
+ const { keyword, env, downloadRoot, validProfiles } = input;
158
+ if (!validProfiles.length) return [];
159
+
160
+ // Load pending items (not completed yet)
161
+ const pendingItems = await getXhsPendingItems({ keyword, env, downloadRoot });
162
+ if (pendingItems.length === 0) {
163
+ return validProfiles.map((profileId: string) => ({ profileId, assignedNoteIds: [] as string[], totalPending: 0 }));
164
+ }
165
+
166
+ // Calculate how many notes each profile should handle
167
+ // Using ceil to ensure we cover all pending items
168
+ const perProfileCount = Math.ceil(pendingItems.length / validProfiles.length);
169
+
170
+ const plans: DynamicShardPlan[] = [];
171
+ for (let i = 0; i < validProfiles.length; i++) {
172
+ const profileId = validProfiles[i];
173
+ const startIdx = i * perProfileCount;
174
+ const endIdx = Math.min(startIdx + perProfileCount, pendingItems.length);
175
+ const assignedNoteIds: string[] = pendingItems.slice(startIdx, endIdx).map((item: XhsCollectedUrl) => item.noteId);
176
+ plans.push({
177
+ profileId,
178
+ assignedNoteIds,
179
+ totalPending: pendingItems.length,
180
+ });
181
+ }
182
+
183
+ return plans;
184
+ }
185
+
186
+ export async function getPendingItemsByNoteIds(input: {
187
+ keyword: string;
188
+ env: string;
189
+ downloadRoot?: string;
190
+ noteIds: string[];
191
+ }): Promise<XhsCollectedUrl[]> {
192
+ const pendingItems = await getXhsPendingItems({ keyword: input.keyword, env: input.env, downloadRoot: input.downloadRoot });
193
+ const noteIdSet = new Set(input.noteIds);
194
+ return pendingItems.filter((item: XhsCollectedUrl) => noteIdSet.has(item.noteId));
195
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@web-auto/webauto",
3
- "version": "0.1.4",
3
+ "version": "0.1.7",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "webauto": "bin/webauto.mjs"
@@ -14,6 +14,7 @@
14
14
  "apps/desktop-console/dist/",
15
15
  "apps/desktop-console/default-settings.json",
16
16
  "apps/desktop-console/entry/ui-console.mjs",
17
+ "apps/desktop-console/entry/ui-cli.mjs",
17
18
  "apps/webauto/",
18
19
  "!dist/**/*.map",
19
20
  "!dist/**/__tests__/**",
@@ -46,6 +47,7 @@
46
47
  "ws": "^8.19.0"
47
48
  },
48
49
  "scripts": {
50
+ "postinstall": "node scripts/postinstall-resources.mjs",
49
51
  "start:mcp:browser": "npx @browsermcp/mcp@latest",
50
52
  "build": "npm run prebuild && npm run build:services && npm run self-check:post-build && npm run ui:test && npm --prefix apps/desktop-console run build",
51
53
  "build:services": "tsc -p tsconfig.services.json",
@@ -57,10 +59,17 @@
57
59
  "check:modules": "echo \"Module checks skipped\"",
58
60
  "check:ts": "tsc --noEmit -p tsconfig.services.json",
59
61
  "test:modules:unit": "npx tsx --test $(find modules -name \"*.test.ts\" -o -name \"*.test.mts\" 2>/dev/null | tr \"\\n\" \" \")",
60
- "test:desktop-console:unit": "tsx --test apps/desktop-console/src/main/profile-store.test.mts apps/desktop-console/src/main/index-streaming.test.mts",
62
+ "test:desktop-console:unit": "tsx --test apps/desktop-console/src/main/profile-store.test.mts apps/desktop-console/src/main/index-streaming.test.mts apps/desktop-console/src/main/ui-cli-bridge.test.mts apps/desktop-console/src/main/heartbeat-watchdog.test.mts apps/desktop-console/src/main/core-daemon-manager.test.mts apps/desktop-console/src/main/desktop-settings.test.mts apps/desktop-console/src/main/env-check.test.mts",
61
63
  "test:desktop-console:renderer": "npm --prefix apps/desktop-console run test:renderer",
62
- "test:desktop-console:coverage": "c8 --reporter=text --reporter=lcov --all --src apps/desktop-console/src/main --extension .mts --extension .mjs --exclude \"**/*.test.mts\" --include \"apps/desktop-console/src/main/profile-store.mts\" tsx --test apps/desktop-console/src/main/profile-store.test.mts",
63
- "test": "npm run check:modules && npm run check:ts && npm run test:modules:unit && npm run test:desktop-console:unit && npm run test:services:unit",
64
+ "test:desktop-console:coverage": "c8 --reporter=text --reporter=lcov --all --src apps/desktop-console/src/main --extension .mts --extension .mjs --extension .ts --exclude \"**/*.test.*\" --check-coverage --lines 75 --functions 75 --branches 55 --statements 75 --include \"apps/desktop-console/src/main/profile-store.mts\" --include \"apps/desktop-console/src/main/ui-cli-bridge.mts\" --include \"apps/desktop-console/src/main/heartbeat-watchdog.mts\" --include \"apps/desktop-console/src/main/core-daemon-manager.mts\" --include \"apps/desktop-console/src/main/desktop-settings.mts\" --include \"apps/desktop-console/src/main/env-check.mts\" tsx --test apps/desktop-console/src/main/profile-store.test.mts apps/desktop-console/src/main/index-streaming.test.mts apps/desktop-console/src/main/ui-cli-bridge.test.mts apps/desktop-console/src/main/heartbeat-watchdog.test.mts apps/desktop-console/src/main/core-daemon-manager.test.mts apps/desktop-console/src/main/desktop-settings.test.mts apps/desktop-console/src/main/env-check.test.mts",
65
+ "test:webauto:schedule:unit": "node --test tests/unit/webauto/schedule-store.test.mjs tests/unit/webauto/schedule-cli.test.mjs",
66
+ "test:webauto:ui-cli:unit": "node --test tests/unit/webauto/ui-cli-command.test.mjs",
67
+ "test:webauto:install:unit": "node --test tests/unit/webauto/xhs-install.test.mjs",
68
+ "test:webauto:schedule:coverage": "c8 --reporter=text --reporter=text-summary --check-coverage --lines 85 --functions 85 --branches 60 --statements 85 --include apps/webauto/entry/lib/schedule-store.mjs --include apps/webauto/entry/schedule.mjs node --test tests/unit/webauto/schedule-store.test.mjs tests/unit/webauto/schedule-cli.test.mjs",
69
+ "test:ci": "npm test && npm --prefix apps/desktop-console run test:renderer",
70
+ "coverage:ci": "node scripts/test/run-coverage.mjs",
71
+ "build:release": "node bin/webauto.mjs build:release",
72
+ "test": "npm run check:modules && npm run check:ts && npm run test:modules:unit && npm run test:webauto:schedule:unit && npm run test:webauto:ui-cli:unit && npm run test:webauto:install:unit && npm run test:desktop-console:unit && npm run test:services:unit",
64
73
  "cli:session-manager": "tsx modules/session-manager/src/cli.ts",
65
74
  "cli:logging": "tsx modules/logging/src/cli.ts",
66
75
  "cli:operations": "tsx modules/operations/src/cli.ts",
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env node
2
+ import { spawnSync } from 'node:child_process';
3
+ import { existsSync } from 'node:fs';
4
+ import path from 'node:path';
5
+ import { fileURLToPath } from 'node:url';
6
+
7
+ const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
8
+
9
/**
 * Decide whether the postinstall resource preparation should be skipped.
 *
 * Checks, in order:
 *   1. WEBAUTO_SKIP_AUTO_RESOURCES=1  -> skip
 *   2. CI=true (case-insensitive)     -> skip
 *   3. a `.git` entry exists under ROOT and WEBAUTO_AUTO_RESOURCES is not "1" -> skip
 *
 * @returns {{ skip: boolean, reason: string }} `reason` is empty when `skip` is false.
 */
function shouldSkipAutoPrepare() {
  // Read an environment variable as a trimmed string ('' when unset).
  const envFlag = (name) => String(process.env[name] || '').trim();
  const skipBecause = (reason) => ({ skip: true, reason });

  if (envFlag('WEBAUTO_SKIP_AUTO_RESOURCES') === '1') {
    return skipBecause('WEBAUTO_SKIP_AUTO_RESOURCES=1');
  }
  if (envFlag('CI').toLowerCase() === 'true') {
    return skipBecause('CI=true');
  }

  // The filesystem is only probed when the force flag is absent.
  const forced = envFlag('WEBAUTO_AUTO_RESOURCES') === '1';
  const insideGitCheckout = !forced && existsSync(path.join(ROOT, '.git'));
  return insideGitCheckout ? skipBecause('dev_repo_detected') : { skip: false, reason: '' };
}
22
+
23
/**
 * Run the bundled `xhs-install.mjs` entry synchronously with
 * `--auto --all --json`, using the current Node binary.
 *
 * The child runs with ROOT as its working directory, its output is captured
 * as UTF-8 text, and it is given at most ten minutes to finish.
 *
 * @returns the raw `spawnSync` result (status, stdout, stderr, ...).
 */
function runAutoPrepare() {
  const installer = path.join(ROOT, 'apps', 'webauto', 'entry', 'xhs-install.mjs');
  const installerArgs = [installer, '--auto', '--all', '--json'];
  return spawnSync(process.execPath, installerArgs, {
    cwd: ROOT,
    encoding: 'utf8',
    windowsHide: true,
    timeout: 10 * 60 * 1000, // ten minutes, in milliseconds
  });
}
33
+
34
/**
 * Postinstall entry point: prepare runtime resources unless gated off.
 *
 * A failed preparation only produces a warning by default so that package
 * installation still succeeds; with WEBAUTO_AUTO_RESOURCES_STRICT=1 the
 * failure is reported on stderr and the process exits non-zero.
 */
function main() {
  const gate = shouldSkipAutoPrepare();
  if (gate.skip) {
    console.log(`[webauto:postinstall] skip auto resource prepare (${gate.reason})`);
    return;
  }

  console.log('[webauto:postinstall] auto preparing resources (camoufox + geoip)...');
  const ret = runAutoPrepare();
  const stdout = String(ret.stdout || '').trim();
  const stderr = String(ret.stderr || '').trim();
  if (stdout) console.log(`[webauto:postinstall] ${stdout}`);
  if (stderr) console.warn(`[webauto:postinstall] ${stderr}`);

  if (ret.status === 0) {
    console.log('[webauto:postinstall] resource prepare done');
    return;
  }

  // When the child could not be launched, timed out, or was killed, spawnSync
  // leaves `status` null and reports the cause through `error` / `signal`.
  // Include those so the message is not just "exit=null".
  const details = [`exit=${ret.status ?? 'null'}`];
  if (ret.signal) details.push(`signal=${ret.signal}`);
  if (ret.error) details.push(`error=${ret.error.message || String(ret.error)}`);

  const strict = String(process.env.WEBAUTO_AUTO_RESOURCES_STRICT || '').trim() === '1';
  const message = `[webauto:postinstall] auto resource prepare failed (${details.join(', ')}). You can run: webauto deps install --all`;
  if (strict) {
    console.error(message);
    process.exit(ret.status || 1);
  }
  console.warn(message);
}

main();
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env node
2
+ import { spawn } from 'node:child_process';
3
+ import path from 'node:path';
4
+ import { existsSync } from 'node:fs';
5
+
6
+ const ROOT = process.cwd();
7
+
8
/**
 * Look for the first existing file among `candidates` in the directories
 * listed in the PATH environment variable (falling back to `Path`).
 *
 * Directories are searched in PATH order; within one directory the
 * candidates are tried in the order given.
 *
 * @param candidates file names to look for
 * @returns the full path of the first match, or null when nothing is found
 */
function resolveOnPath(candidates) {
  const rawPath = process.env.PATH || process.env.Path || '';
  const searchDirs = rawPath.split(path.delimiter).filter((entry) => entry.length > 0);

  for (const dir of searchDirs) {
    const hit = candidates
      .map((name) => path.join(dir, name))
      .find((fullPath) => existsSync(fullPath));
    if (hit) return hit;
  }
  return null;
}
19
+
20
/**
 * Work out how to invoke npm on the current platform.
 *
 * Outside Windows this is simply `npm`. On Windows the npm launcher is
 * looked up on PATH (defaulting to `npm.cmd`) and wrapped according to its
 * extension: `.ps1` goes through powershell.exe, `.cmd`/`.bat` through
 * cmd.exe, anything else is executed directly.
 *
 * @returns {{ cmd: string, prefix: string[] }} the executable plus the
 *   arguments that must precede the npm arguments.
 */
function npmRunner() {
  const direct = (cmd) => ({ cmd, prefix: [] });
  if (process.platform !== 'win32') return direct('npm');

  const located = resolveOnPath(['npm.cmd', 'npm.exe', 'npm.bat', 'npm.ps1']) || 'npm.cmd';
  const normalized = String(located).toLowerCase();
  const hasSuffix = (...suffixes) => suffixes.some((suffix) => normalized.endsWith(suffix));

  if (hasSuffix('.ps1')) {
    const prefix = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', located];
    return { cmd: 'powershell.exe', prefix };
  }
  if (hasSuffix('.cmd', '.bat')) {
    const prefix = ['/d', '/s', '/c', located];
    return { cmd: 'cmd.exe', prefix };
  }
  return direct(located);
}
38
+
39
/**
 * Spawn a child process with inherited stdio and wait for it to finish.
 *
 * @param label human-readable name used in failure messages
 * @param cmd   executable to launch
 * @param args  argument vector for the executable
 * @param cwd   working directory (defaults to ROOT)
 * @returns a promise that resolves when the child exits with code 0
 * @throws Error when the child cannot be spawned, exits non-zero, or is
 *   terminated by a signal
 */
async function run(label, cmd, args, cwd = ROOT) {
  await new Promise((resolve, reject) => {
    const child = spawn(cmd, args, {
      cwd,
      env: process.env,
      stdio: 'inherit',
      windowsHide: true,
    });
    child.on('error', reject);
    child.on('exit', (code, signal) => {
      if (code === 0) return resolve(undefined);
      // A child killed by a signal has a null exit code; name the signal
      // instead of reporting "exit null".
      if (code === null && signal) {
        return reject(new Error(`${label} terminated by signal ${signal}`));
      }
      reject(new Error(`${label} failed with exit ${code}`));
    });
  });
}
54
+
55
/**
 * Run the coverage pipeline: each stage is an npm invocation executed in
 * order from ROOT. The first failing stage rejects and aborts the rest.
 */
async function main() {
  const npm = npmRunner();

  const stages = [
    {
      banner: '[coverage] running root desktop main coverage',
      label: 'test:desktop-console:coverage',
      npmArgs: ['run', 'test:desktop-console:coverage'],
    },
    {
      // Renderer suites are already enforced by `test:ci`.
      // Keep this stage focused on deterministic coverage gates that are stable in CI.
      banner: '[coverage] running desktop renderer smoke tests',
      label: 'desktop renderer tests',
      npmArgs: ['--prefix', 'apps/desktop-console', 'run', 'test:renderer'],
    },
    {
      banner: '[coverage] running webauto schedule coverage',
      label: 'test:webauto:schedule:coverage',
      npmArgs: ['run', 'test:webauto:schedule:coverage'],
    },
  ];

  for (const stage of stages) {
    console.log(stage.banner);
    await run(stage.label, npm.cmd, npm.prefix.concat(stage.npmArgs), ROOT);
  }

  console.log('[coverage] done');
}

// Report the failure reason and exit non-zero so callers see the failure.
main().catch((err) => {
  console.error('[coverage] failed:', err?.message || String(err));
  process.exit(1);
});
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * Weibo Search Collection CLI
4
+ *
5
+ * Usage:
6
+ * webauto weibo search --profile <profile> --keyword <keyword> --target <count> [--mode fresh|incremental]
7
+ */
8
+
9
+ import { Command } from 'commander';
10
+ import { WeiboCollectSearchLinksBlock } from '../../modules/workflow/blocks/WeiboCollectSearchLinksBlock';
11
+ import type { BlockContext } from '../../modules/workflow/types';
12
+
13
/**
 * Parse a numeric option value as a base-10 integer.
 *
 * Commander calls option parsers as `(value, previousValue)`. Passing bare
 * `parseInt` therefore uses the previous/default value as the radix, which
 * only works while that value happens to be 0 or undefined. This wrapper
 * ignores the second argument and pins the radix.
 */
const parseBase10 = (value: string): number => Number.parseInt(value, 10);

const program = new Command();

program
  .name('webauto weibo search')
  .description('Collect Weibo posts from search results')
  .requiredOption('--profile <profile>', 'Browser profile to use')
  .requiredOption('--keyword <keyword>', 'Search keyword')
  .requiredOption('--target <count>', 'Target number of posts', parseBase10)
  .option('--mode <mode>', 'Collection mode: fresh or incremental', 'incremental')
  .option('--max-pages <pages>', 'Maximum pages to search (0 = unlimited)', parseBase10, 0)
  .option('--env <env>', 'Environment (debug/production)', 'debug')
  .parse(process.argv);

// Parsed option values, read by main().
const options = program.opts();
27
+
28
/**
 * Print the collection configuration and start a browser session through the
 * `camo` CLI, pointed at the Weibo search page.
 *
 * The collection block itself is not run yet (see the TODO below); this only
 * launches the browser and echoes camo's output.
 */
async function main(): Promise<void> {
  console.log(`[Weibo Search] Starting collection`);
  console.log(` Profile: ${options.profile}`);
  console.log(` Keyword: ${options.keyword}`);
  console.log(` Target: ${options.target}`);
  console.log(` Mode: ${options.mode}`);

  // Use camo CLI for browser context.
  // Everything comes from the dynamic import; `require` is not defined in an ES module.
  const { exec, execFile } = await import('node:child_process');
  const { promisify } = await import('node:util');

  const profile = String(options.profile);
  const camoArgs = ['start', profile, '--url', 'https://s.weibo.com/weibo', '--alias', 'weibo-search'];

  // Create context via camo
  let stdout: string;
  if (process.platform === 'win32') {
    // On Windows `camo` is typically a .cmd shim that needs a shell, so the
    // user-supplied profile is checked for cmd.exe metacharacters first.
    if (/[\s"&|<>^%()!]/.test(profile)) {
      throw new Error(`Invalid --profile value (contains whitespace or shell metacharacters): ${profile}`);
    }
    ({ stdout } = await promisify(exec)(`camo ${camoArgs.join(' ')}`));
  } else {
    // No shell involved: the profile is passed as a single argv entry and
    // cannot inject additional commands.
    ({ stdout } = await promisify(execFile)('camo', camoArgs));
  }
  console.log(stdout);

  // TODO: Connect to the browser via CDP and run the block
  // For now, we output the configuration
  console.log('\n[Ready] Browser started. Run the collection via UI.');
}

// Log the failure and make the process exit non-zero so callers can detect it.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
@@ -32,7 +32,7 @@ function ensureLogDir(): string {
32
32
 
33
33
  function safeAppend(filePath: string, line: string): void {
34
34
  try {
35
- fs.appendFileSync(filePath, line);
35
+ fs.appendFileSync(filePath, line, 'utf8');
36
36
  } catch {
37
37
  // ignore
38
38
  }