@web-auto/webauto 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354)
  1. package/apps/desktop-console/default-settings.json +1 -0
  2. package/apps/desktop-console/dist/main/index.mjs +1618 -0
  3. package/apps/desktop-console/{src → dist}/main/preload.mjs +10 -0
  4. package/apps/desktop-console/dist/renderer/index.js +3063 -0
  5. package/apps/desktop-console/entry/ui-console.mjs +299 -0
  6. package/apps/webauto/entry/account.mjs +356 -0
  7. package/apps/webauto/entry/lib/account-detect.mjs +160 -0
  8. package/apps/webauto/entry/lib/account-store.mjs +587 -0
  9. package/apps/webauto/entry/lib/profilepool.mjs +1 -1
  10. package/apps/webauto/entry/xhs-install.mjs +27 -3
  11. package/apps/webauto/entry/xhs-status.mjs +152 -0
  12. package/apps/webauto/entry/xhs-unified.mjs +595 -17
  13. package/bin/webauto.mjs +247 -12
  14. package/dist/apps/webauto/server.js +66 -0
  15. package/dist/modules/camo-backend/src/index.js +575 -0
  16. package/dist/modules/camo-backend/src/internal/BrowserSession.js +817 -0
  17. package/dist/modules/camo-backend/src/internal/ElementRegistry.js +61 -0
  18. package/dist/modules/camo-backend/src/internal/ProfileLock.js +85 -0
  19. package/dist/modules/camo-backend/src/internal/SessionManager.js +172 -0
  20. package/dist/modules/camo-backend/src/internal/container-matcher.js +852 -0
  21. package/dist/modules/camo-backend/src/internal/engine-manager.js +258 -0
  22. package/dist/modules/camo-backend/src/internal/fingerprint.js +203 -0
  23. package/dist/modules/camo-backend/src/internal/pageRuntime.js +29 -0
  24. package/dist/modules/camo-backend/src/internal/runtimeInjector.js +30 -0
  25. package/dist/modules/camo-backend/src/internal/state-bus.js +46 -0
  26. package/dist/modules/camo-backend/src/internal/storage-paths.js +36 -0
  27. package/dist/modules/camo-backend/src/internal/ws-server.js +1202 -0
  28. package/dist/modules/camo-runtime/src/utils/browser-service.mjs +423 -0
  29. package/dist/modules/camo-runtime/src/utils/config.mjs +77 -0
  30. package/dist/modules/container-registry/src/index.js +184 -0
  31. package/dist/modules/logging/src/index.js +92 -0
  32. package/dist/modules/operations/src/builtin.js +27 -0
  33. package/dist/modules/operations/src/container-binding.js +75 -0
  34. package/dist/modules/operations/src/executor.js +146 -0
  35. package/dist/modules/operations/src/operations/click.js +167 -0
  36. package/dist/modules/operations/src/operations/extract.js +204 -0
  37. package/dist/modules/operations/src/operations/find-child.js +17 -0
  38. package/dist/modules/operations/src/operations/highlight.js +138 -0
  39. package/dist/modules/operations/src/operations/key.js +61 -0
  40. package/dist/modules/operations/src/operations/navigate.js +148 -0
  41. package/dist/modules/operations/src/operations/scroll.js +126 -0
  42. package/dist/modules/operations/src/operations/type.js +190 -0
  43. package/dist/modules/operations/src/queue.js +100 -0
  44. package/dist/modules/operations/src/registry.js +11 -0
  45. package/dist/modules/operations/src/system/mouse.js +33 -0
  46. package/dist/modules/state/src/atomic-json.js +33 -0
  47. package/dist/modules/workflow/blocks/AnchorVerificationBlock.js +71 -0
  48. package/dist/modules/workflow/blocks/BehaviorRandomizer.js +26 -0
  49. package/dist/modules/workflow/blocks/CallWorkflowBlock.js +38 -0
  50. package/dist/modules/workflow/blocks/CloseDetailBlock.js +209 -0
  51. package/dist/modules/workflow/blocks/CollectBatch.js +137 -0
  52. package/dist/modules/workflow/blocks/CollectCommentsBlock.js +415 -0
  53. package/dist/modules/workflow/blocks/CollectSearchListBlock.js +599 -0
  54. package/dist/modules/workflow/blocks/CollectWeiboPosts.js +229 -0
  55. package/dist/modules/workflow/blocks/DetectPageStateBlock.js +259 -0
  56. package/dist/modules/workflow/blocks/EnsureLoginBlock.js +162 -0
  57. package/dist/modules/workflow/blocks/EnsureSession.js +426 -0
  58. package/dist/modules/workflow/blocks/ErrorClassifier.js +164 -0
  59. package/dist/modules/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  60. package/dist/modules/workflow/blocks/ExpandCommentsBlock.js +1032 -0
  61. package/dist/modules/workflow/blocks/ExtractDetailBlock.js +310 -0
  62. package/dist/modules/workflow/blocks/ExtractPostFields.js +88 -0
  63. package/dist/modules/workflow/blocks/GenerateSmartReplyBlock.js +68 -0
  64. package/dist/modules/workflow/blocks/GoToSearchBlock.js +497 -0
  65. package/dist/modules/workflow/blocks/GracefulFallbackBlock.js +104 -0
  66. package/dist/modules/workflow/blocks/HighlightBlock.js +66 -0
  67. package/dist/modules/workflow/blocks/InitAutoScroll.js +65 -0
  68. package/dist/modules/workflow/blocks/LoadContainerDefinition.js +50 -0
  69. package/dist/modules/workflow/blocks/LoadContainerIndex.js +43 -0
  70. package/dist/modules/workflow/blocks/LocateAndGuardBlock.js +176 -0
  71. package/dist/modules/workflow/blocks/LoginRecoveryBlock.js +242 -0
  72. package/dist/modules/workflow/blocks/MatchContainers.js +64 -0
  73. package/dist/modules/workflow/blocks/MonitoringBlock.js +190 -0
  74. package/dist/modules/workflow/blocks/OpenDetailBlock.js +1240 -0
  75. package/dist/modules/workflow/blocks/OrganizeXhsNotesBlock.js +117 -0
  76. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +270 -0
  77. package/dist/modules/workflow/blocks/PickSinglePost.js +69 -0
  78. package/dist/modules/workflow/blocks/ProgressTracker.js +125 -0
  79. package/dist/modules/workflow/blocks/RecordFixtureBlock.js +44 -0
  80. package/dist/modules/workflow/blocks/RenderMarkdown.js +48 -0
  81. package/dist/modules/workflow/blocks/SaveFile.js +54 -0
  82. package/dist/modules/workflow/blocks/ScrollNextBatch.js +72 -0
  83. package/dist/modules/workflow/blocks/SessionHealthBlock.js +73 -0
  84. package/dist/modules/workflow/blocks/StartBrowserService.js +45 -0
  85. package/dist/modules/workflow/blocks/ValidateContainerDefinition.js +67 -0
  86. package/dist/modules/workflow/blocks/ValidateExtract.js +35 -0
  87. package/dist/modules/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  88. package/dist/modules/workflow/blocks/WaitStable.js +74 -0
  89. package/dist/modules/workflow/blocks/WarmupCommentsBlock.js +120 -0
  90. package/dist/modules/workflow/blocks/WorkflowExecutor.js +156 -0
  91. package/dist/modules/workflow/blocks/XiaohongshuCollectFromLinksBlock.js +1004 -0
  92. package/dist/modules/workflow/blocks/XiaohongshuCollectLinksBlock.js +1049 -0
  93. package/dist/modules/workflow/blocks/XiaohongshuFullCollectBlock.js +782 -0
  94. package/dist/modules/workflow/blocks/helpers/anchorVerify.js +198 -0
  95. package/dist/modules/workflow/blocks/helpers/asyncWorkQueue.js +53 -0
  96. package/dist/modules/workflow/blocks/helpers/commentScroller.js +334 -0
  97. package/dist/modules/workflow/blocks/helpers/commentSectionLocator.js +126 -0
  98. package/dist/modules/workflow/blocks/helpers/containerAnchors.js +301 -0
  99. package/dist/modules/workflow/blocks/helpers/debugArtifacts.js +6 -0
  100. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +29 -0
  101. package/dist/modules/workflow/blocks/helpers/expandCommentsController.js +53 -0
  102. package/dist/modules/workflow/blocks/helpers/expandCommentsExtractor.js +129 -0
  103. package/dist/modules/workflow/blocks/helpers/macosVisionOcrPlugin.js +116 -0
  104. package/dist/modules/workflow/blocks/helpers/mergeXhsMarkdown.js +109 -0
  105. package/dist/modules/workflow/blocks/helpers/openDetailController.js +56 -0
  106. package/dist/modules/workflow/blocks/helpers/openDetailTypes.js +7 -0
  107. package/dist/modules/workflow/blocks/helpers/openDetailViewport.js +474 -0
  108. package/dist/modules/workflow/blocks/helpers/openDetailWaiter.js +104 -0
  109. package/dist/modules/workflow/blocks/helpers/operationLogger.js +195 -0
  110. package/dist/modules/workflow/blocks/helpers/persistedNotes.js +107 -0
  111. package/dist/modules/workflow/blocks/helpers/replyExpander.js +260 -0
  112. package/dist/modules/workflow/blocks/helpers/scrollIntoView.js +138 -0
  113. package/dist/modules/workflow/blocks/helpers/searchExecutor.js +328 -0
  114. package/dist/modules/workflow/blocks/helpers/searchGate.js +46 -0
  115. package/dist/modules/workflow/blocks/helpers/searchPageState.js +164 -0
  116. package/dist/modules/workflow/blocks/helpers/searchResultWaiter.js +64 -0
  117. package/dist/modules/workflow/blocks/helpers/simpleAnchor.js +134 -0
  118. package/dist/modules/workflow/blocks/helpers/smartReply.js +40 -0
  119. package/dist/modules/workflow/blocks/helpers/systemInput.js +635 -0
  120. package/dist/modules/workflow/blocks/helpers/targetCountMode.js +9 -0
  121. package/dist/modules/workflow/blocks/helpers/xhsCliArgs.js +80 -0
  122. package/dist/modules/workflow/blocks/helpers/xhsCommentDom.js +805 -0
  123. package/dist/modules/workflow/blocks/helpers/xhsNoteOrganizer.js +140 -0
  124. package/dist/modules/workflow/blocks/restore/RestorePhaseBlock.js +204 -0
  125. package/dist/modules/workflow/config/workflowRegistry.js +32 -0
  126. package/dist/modules/workflow/definitions/batch-collect-workflow.js +63 -0
  127. package/dist/modules/workflow/definitions/scroll-extract-workflow.js +74 -0
  128. package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow-v2.js +81 -0
  129. package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow.js +57 -0
  130. package/dist/modules/workflow/definitions/xiaohongshu-full-collect-workflow-v3.js +68 -0
  131. package/dist/modules/workflow/definitions/xiaohongshu-note-collect.js +49 -0
  132. package/dist/modules/workflow/definitions/xiaohongshu-phase1-workflow-v3.js +30 -0
  133. package/dist/modules/workflow/definitions/xiaohongshu-phase2-links-workflow-v3.js +40 -0
  134. package/dist/modules/workflow/definitions/xiaohongshu-phase3-collect-workflow-v1.js +54 -0
  135. package/dist/modules/workflow/definitions/xiaohongshu-phase34-from-links-workflow-v3.js +25 -0
  136. package/dist/modules/workflow/src/WeiboEventDrivenWorkflowRunner.js +308 -0
  137. package/dist/modules/workflow/src/context.js +70 -0
  138. package/dist/modules/workflow/src/index.js +5 -0
  139. package/dist/modules/workflow/src/orchestrator.js +230 -0
  140. package/dist/modules/workflow/src/runner.js +55 -0
  141. package/dist/modules/workflow/src/runtime.js +70 -0
  142. package/dist/modules/workflow/workflows/WeiboFeedExtractionWorkflow.js +359 -0
  143. package/dist/modules/workflow/workflows/XiaohongshuLoginWorkflow.js +110 -0
  144. package/dist/modules/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  145. package/dist/modules/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  146. package/dist/modules/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  147. package/dist/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  148. package/dist/modules/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  149. package/dist/modules/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  150. package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  151. package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  152. package/dist/modules/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  153. package/dist/modules/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  154. package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  155. package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  156. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  157. package/dist/modules/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  158. package/dist/modules/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  159. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  160. package/dist/modules/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  161. package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  162. package/dist/modules/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  163. package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  164. package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  165. package/dist/modules/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  166. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +42 -0
  167. package/dist/modules/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  168. package/dist/modules/xiaohongshu/app/src/index.js +9 -0
  169. package/dist/modules/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  170. package/dist/modules/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  171. package/dist/services/controller/src/controller.js +1476 -0
  172. package/dist/services/controller/src/index.js +2 -0
  173. package/dist/services/controller/src/payload-normalizer.js +129 -0
  174. package/dist/services/shared/heartbeat.js +120 -0
  175. package/dist/services/shared/lib/errorHandler.js +2 -0
  176. package/dist/services/shared/serviceProcessLogger.js +139 -0
  177. package/dist/services/unified-api/RemoteBrowserSession.js +176 -0
  178. package/dist/services/unified-api/RemoteSessionManager.js +148 -0
  179. package/dist/services/unified-api/container-operations-handler.js +115 -0
  180. package/dist/services/unified-api/server.js +652 -0
  181. package/dist/services/unified-api/state-registry.js +274 -0
  182. package/dist/services/unified-api/task-persistence.js +66 -0
  183. package/dist/services/unified-api/task-state.js +130 -0
  184. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +12 -5
  185. package/modules/xiaohongshu/app/pnpm-lock.yaml +24 -0
  186. package/package.json +37 -9
  187. package/.beads/README.md +0 -81
  188. package/.beads/config.yaml +0 -67
  189. package/.beads/interactions.jsonl +0 -0
  190. package/.beads/issues.jsonl +0 -180
  191. package/.beads/metadata.json +0 -4
  192. package/.claude/settings.local.json +0 -10
  193. package/.github/workflows/ci.yml +0 -55
  194. package/AGENTS.md +0 -253
  195. package/apps/desktop-console/README.md +0 -27
  196. package/apps/desktop-console/package-lock.json +0 -897
  197. package/apps/desktop-console/package.json +0 -20
  198. package/apps/desktop-console/scripts/build-and-install.mjs +0 -19
  199. package/apps/desktop-console/scripts/build.mjs +0 -45
  200. package/apps/desktop-console/scripts/test-preload.mjs +0 -13
  201. package/apps/desktop-console/src/main/config.mts +0 -26
  202. package/apps/desktop-console/src/main/core-daemon-manager.mts +0 -131
  203. package/apps/desktop-console/src/main/desktop-settings.mts +0 -267
  204. package/apps/desktop-console/src/main/heartbeat-watchdog.mts +0 -50
  205. package/apps/desktop-console/src/main/heartbeat-watchdog.test.mts +0 -68
  206. package/apps/desktop-console/src/main/index-streaming.test.mts +0 -20
  207. package/apps/desktop-console/src/main/index.mts +0 -980
  208. package/apps/desktop-console/src/main/profile-store.mts +0 -239
  209. package/apps/desktop-console/src/main/profile-store.test.mts +0 -54
  210. package/apps/desktop-console/src/main/state-bridge.mts +0 -114
  211. package/apps/desktop-console/src/main/task-state-types.ts +0 -32
  212. package/apps/desktop-console/src/renderer/hooks/use-task-state.mts +0 -120
  213. package/apps/desktop-console/src/renderer/index.mts +0 -133
  214. package/apps/desktop-console/src/renderer/index.test.mts +0 -34
  215. package/apps/desktop-console/src/renderer/path-helpers.mts +0 -46
  216. package/apps/desktop-console/src/renderer/path-helpers.test.mts +0 -14
  217. package/apps/desktop-console/src/renderer/tabs/debug.mts +0 -48
  218. package/apps/desktop-console/src/renderer/tabs/debug.test.mts +0 -22
  219. package/apps/desktop-console/src/renderer/tabs/logs.mts +0 -421
  220. package/apps/desktop-console/src/renderer/tabs/logs.test.mts +0 -27
  221. package/apps/desktop-console/src/renderer/tabs/preflight.mts +0 -486
  222. package/apps/desktop-console/src/renderer/tabs/preflight.test.mts +0 -33
  223. package/apps/desktop-console/src/renderer/tabs/profile-pool.mts +0 -213
  224. package/apps/desktop-console/src/renderer/tabs/results.mts +0 -171
  225. package/apps/desktop-console/src/renderer/tabs/run.test.mts +0 -63
  226. package/apps/desktop-console/src/renderer/tabs/runtime.mts +0 -151
  227. package/apps/desktop-console/src/renderer/tabs/settings.mts +0 -146
  228. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/account-flow.mts +0 -486
  229. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/guide-browser-check.mts +0 -56
  230. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/helpers.mts +0 -262
  231. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/layout-block.mts +0 -430
  232. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/live-stats.mts +0 -847
  233. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/run-flow.mts +0 -443
  234. package/apps/desktop-console/src/renderer/tabs/xiaohongshu-state.mts +0 -425
  235. package/apps/desktop-console/src/renderer/tabs/xiaohongshu.mts +0 -497
  236. package/apps/desktop-console/src/renderer/tabs/xiaohongshu.test.mts +0 -291
  237. package/apps/desktop-console/src/renderer/ui-components.mts +0 -31
  238. package/docs/README_camoufox_chinese.md +0 -141
  239. package/docs/USAGE_V3.md +0 -163
  240. package/docs/arch/OCR_MACOS_PLUGIN.md +0 -39
  241. package/docs/arch/PORTS.md +0 -40
  242. package/docs/arch/REGRESSION_CHECKLIST.md +0 -121
  243. package/docs/arch/SEARCH_GATE.md +0 -224
  244. package/docs/arch/VIEWPORT_SAFETY.md +0 -182
  245. package/docs/arch/XIAOHONGSHU_OFFLINE_MOCK_DESIGN.md +0 -267
  246. package/docs/xiaohongshu-container-driven-summary.md +0 -221
  247. package/docs/xiaohongshu-full-collect-runbook.md +0 -134
  248. package/docs/xiaohongshu-next-steps.md +0 -228
  249. package/docs/xiaohongshu-quickstart.md +0 -73
  250. package/docs/xiaohongshu-workflow-summary.md +0 -227
  251. package/modules/container-registry/tests/container-registry.test.ts +0 -16
  252. package/modules/logging/tests/logging.test.ts +0 -38
  253. package/modules/operations/tests/operations.test.ts +0 -22
  254. package/modules/operations/tests/viewport-filter.test.ts +0 -161
  255. package/modules/operations/tests/visible-only.test.ts +0 -250
  256. package/modules/session-manager/tests/session-manager.test.ts +0 -23
  257. package/modules/state/src/atomic-json.test.ts +0 -30
  258. package/modules/state/src/paths.test.ts +0 -59
  259. package/modules/state/src/xiaohongshu-collect-state.test.ts +0 -259
  260. package/modules/workflow/blocks/AnchorVerificationBlock.d.ts.map +0 -1
  261. package/modules/workflow/blocks/AnchorVerificationBlock.js.map +0 -1
  262. package/modules/workflow/blocks/DetectPageStateBlock.d.ts.map +0 -1
  263. package/modules/workflow/blocks/DetectPageStateBlock.js.map +0 -1
  264. package/modules/workflow/blocks/ErrorRecoveryBlock.d.ts.map +0 -1
  265. package/modules/workflow/blocks/ErrorRecoveryBlock.js.map +0 -1
  266. package/modules/workflow/blocks/WaitSearchPermitBlock.d.ts.map +0 -1
  267. package/modules/workflow/blocks/WaitSearchPermitBlock.js.map +0 -1
  268. package/modules/workflow/blocks/helpers/containerAnchors.d.ts.map +0 -1
  269. package/modules/workflow/blocks/helpers/containerAnchors.js.map +0 -1
  270. package/modules/workflow/blocks/helpers/downloadPaths.test.ts +0 -62
  271. package/modules/workflow/blocks/helpers/mergeXhsMarkdown.test.ts +0 -121
  272. package/modules/workflow/blocks/helpers/operationLogger.d.ts.map +0 -1
  273. package/modules/workflow/blocks/helpers/operationLogger.js.map +0 -1
  274. package/modules/workflow/blocks/helpers/persistedNotes.test.ts +0 -268
  275. package/modules/workflow/blocks/helpers/searchPageState.d.ts.map +0 -1
  276. package/modules/workflow/blocks/helpers/searchPageState.js.map +0 -1
  277. package/modules/workflow/blocks/helpers/targetCountMode.test.ts +0 -29
  278. package/modules/workflow/blocks/helpers/xhsCliArgs.test.ts +0 -75
  279. package/modules/workflow/tests/smartReply.test.ts +0 -32
  280. package/modules/xiaohongshu/app/src/blocks/Phase3Interact.matcher.test.ts +0 -33
  281. package/modules/xiaohongshu/app/src/utils/__tests__/checkpoints.test.ts +0 -141
  282. package/modules/xiaohongshu/app/tests/commentMatchDsl.test.ts +0 -50
  283. package/modules/xiaohongshu/app/tests/commentMatcher.test.ts +0 -46
  284. package/modules/xiaohongshu/app/tests/sharding.test.ts +0 -31
  285. package/package-scripts.json +0 -8
  286. package/runtime/infra/utils/README.md +0 -13
  287. package/runtime/infra/utils/scripts/README.md +0 -0
  288. package/runtime/infra/utils/scripts/development/eval-in-session.mjs +0 -40
  289. package/runtime/infra/utils/scripts/development/highlight-search-containers.mjs +0 -35
  290. package/runtime/infra/utils/scripts/service/kill-port.mjs +0 -24
  291. package/runtime/infra/utils/scripts/service/start-api.mjs +0 -39
  292. package/runtime/infra/utils/scripts/service/start-browser-service.mjs +0 -106
  293. package/runtime/infra/utils/scripts/service/stop-api.mjs +0 -18
  294. package/runtime/infra/utils/scripts/service/stop-browser-service.mjs +0 -104
  295. package/runtime/infra/utils/scripts/test-services.mjs +0 -94
  296. package/services/shared/heartbeat.test.ts +0 -102
  297. package/services/unified-api/__tests__/task-state.test.ts +0 -95
  298. package/sitecustomize.py +0 -19
  299. package/tests/README.md +0 -194
  300. package/tests/e2e/workflows/weibo-feed-extraction.test.ts +0 -171
  301. package/tests/fixtures/data/container-definitions.json +0 -67
  302. package/tests/fixtures/pages/simple-page.html +0 -69
  303. package/tests/integration/01-test-container-match.mjs +0 -188
  304. package/tests/integration/02-test-dom-branch.mjs +0 -161
  305. package/tests/integration/03-test-container-operation-system.mjs +0 -91
  306. package/tests/integration/05-test-container-lifecycle-events.mjs +0 -224
  307. package/tests/integration/05-test-container-lifecycle-with-events.mjs +0 -250
  308. package/tests/integration/06-test-container-dom-tree-drawing.mjs +0 -256
  309. package/tests/integration/07-test-weibo-container-lifecycle.mjs +0 -355
  310. package/tests/integration/08-test-weibo-feed-workflow.test.mjs +0 -164
  311. package/tests/integration/10-test-visual-analyzer.mjs +0 -312
  312. package/tests/integration/11-test-visual-loop.mjs +0 -284
  313. package/tests/integration/12-test-simple-visual-loop.mjs +0 -242
  314. package/tests/integration/13-test-visual-robust.mjs +0 -185
  315. package/tests/integration/14-test-visual-highlight-loop.mjs +0 -271
  316. package/tests/integration/inspect-page.mjs +0 -50
  317. package/tests/integration/run-all-tests.mjs +0 -95
  318. package/tests/patch_verification/CODEX_PATCH_TEST.md +0 -103
  319. package/tests/patch_verification/PHASE2_ANALYSIS.md +0 -179
  320. package/tests/patch_verification/PHASE2_OPTIMIZATION_REPORT.md +0 -55
  321. package/tests/patch_verification/PHASE2_TO_PHASE4_SUMMARY.md +0 -126
  322. package/tests/patch_verification/QUICK_TEST_SEQUENCE.md +0 -262
  323. package/tests/patch_verification/README.md +0 -143
  324. package/tests/patch_verification/RUN_TESTS.md +0 -60
  325. package/tests/patch_verification/TEST_EXECUTION.md +0 -99
  326. package/tests/patch_verification/TEST_PLAN.md +0 -328
  327. package/tests/patch_verification/TEST_RESULTS.md +0 -34
  328. package/tests/patch_verification/TOOL_TEST_PLAN.md +0 -48
  329. package/tests/patch_verification/run-tool-test.mjs +0 -121
  330. package/tests/patch_verification/temp_test_files/test01.txt +0 -1
  331. package/tests/patch_verification/temp_test_files/test02.txt +0 -3
  332. package/tests/patch_verification/temp_test_files/test02_gnu.txt +0 -3
  333. package/tests/patch_verification/temp_test_files/test03.txt +0 -1
  334. package/tests/patch_verification/temp_test_files/test03_multiline.txt +0 -5
  335. package/tests/patch_verification/temp_test_files/test04_function.ts +0 -5
  336. package/tests/patch_verification/temp_test_files/test05_import.ts +0 -4
  337. package/tests/patch_verification/temp_test_files/test06_special_chars.txt +0 -4
  338. package/tests/patch_verification/temp_test_files/test07_indentation.ts +0 -5
  339. package/tests/patch_verification/temp_test_files/test08_mismatch.txt +0 -1
  340. package/tests/patch_verification/temp_test_files/test_add_02.txt +0 -3
  341. package/tests/patch_verification/temp_test_files/test_simple.txt +0 -1
  342. package/tests/runner/TestReporter.mjs +0 -57
  343. package/tests/runner/TestRunner.mjs +0 -244
  344. package/tests/unit/commands/profile.test.mjs +0 -10
  345. package/tests/unit/container/change-notifier.test.mjs +0 -181
  346. package/tests/unit/lifecycle/session-registry.test.mjs +0 -135
  347. package/tests/unit/operations/registry.test.ts +0 -73
  348. package/tests/unit/utils/browser-service.test.mjs +0 -153
  349. package/tests/unit/utils/config.test.mjs +0 -166
  350. package/tests/unit/utils/fingerprint.test.mjs +0 -166
  351. package/tsconfig.json +0 -31
  352. package/tsconfig.services.json +0 -26
  353. /package/apps/desktop-console/{src → dist}/renderer/index.html +0 -0
  354. /package/apps/desktop-console/{src/renderer/tabs → dist/renderer}/run.mts +0 -0
@@ -1,228 +0,0 @@
1
- # 小红书 Workflow - 下一步任务清单
2
-
3
- ## ✅ 已完成
4
-
5
- 1. **容器库架构** - 完整实现了搜索页、详情页、评论区等所有容器定义
6
- 2. **Workflow Block** - `XiaohongshuCrawlerBlock.ts` 实现了完整采集逻辑
7
- 3. **Debug 脚本** - 创建了 `debug-xhs-status/search/detail.mjs` 三个阶段测试脚本
8
- 4. **AGENTS.md 规则** - 新增"调试脚本必须保持浏览器会话不被破坏"规则
9
-
10
- ## 📋 当前问题分析
11
-
12
- ### 问题 1: 调试脚本频繁重启浏览器 ❌
13
- **现状**:
14
- - `scripts/xiaohongshu-test-comments.mjs` 中有 `startBrowserSession()` 逻辑
15
- - 每次运行都可能调用 `start-headful.mjs` 启动新会话
16
- - 破坏了现有 session 状态
17
-
18
- **需要修改**:
19
- - `xiaohongshu-test-comments.mjs`
20
- - 其他可能启动新session的测试脚本
21
-
22
- ### 问题 2: 频繁导航到同一页面 ❌
23
- **现状**:
24
- - `debug-xhs-search.mjs` 中有 `ensureSearchPage()` 会直接跳转
25
- - 没有先检查当前URL是否已经在目标页面
26
-
27
- **需要修改**:
28
- - 在导航前先检查 `getCurrentUrl()`
29
- - 如果已在目标页,只刷新而不重新导航
30
-
31
- ### 问题 3: 脚本不是 unattached(非侵入)模式 ❌
32
- **现状**:
33
- - 测试脚本直接操作浏览器,可能改变会话状态
34
- - 没有明确的"只读"或"非侵入"模式
35
-
36
- **需要改进**:
37
- - 明确标记哪些操作是只读的(如 status 检查)
38
- - 哪些操作会改变状态(如 search、navigate)
39
- - 提供恢复机制(如记录初始 URL,测试后恢复)
40
-
41
- ## 🎯 下一步任务(按优先级排序)
42
-
43
- ### Task 1: 修改现有调试脚本为 unattached 模式 ⏳
44
-
45
- **目标**: 让调试脚本复用现有 session,不重启浏览器
46
-
47
- **子任务**:
48
-
49
- 1. **修改 `debug-xhs-status.mjs`** ✅
50
- - ✅ 已经符合要求:仅读取状态,不改变页面
51
- - ✅ 使用现有 PROFILE,不启动新session
52
-
53
- 2. **修改 `debug-xhs-search.mjs`** ⏳
54
- - [ ] `ensureSearchPage()` 改为先检查当前URL
55
- - [ ] 如果已在搜索页,优先使用 `location.reload()` 而非重新导航
56
- - [ ] 记录初始URL,测试完成后可选恢复
57
-
58
- 3. **修改 `debug-xhs-detail.mjs`** ⏳
59
- - [ ] 检查当前是否已在搜索页
60
- - [ ] 测试完成后关闭详情模态,恢复到搜索页
61
- - [ ] 不强制导航,优先使用现有页面状态
62
-
63
- 4. **修改 `xiaohongshu-test-comments.mjs`** ⏳
64
- - [ ] 移除 `startBrowserSession()` 自动启动逻辑
65
- - [ ] 改为:检测到 session 不存在时,提示用户手动启动
66
- - [ ] 或者提供 `--ensure-session` flag,明确需要时才启动
67
-
68
- **修改原则**:
69
- ```javascript
70
- // ❌ 旧方式 - 直接导航
71
- async function ensureSearchPage() {
72
- await controllerAction('browser:execute', {
73
- script: `window.location.href = 'https://www.xiaohongshu.com'`
74
- });
75
- }
76
-
77
- // ✅ 新方式 - 检查后刷新或导航
78
- async function ensureSearchPage() {
79
- const url = await getCurrentUrl();
80
- if (url.includes('xiaohongshu.com/search_result')) {
81
- console.log(' ✅ 已在搜索页,刷新...');
82
- await controllerAction('browser:execute', {
83
- script: 'location.reload()'
84
- });
85
- } else if (url.includes('xiaohongshu.com')) {
86
- console.log(' ⚠️ 在小红书其他页面,导航到搜索...');
87
- await controllerAction('browser:execute', {
88
- script: `window.location.href = 'https://www.xiaohongshu.com/search_result?...'`
89
- });
90
- } else {
91
- console.log(' ❌ 不在小红书页面,请先手动导航');
92
- process.exit(1);
93
- }
94
- }
95
- ```
96
-
97
- ### Task 2: 创建 Session 检查脚本 ⏳
98
-
99
- **目标**: 提供统一的 session 状态检查工具
100
-
101
- **文件**: `scripts/check-xiaohongshu-session.mjs`
102
-
103
- **功能**:
104
- - 检查 `xiaohongshu_fresh` session 是否存在
105
- - 显示当前 URL
106
- - 显示登录状态
107
- - 显示 Cookie 过期时间
108
- - 给出启动建议(如果 session 不存在)
109
-
110
- **用法**:
111
- ```bash
112
- node scripts/check-xiaohongshu-session.mjs
113
- ```
114
-
115
- ### Task 3: 更新测试脚本文档 ⏳
116
-
117
- **目标**: 明确测试流程和最佳实践
118
-
119
- **文件**: `docs/testing-xiaohongshu.md`
120
-
121
- **内容**:
122
- 1. **Session 管理规范**
123
- - 启动 session:`node scripts/start-headful.mjs --profile xiaohongshu_fresh --url https://www.xiaohongshu.com`
124
- - 检查 session:`node scripts/check-xiaohongshu-session.mjs`
125
- - Session 应保持运行,不要频繁重启
126
-
127
- 2. **测试流程**
128
- ```bash
129
- # 1. 启动 session(仅首次或session丢失时)
130
- node scripts/start-headful.mjs --profile xiaohongshu_fresh --url https://www.xiaohongshu.com
131
-
132
- # 2. 运行阶段测试(session 保持运行)
133
- node scripts/debug-xhs-status.mjs # Step 1: 状态诊断
134
- node scripts/debug-xhs-search.mjs # Step 2: 搜索验证
135
- node scripts/debug-xhs-detail.mjs # Step 3: 详情页交互
136
-
137
- # 3. 完整 workflow 测试
138
- node scripts/run-xiaohongshu-workflow.ts --keyword "oppo小平板" --count 5
139
- ```
140
-
141
- 3. **调试技巧**
142
- - 使用 Bus 订阅监听事件:`wscat -c ws://127.0.0.1:7701/bus`
143
- - 查看容器匹配日志:检查 `container:match` 事件
144
- - 截图调试:在脚本中增加 `takeScreenshot()` 调用
145
-
146
- ### Task 4: 优化 XiaohongshuCrawlerBlock ⏳
147
-
148
- **目标**: 根据实际测试结果优化 Block 逻辑
149
-
150
- **待优化点**:
151
- 1. **Context Destroyed 处理**
152
- - [ ] 增加 retry 机制
153
- - [ ] 优化 `waitForDetailContext()` 的轮询策略
154
-
155
- 2. **评论展开优化**
156
- - [ ] 根据实际 DOM 调整滚动距离和次数
157
- - [ ] 优化 `show_more_button` 的查找逻辑
158
-
159
- 3. **图片下载优化**
160
- - [ ] 增加并发控制
161
- - [ ] 优化重试策略
162
-
163
- 4. **性能优化**
164
- - [ ] 减少不必要的等待时间
165
- - [ ] 并行处理部分操作
166
-
167
- ### Task 5: 小规模验证测试 ⏳
168
-
169
- **目标**: 运行完整 workflow,采集 5 条数据验证
170
-
171
- **步骤**:
172
- ```bash
173
- # 1. 确保 session 运行
174
- node scripts/check-xiaohongshu-session.mjs
175
-
176
- # 2. 运行 workflow(小规模)
177
- node scripts/run-xiaohongshu-workflow.ts --keyword "手机膜" --count 5
178
-
179
- # 3. 检查输出
180
- ls -la ~/.webauto/download/xiaohongshu/手机膜/
181
-
182
- # 4. 验证数据完整性
183
- # - Markdown 格式正确
184
- # - 图片下载成功
185
- # - 评论数据完整
186
- ```
187
-
188
- ## 🔄 迭代计划
189
-
190
- ### 第一轮:基础功能验证(本周)
191
- - [x] 创建调试脚本
192
- - [x] 补充 AGENTS.md 规则
193
- - [ ] 修改脚本为 unattached 模式
194
- - [ ] 运行 5 条数据测试
195
-
196
- ### 第二轮:稳定性优化(下周)
197
- - [ ] 优化 Context Destroyed 问题
198
- - [ ] 优化评论展开逻辑
199
- - [ ] 增加错误恢复机制
200
- - [ ] 运行 50 条数据测试
201
-
202
- ### 第三轮:性能优化(后续)
203
- - [ ] 并行采集优化
204
- - [ ] 图片下载优化
205
- - [ ] 增加增量采集支持
206
- - [ ] 运行 200+ 条数据测试
207
-
208
- ## 📊 预期成果
209
-
210
- - ✅ 调试流程清晰,session 状态稳定
211
- - ✅ 容器匹配成功率 > 95%
212
- - ✅ 评论展开成功率 > 90%
213
- - ✅ 图片下载成功率 > 85%
214
- - ✅ 平均每条数据采集时间 < 30秒
215
-
216
- ## 🚨 风险提示
217
-
218
- 1. **反爬策略变化**:小红书可能随时调整 DOM 结构或增加反爬措施
219
- 2. **评论加载时机**:动态加载的评论可能需要更长等待时间
220
- 3. **图片防盗链**:图片 URL 可能短期有效,需要及时下载
221
-
222
- ## 📝 相关文档
223
-
224
- - `container-library/xiaohongshu/README.md` - 容器定义
225
- - `modules/workflow/blocks/XiaohongshuCrawlerBlock.ts` - 采集逻辑
226
- - `task.md` - 任务追踪
227
- - `AGENTS.md` - 架构规则
228
- - `docs/xiaohongshu-workflow-summary.md` - 实施总结
@@ -1,73 +0,0 @@
1
- # 小红书采集快速开始(新架构)
2
-
3
- ## 1. 安装与检查
4
-
5
- ```bash
6
- npm install
7
- npm run build:services
8
- node scripts/xiaohongshu/install.mjs --check
9
- ```
10
-
11
- ## 2. 登录准备(必须)
12
-
13
- 首次运行建议先在可视模式完成登录。
14
-
15
- ```bash
16
- node scripts/xiaohongshu/phase1-boot.mjs --profile xiaohongshu-batch-1 --headless false
17
- ```
18
-
19
- ## 3. 执行全流程
20
-
21
- ### 方式 A:编排入口(推荐)
22
-
23
- ```bash
24
- node scripts/xiaohongshu/phase-orchestrate.mjs \
25
- --mode phase1-phase2-unified \
26
- --profile xiaohongshu-batch-1 \
27
- --keyword "工作服定制" \
28
- --target 50 \
29
- --env debug \
30
- --headless false
31
- ```
32
-
33
- ### 方式 B:仅运行 unified(autoscript)
34
-
35
- ```bash
36
- node scripts/xiaohongshu/phase-unified-harvest.mjs \
37
- --profile xiaohongshu-batch-1 \
38
- --keyword "工作服定制" \
39
- --max-notes 50 \
40
- --do-comments true \
41
- --do-likes true \
42
- --like-keywords "真敬业" \
43
- --headless false
44
- ```
45
-
46
- ## 4. 查看进度与状态
47
-
48
- ```bash
49
- # 状态摘要
50
- node scripts/xiaohongshu/state.mjs show --keyword "工作服定制" --env debug
51
-
52
- # 状态 JSON
53
- node scripts/xiaohongshu/state.mjs show --keyword "工作服定制" --env debug --json
54
- ```
55
-
56
- ## 5. 输出目录
57
-
58
- ```text
59
- ~/.webauto/download/xiaohongshu/{env}/{keyword}/
60
- ├── phase2-links.jsonl
61
- ├── .collect-state.json
62
- ├── run*.log / run-events*.jsonl
63
- └── {noteId}/
64
- ├── README.md
65
- ├── comments.md
66
- └── images/
67
- ```
68
-
69
- ## 注意事项
70
-
71
- - 详情页必须通过页面点击进入,禁止 URL 直跳(避免 `xsec_token` 风险)。
72
- - 调试阶段建议 `--headless false`,便于观察容器匹配与动画时序。
73
- - 若需要仅采集链接,可单独运行 `phase2-collect.mjs`。
@@ -1,227 +0,0 @@
1
- # 小红书 Workflow 实施总结
2
-
3
- ## ✅ 已完成工作
4
-
5
- ### 1. 容器库架构
6
- 根据 `container-library/xiaohongshu/README.md`,完整实现了以下容器:
7
-
8
- #### 根容器
9
- - `xiaohongshu_search`: 搜索结果页 (`.feeds-page`)
10
- - `xiaohongshu_detail`: 笔记详情页 (`.note-detail-mask`)
11
- - `xiaohongshu_login`: 登录页
12
- - `xiaohongshu_home`: 主页/推荐流
13
-
14
- #### 搜索页容器层次
15
- ```
16
- xiaohongshu_search/
17
- ├── login_anchor/ # 登录锚点
18
- ├── search_bar/ # 搜索框
19
- └── search_result_list/ # 结果列表
20
- └── search_result_item/ # 单个结果项
21
- ```
22
-
23
- **search_result_item 核心能力**:
24
- - `extract`: 提取 title/link/detail_url/image/text/note_id/xsec_token
25
- - `navigate`: 直接读取 `a[href*='/explore/']` 的链接,并通过赋值 `window.location.href` 完成跳转
26
- - `click`: 点击图片获取带 `xsec_token` 的 URL
27
-
28
- #### 详情页容器层次
29
- ```
30
- xiaohongshu_detail/
31
- ├── login_anchor/
32
- └── modal_shell/ # 详情模态框 (.note-detail-mask)
33
- ├── header/ # 作者信息
34
- ├── content/ # 正文
35
- ├── gallery/ # 图片区域
36
- └── comment_section/ # 评论区域
37
- ├── show_more_button/ # 展开更多回复
38
- ├── comment_item/ # 评论项
39
- ├── end_marker/ # "THE END" 标记
40
- └── empty_state/ # 无评论状态
41
- ```
42
-
43
- **comment_section 核心能力**:
44
- - `scroll`: 向下滚动加载更多评论
45
- - `find-child`: 查找并触发 `show_more_button` 的自动点击
46
- - 自动检测 `end_marker` 和 `empty_state` 判断评论加载完成
47
-
48
- ### 2. Workflow Block 实现
49
-
50
- #### XiaohongshuCrawlerBlock
51
- 完整实现的主采集 Block,位于 `modules/workflow/blocks/XiaohongshuCrawlerBlock.ts`:
52
-
53
- **核心功能**:
54
- 1. **登录守护**: `ensureLoginState()` - 检测登录页并等待人工登录
55
- 2. **搜索管理**: `runSearch()` + `ensureSearchPageContext()`
56
- 3. **列表采集**: `collectSearchItems()` - 基于容器树提取搜索结果
57
- 4. **详情导航**: `openDetailFromItem()` - 使用 `navigate` operation
58
- 5. **评论展开**: `scrollComments()` - 自动滚动并触发展开按钮
59
- 6. **数据提取**: `collectDetailData()` - 提取 header/content/gallery/comments
60
- 7. **图片下载**: `saveNoteData()` - 保存 Markdown + 图片
61
- 8. **去重机制**: 基于已存在目录的 note_id 去重
62
-
63
- **数据流**:
64
- ```
65
- 搜索页 → match SEARCH_ROOT → find SEARCH_LIST → extract SEARCH_ITEM
66
-
67
- navigate → wait DETAIL_ROOT → find MODAL_SHELL
68
-
69
- extract HEADER/CONTENT/GALLERY → scroll COMMENT_SECTION → extract COMMENT_ITEM
70
-
71
- save Markdown + images → close modal → back to search
72
- ```
73
-
74
- ### 3. Debug 脚本(新增)
75
-
76
- 创建了 3 个原子化调试脚本:
77
-
78
- #### scripts/debug-xhs-status.mjs
79
- - 获取当前 URL
80
- - 截图当前页面
81
- - 分析 DOM 摘要(`.note-item`、`#search-input`、登录锚点)
82
- - 高亮关键元素
83
-
84
- #### scripts/debug-xhs-search.mjs
85
- - 确保在小红书页面
86
- - 高亮搜索框
87
- - 随机选择关键字(oppo小平板/手机膜/雷军/小米/华为/鸿蒙)
88
- - 执行搜索并等待结果稳定
89
-
90
- #### scripts/debug-xhs-detail.mjs
91
- - 获取列表第一个笔记
92
- - 高亮并打开详情页
93
- - 检查详情页加载(Modal/Title/Comments)
94
- - 自动展开评论(滚动 + 点击展开按钮)
95
- - 统计评论数量和状态
96
-
97
- ### 4. Workflow 定义
98
- `modules/workflow/definitions/xiaohongshu-collect-workflow.ts`:
99
- ```typescript
100
- {
101
- id: 'xiaohongshu-collect',
102
- name: '小红书关键词采集',
103
- steps: [
104
- { blockName: 'StartBrowserService', ... },
105
- { blockName: 'EnsureSession', ... },
106
- { blockName: 'XiaohongshuCrawlerBlock', ... }
107
- ]
108
- }
109
- ```
110
-
111
- ## 📋 调试计划(按 task.md)
112
-
113
- ### Step 1: 状态诊断 ✅
114
- ```bash
115
- node scripts/debug-xhs-status.mjs
116
- ```
117
- 验证:
118
- - 当前 URL
119
- - DOM 结构
120
- - 关键元素高亮
121
-
122
- ### Step 2: 搜索验证 ⏳
123
- ```bash
124
- node scripts/debug-xhs-search.mjs
125
- ```
126
- 验证:
127
- - 搜索框定位
128
- - 关键字轮换
129
- - 结果加载
130
-
131
- ### Step 3: 详情页交互 ⏳
132
- ```bash
133
- node scripts/debug-xhs-detail.mjs
134
- ```
135
- 验证:
136
- - 详情页打开
137
- - 评论展开
138
- - 数据完整性
139
-
140
- ### Step 4: 完整 Workflow ⏳
141
- ```bash
142
- # 方式1: 直接调用 Block
143
- node -e "import('./modules/workflow/blocks/XiaohongshuCrawlerBlock.ts').then(m => m.execute({ sessionId: 'xiaohongshu_fresh', keyword: 'oppo小平板', targetCount: 5 }))"
144
-
145
- # 方式2: 通过 Workflow Runner
146
- node scripts/run-xiaohongshu-workflow.ts
147
- ```
148
-
149
- ## 🔧 技术栈
150
-
151
- - **统一 API**: `http://127.0.0.1:7701` (HTTP/WS/Bus)
152
- - **Browser Service**: `http://127.0.0.1:7704` + `ws://127.0.0.1:8765`
153
- - **容器操作**: `/v1/container/<containerId>/execute`
154
- - **Controller 动作**: `/v1/controller/action`
155
- - **事件总线**: `ws://127.0.0.1:7701/bus` (订阅 `container:*`/`ui:*`)
156
-
157
- ## 🚨 已知问题与对策
158
-
159
- ### 1. Navigation Context Destroyed
160
- **问题**: 页面跳转时脚本执行被中断
161
- **对策**:
162
- - `waitForDetailContext()` - 轮询等待详情容器出现
163
- - `ensureSearchPageContext()` - 确保回到搜索页后重新匹配
164
-
165
- ### 2. 评论展开时机
166
- **问题**: 动态加载的评论需要滚动触发
167
- **对策**:
168
- - `scrollComments()` - 多轮滚动 + `find-child` 触发 `show_more_button`
169
- - 检测 `end_marker` 和 `empty_state` 判断结束
170
-
171
- ### 3. 图片下载反爬
172
- **问题**: 小红书图片需要 UA + Cookie
173
- **对策**:
174
- - `fetchBrowserHeaders()` - 读取浏览器 UA 和 Cookie
175
- - 重试机制(最多3次)
176
-
177
- ## 📁 输出结构
178
-
179
- ```
180
- ~/.webauto/download/xiaohongshu/{keyword}/
181
- ├── {title}_{noteId}/
182
- │ ├── content.md
183
- │ └── images/
184
- │ ├── 1.jpg
185
- │ ├── 2.jpg
186
- │ └── ...
187
- └── ...
188
- ```
189
-
190
- ### Markdown 格式
191
- ```markdown
192
- # 标题
193
-
194
- - **关键字**: oppo小平板
195
- - **作者**: xxx | [主页](link)
196
- - **Note ID**: 12345
197
- - **评论统计**: 10 条 / 结尾标记:是 / 空状态:否
198
-
199
- ## 正文
200
- (正文内容)
201
-
202
- ## 图片
203
- ![](./images/1.jpg)
204
- ![](./images/2.jpg)
205
-
206
- ## 评论(10)
207
- ### 1. 用户名 (userId)
208
- - 时间:2025-01-05
209
-
210
- 评论内容...
211
- ```
212
-
213
- ## 🎯 下一步
214
-
215
- 1. **运行 Debug 脚本**: 验证当前 Session 状态
216
- 2. **调整容器定义**: 根据实际 DOM 微调选择器
217
- 3. **测试完整流程**: 5条数据小规模测试
218
- 4. **优化性能**: 减少等待时间、提高提取成功率
219
- 5. **扩展 Block**: 支持更多操作(如批量导出、数据分析)
220
-
221
- ## 📝 参考文档
222
-
223
- - `container-library/xiaohongshu/README.md` - 容器定义规范
224
- - `modules/workflow/blocks/XiaohongshuCrawlerBlock.ts` - 主采集逻辑
225
- - `task.md` - 任务追踪与调试计划
226
- - `AGENTS.md` - 架构设计原则
227
-
@@ -1,16 +0,0 @@
1
- import test from 'node:test';
2
- import assert from 'node:assert/strict';
3
- import { ContainerRegistry } from '../src/index.js';
4
-
5
- test('getContainersForUrl returns known container', () => {
6
- const registry = new ContainerRegistry();
7
- const containers = registry.getContainersForUrl('https://weibo.com/');
8
- assert.ok(containers.weibo_main_page, 'should include weibo_main_page');
9
- });
10
-
11
- test('listSites contains weibo entry', () => {
12
- const registry = new ContainerRegistry();
13
- const sites = registry.listSites();
14
- const hasWeibo = sites.some((site) => site.key.includes('weibo'));
15
- assert.ok(hasWeibo, 'should list weibo site');
16
- });
@@ -1,38 +0,0 @@
1
- import test from 'node:test';
2
- import assert from 'node:assert/strict';
3
- import os from 'node:os';
4
- import path from 'node:path';
5
- import fs from 'node:fs/promises';
6
- import { run as runCli } from '../src/cli.js';
7
-
8
- async function withTempLog(content: string, fn: (file: string) => Promise<void>) {
9
- const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'logging-test-'));
10
- const file = path.join(dir, 'sample.log');
11
- await fs.writeFile(file, content, 'utf-8');
12
- try {
13
- await fn(file);
14
- } finally {
15
- await fs.rm(dir, { recursive: true, force: true });
16
- }
17
- }
18
-
19
- test('logging cli stream returns tail lines', async () => {
20
- await withTempLog('line1\nline2\nline3\n', async (file) => {
21
- const result = await runCli(['stream', '--file', file, '--lines', '2']);
22
- assert.equal(result.success, true);
23
- assert.deepEqual(result.data.lines, ['line2', 'line3']);
24
- });
25
- });
26
-
27
- test('logging cli flush can truncate file', async () => {
28
- await withTempLog('foo\nbar\n', async (file) => {
29
- const flush = await runCli(['flush', '--file', file, '--truncate', 'false']);
30
- assert.equal(flush.success, true);
31
- assert.equal(flush.data.lines.length, 2);
32
-
33
- const flushTruncate = await runCli(['flush', '--file', file]);
34
- assert.equal(flushTruncate.success, true);
35
- const stats = await fs.stat(file);
36
- assert.equal(stats.size, 0);
37
- });
38
- });
@@ -1,22 +0,0 @@
1
- import test from 'node:test';
2
- import assert from 'node:assert/strict';
3
- import { run as runCli } from '../src/cli.js';
4
-
5
- test('operations cli list returns registered operations', async () => {
6
- const result = await runCli(['list']);
7
- assert.equal(result.success, true);
8
- assert.ok(result.data.some((op: any) => op.id === 'highlight'));
9
- assert.ok(result.data.some((op: any) => op.id === 'scroll'));
10
- });
11
-
12
- test('operations cli run works with mock page context', async () => {
13
- const result = await runCli(['run', '--op', 'highlight', '--config', '{"selector":"#app"}']);
14
- assert.equal(result.success, true);
15
- assert.equal(result.data.mock, true);
16
- });
17
-
18
- // Skipped: robotjs has been removed from dependencies
19
- test.skip('operations cli run supports system mouse', async () => {
20
- const result = await runCli(['run', '--op', 'system:mouse-move', '--config', '{"x":10,"y":20}']);
21
- assert.equal(result.success, true);
22
- });