@web-auto/webauto 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (354) hide show
  1. package/apps/desktop-console/default-settings.json +1 -0
  2. package/apps/desktop-console/dist/main/index.mjs +1618 -0
  3. package/apps/desktop-console/{src → dist}/main/preload.mjs +10 -0
  4. package/apps/desktop-console/dist/renderer/index.js +3063 -0
  5. package/apps/desktop-console/entry/ui-console.mjs +299 -0
  6. package/apps/webauto/entry/account.mjs +356 -0
  7. package/apps/webauto/entry/lib/account-detect.mjs +160 -0
  8. package/apps/webauto/entry/lib/account-store.mjs +587 -0
  9. package/apps/webauto/entry/lib/profilepool.mjs +1 -1
  10. package/apps/webauto/entry/xhs-install.mjs +27 -3
  11. package/apps/webauto/entry/xhs-status.mjs +152 -0
  12. package/apps/webauto/entry/xhs-unified.mjs +595 -17
  13. package/bin/webauto.mjs +247 -12
  14. package/dist/apps/webauto/server.js +66 -0
  15. package/dist/modules/camo-backend/src/index.js +575 -0
  16. package/dist/modules/camo-backend/src/internal/BrowserSession.js +817 -0
  17. package/dist/modules/camo-backend/src/internal/ElementRegistry.js +61 -0
  18. package/dist/modules/camo-backend/src/internal/ProfileLock.js +85 -0
  19. package/dist/modules/camo-backend/src/internal/SessionManager.js +172 -0
  20. package/dist/modules/camo-backend/src/internal/container-matcher.js +852 -0
  21. package/dist/modules/camo-backend/src/internal/engine-manager.js +258 -0
  22. package/dist/modules/camo-backend/src/internal/fingerprint.js +203 -0
  23. package/dist/modules/camo-backend/src/internal/pageRuntime.js +29 -0
  24. package/dist/modules/camo-backend/src/internal/runtimeInjector.js +30 -0
  25. package/dist/modules/camo-backend/src/internal/state-bus.js +46 -0
  26. package/dist/modules/camo-backend/src/internal/storage-paths.js +36 -0
  27. package/dist/modules/camo-backend/src/internal/ws-server.js +1202 -0
  28. package/dist/modules/camo-runtime/src/utils/browser-service.mjs +423 -0
  29. package/dist/modules/camo-runtime/src/utils/config.mjs +77 -0
  30. package/dist/modules/container-registry/src/index.js +184 -0
  31. package/dist/modules/logging/src/index.js +92 -0
  32. package/dist/modules/operations/src/builtin.js +27 -0
  33. package/dist/modules/operations/src/container-binding.js +75 -0
  34. package/dist/modules/operations/src/executor.js +146 -0
  35. package/dist/modules/operations/src/operations/click.js +167 -0
  36. package/dist/modules/operations/src/operations/extract.js +204 -0
  37. package/dist/modules/operations/src/operations/find-child.js +17 -0
  38. package/dist/modules/operations/src/operations/highlight.js +138 -0
  39. package/dist/modules/operations/src/operations/key.js +61 -0
  40. package/dist/modules/operations/src/operations/navigate.js +148 -0
  41. package/dist/modules/operations/src/operations/scroll.js +126 -0
  42. package/dist/modules/operations/src/operations/type.js +190 -0
  43. package/dist/modules/operations/src/queue.js +100 -0
  44. package/dist/modules/operations/src/registry.js +11 -0
  45. package/dist/modules/operations/src/system/mouse.js +33 -0
  46. package/dist/modules/state/src/atomic-json.js +33 -0
  47. package/dist/modules/workflow/blocks/AnchorVerificationBlock.js +71 -0
  48. package/dist/modules/workflow/blocks/BehaviorRandomizer.js +26 -0
  49. package/dist/modules/workflow/blocks/CallWorkflowBlock.js +38 -0
  50. package/dist/modules/workflow/blocks/CloseDetailBlock.js +209 -0
  51. package/dist/modules/workflow/blocks/CollectBatch.js +137 -0
  52. package/dist/modules/workflow/blocks/CollectCommentsBlock.js +415 -0
  53. package/dist/modules/workflow/blocks/CollectSearchListBlock.js +599 -0
  54. package/dist/modules/workflow/blocks/CollectWeiboPosts.js +229 -0
  55. package/dist/modules/workflow/blocks/DetectPageStateBlock.js +259 -0
  56. package/dist/modules/workflow/blocks/EnsureLoginBlock.js +162 -0
  57. package/dist/modules/workflow/blocks/EnsureSession.js +426 -0
  58. package/dist/modules/workflow/blocks/ErrorClassifier.js +164 -0
  59. package/dist/modules/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  60. package/dist/modules/workflow/blocks/ExpandCommentsBlock.js +1032 -0
  61. package/dist/modules/workflow/blocks/ExtractDetailBlock.js +310 -0
  62. package/dist/modules/workflow/blocks/ExtractPostFields.js +88 -0
  63. package/dist/modules/workflow/blocks/GenerateSmartReplyBlock.js +68 -0
  64. package/dist/modules/workflow/blocks/GoToSearchBlock.js +497 -0
  65. package/dist/modules/workflow/blocks/GracefulFallbackBlock.js +104 -0
  66. package/dist/modules/workflow/blocks/HighlightBlock.js +66 -0
  67. package/dist/modules/workflow/blocks/InitAutoScroll.js +65 -0
  68. package/dist/modules/workflow/blocks/LoadContainerDefinition.js +50 -0
  69. package/dist/modules/workflow/blocks/LoadContainerIndex.js +43 -0
  70. package/dist/modules/workflow/blocks/LocateAndGuardBlock.js +176 -0
  71. package/dist/modules/workflow/blocks/LoginRecoveryBlock.js +242 -0
  72. package/dist/modules/workflow/blocks/MatchContainers.js +64 -0
  73. package/dist/modules/workflow/blocks/MonitoringBlock.js +190 -0
  74. package/dist/modules/workflow/blocks/OpenDetailBlock.js +1240 -0
  75. package/dist/modules/workflow/blocks/OrganizeXhsNotesBlock.js +117 -0
  76. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +270 -0
  77. package/dist/modules/workflow/blocks/PickSinglePost.js +69 -0
  78. package/dist/modules/workflow/blocks/ProgressTracker.js +125 -0
  79. package/dist/modules/workflow/blocks/RecordFixtureBlock.js +44 -0
  80. package/dist/modules/workflow/blocks/RenderMarkdown.js +48 -0
  81. package/dist/modules/workflow/blocks/SaveFile.js +54 -0
  82. package/dist/modules/workflow/blocks/ScrollNextBatch.js +72 -0
  83. package/dist/modules/workflow/blocks/SessionHealthBlock.js +73 -0
  84. package/dist/modules/workflow/blocks/StartBrowserService.js +45 -0
  85. package/dist/modules/workflow/blocks/ValidateContainerDefinition.js +67 -0
  86. package/dist/modules/workflow/blocks/ValidateExtract.js +35 -0
  87. package/dist/modules/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  88. package/dist/modules/workflow/blocks/WaitStable.js +74 -0
  89. package/dist/modules/workflow/blocks/WarmupCommentsBlock.js +120 -0
  90. package/dist/modules/workflow/blocks/WorkflowExecutor.js +156 -0
  91. package/dist/modules/workflow/blocks/XiaohongshuCollectFromLinksBlock.js +1004 -0
  92. package/dist/modules/workflow/blocks/XiaohongshuCollectLinksBlock.js +1049 -0
  93. package/dist/modules/workflow/blocks/XiaohongshuFullCollectBlock.js +782 -0
  94. package/dist/modules/workflow/blocks/helpers/anchorVerify.js +198 -0
  95. package/dist/modules/workflow/blocks/helpers/asyncWorkQueue.js +53 -0
  96. package/dist/modules/workflow/blocks/helpers/commentScroller.js +334 -0
  97. package/dist/modules/workflow/blocks/helpers/commentSectionLocator.js +126 -0
  98. package/dist/modules/workflow/blocks/helpers/containerAnchors.js +301 -0
  99. package/dist/modules/workflow/blocks/helpers/debugArtifacts.js +6 -0
  100. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +29 -0
  101. package/dist/modules/workflow/blocks/helpers/expandCommentsController.js +53 -0
  102. package/dist/modules/workflow/blocks/helpers/expandCommentsExtractor.js +129 -0
  103. package/dist/modules/workflow/blocks/helpers/macosVisionOcrPlugin.js +116 -0
  104. package/dist/modules/workflow/blocks/helpers/mergeXhsMarkdown.js +109 -0
  105. package/dist/modules/workflow/blocks/helpers/openDetailController.js +56 -0
  106. package/dist/modules/workflow/blocks/helpers/openDetailTypes.js +7 -0
  107. package/dist/modules/workflow/blocks/helpers/openDetailViewport.js +474 -0
  108. package/dist/modules/workflow/blocks/helpers/openDetailWaiter.js +104 -0
  109. package/dist/modules/workflow/blocks/helpers/operationLogger.js +195 -0
  110. package/dist/modules/workflow/blocks/helpers/persistedNotes.js +107 -0
  111. package/dist/modules/workflow/blocks/helpers/replyExpander.js +260 -0
  112. package/dist/modules/workflow/blocks/helpers/scrollIntoView.js +138 -0
  113. package/dist/modules/workflow/blocks/helpers/searchExecutor.js +328 -0
  114. package/dist/modules/workflow/blocks/helpers/searchGate.js +46 -0
  115. package/dist/modules/workflow/blocks/helpers/searchPageState.js +164 -0
  116. package/dist/modules/workflow/blocks/helpers/searchResultWaiter.js +64 -0
  117. package/dist/modules/workflow/blocks/helpers/simpleAnchor.js +134 -0
  118. package/dist/modules/workflow/blocks/helpers/smartReply.js +40 -0
  119. package/dist/modules/workflow/blocks/helpers/systemInput.js +635 -0
  120. package/dist/modules/workflow/blocks/helpers/targetCountMode.js +9 -0
  121. package/dist/modules/workflow/blocks/helpers/xhsCliArgs.js +80 -0
  122. package/dist/modules/workflow/blocks/helpers/xhsCommentDom.js +805 -0
  123. package/dist/modules/workflow/blocks/helpers/xhsNoteOrganizer.js +140 -0
  124. package/dist/modules/workflow/blocks/restore/RestorePhaseBlock.js +204 -0
  125. package/dist/modules/workflow/config/workflowRegistry.js +32 -0
  126. package/dist/modules/workflow/definitions/batch-collect-workflow.js +63 -0
  127. package/dist/modules/workflow/definitions/scroll-extract-workflow.js +74 -0
  128. package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow-v2.js +81 -0
  129. package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow.js +57 -0
  130. package/dist/modules/workflow/definitions/xiaohongshu-full-collect-workflow-v3.js +68 -0
  131. package/dist/modules/workflow/definitions/xiaohongshu-note-collect.js +49 -0
  132. package/dist/modules/workflow/definitions/xiaohongshu-phase1-workflow-v3.js +30 -0
  133. package/dist/modules/workflow/definitions/xiaohongshu-phase2-links-workflow-v3.js +40 -0
  134. package/dist/modules/workflow/definitions/xiaohongshu-phase3-collect-workflow-v1.js +54 -0
  135. package/dist/modules/workflow/definitions/xiaohongshu-phase34-from-links-workflow-v3.js +25 -0
  136. package/dist/modules/workflow/src/WeiboEventDrivenWorkflowRunner.js +308 -0
  137. package/dist/modules/workflow/src/context.js +70 -0
  138. package/dist/modules/workflow/src/index.js +5 -0
  139. package/dist/modules/workflow/src/orchestrator.js +230 -0
  140. package/dist/modules/workflow/src/runner.js +55 -0
  141. package/dist/modules/workflow/src/runtime.js +70 -0
  142. package/dist/modules/workflow/workflows/WeiboFeedExtractionWorkflow.js +359 -0
  143. package/dist/modules/workflow/workflows/XiaohongshuLoginWorkflow.js +110 -0
  144. package/dist/modules/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  145. package/dist/modules/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  146. package/dist/modules/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  147. package/dist/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  148. package/dist/modules/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  149. package/dist/modules/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  150. package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  151. package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  152. package/dist/modules/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  153. package/dist/modules/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  154. package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  155. package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  156. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  157. package/dist/modules/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  158. package/dist/modules/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  159. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  160. package/dist/modules/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  161. package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  162. package/dist/modules/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  163. package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  164. package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  165. package/dist/modules/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  166. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +42 -0
  167. package/dist/modules/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  168. package/dist/modules/xiaohongshu/app/src/index.js +9 -0
  169. package/dist/modules/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  170. package/dist/modules/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  171. package/dist/services/controller/src/controller.js +1476 -0
  172. package/dist/services/controller/src/index.js +2 -0
  173. package/dist/services/controller/src/payload-normalizer.js +129 -0
  174. package/dist/services/shared/heartbeat.js +120 -0
  175. package/dist/services/shared/lib/errorHandler.js +2 -0
  176. package/dist/services/shared/serviceProcessLogger.js +139 -0
  177. package/dist/services/unified-api/RemoteBrowserSession.js +176 -0
  178. package/dist/services/unified-api/RemoteSessionManager.js +148 -0
  179. package/dist/services/unified-api/container-operations-handler.js +115 -0
  180. package/dist/services/unified-api/server.js +652 -0
  181. package/dist/services/unified-api/state-registry.js +274 -0
  182. package/dist/services/unified-api/task-persistence.js +66 -0
  183. package/dist/services/unified-api/task-state.js +130 -0
  184. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +12 -5
  185. package/modules/xiaohongshu/app/pnpm-lock.yaml +24 -0
  186. package/package.json +37 -9
  187. package/.beads/README.md +0 -81
  188. package/.beads/config.yaml +0 -67
  189. package/.beads/interactions.jsonl +0 -0
  190. package/.beads/issues.jsonl +0 -180
  191. package/.beads/metadata.json +0 -4
  192. package/.claude/settings.local.json +0 -10
  193. package/.github/workflows/ci.yml +0 -55
  194. package/AGENTS.md +0 -253
  195. package/apps/desktop-console/README.md +0 -27
  196. package/apps/desktop-console/package-lock.json +0 -897
  197. package/apps/desktop-console/package.json +0 -20
  198. package/apps/desktop-console/scripts/build-and-install.mjs +0 -19
  199. package/apps/desktop-console/scripts/build.mjs +0 -45
  200. package/apps/desktop-console/scripts/test-preload.mjs +0 -13
  201. package/apps/desktop-console/src/main/config.mts +0 -26
  202. package/apps/desktop-console/src/main/core-daemon-manager.mts +0 -131
  203. package/apps/desktop-console/src/main/desktop-settings.mts +0 -267
  204. package/apps/desktop-console/src/main/heartbeat-watchdog.mts +0 -50
  205. package/apps/desktop-console/src/main/heartbeat-watchdog.test.mts +0 -68
  206. package/apps/desktop-console/src/main/index-streaming.test.mts +0 -20
  207. package/apps/desktop-console/src/main/index.mts +0 -980
  208. package/apps/desktop-console/src/main/profile-store.mts +0 -239
  209. package/apps/desktop-console/src/main/profile-store.test.mts +0 -54
  210. package/apps/desktop-console/src/main/state-bridge.mts +0 -114
  211. package/apps/desktop-console/src/main/task-state-types.ts +0 -32
  212. package/apps/desktop-console/src/renderer/hooks/use-task-state.mts +0 -120
  213. package/apps/desktop-console/src/renderer/index.mts +0 -133
  214. package/apps/desktop-console/src/renderer/index.test.mts +0 -34
  215. package/apps/desktop-console/src/renderer/path-helpers.mts +0 -46
  216. package/apps/desktop-console/src/renderer/path-helpers.test.mts +0 -14
  217. package/apps/desktop-console/src/renderer/tabs/debug.mts +0 -48
  218. package/apps/desktop-console/src/renderer/tabs/debug.test.mts +0 -22
  219. package/apps/desktop-console/src/renderer/tabs/logs.mts +0 -421
  220. package/apps/desktop-console/src/renderer/tabs/logs.test.mts +0 -27
  221. package/apps/desktop-console/src/renderer/tabs/preflight.mts +0 -486
  222. package/apps/desktop-console/src/renderer/tabs/preflight.test.mts +0 -33
  223. package/apps/desktop-console/src/renderer/tabs/profile-pool.mts +0 -213
  224. package/apps/desktop-console/src/renderer/tabs/results.mts +0 -171
  225. package/apps/desktop-console/src/renderer/tabs/run.test.mts +0 -63
  226. package/apps/desktop-console/src/renderer/tabs/runtime.mts +0 -151
  227. package/apps/desktop-console/src/renderer/tabs/settings.mts +0 -146
  228. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/account-flow.mts +0 -486
  229. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/guide-browser-check.mts +0 -56
  230. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/helpers.mts +0 -262
  231. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/layout-block.mts +0 -430
  232. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/live-stats.mts +0 -847
  233. package/apps/desktop-console/src/renderer/tabs/xiaohongshu/run-flow.mts +0 -443
  234. package/apps/desktop-console/src/renderer/tabs/xiaohongshu-state.mts +0 -425
  235. package/apps/desktop-console/src/renderer/tabs/xiaohongshu.mts +0 -497
  236. package/apps/desktop-console/src/renderer/tabs/xiaohongshu.test.mts +0 -291
  237. package/apps/desktop-console/src/renderer/ui-components.mts +0 -31
  238. package/docs/README_camoufox_chinese.md +0 -141
  239. package/docs/USAGE_V3.md +0 -163
  240. package/docs/arch/OCR_MACOS_PLUGIN.md +0 -39
  241. package/docs/arch/PORTS.md +0 -40
  242. package/docs/arch/REGRESSION_CHECKLIST.md +0 -121
  243. package/docs/arch/SEARCH_GATE.md +0 -224
  244. package/docs/arch/VIEWPORT_SAFETY.md +0 -182
  245. package/docs/arch/XIAOHONGSHU_OFFLINE_MOCK_DESIGN.md +0 -267
  246. package/docs/xiaohongshu-container-driven-summary.md +0 -221
  247. package/docs/xiaohongshu-full-collect-runbook.md +0 -134
  248. package/docs/xiaohongshu-next-steps.md +0 -228
  249. package/docs/xiaohongshu-quickstart.md +0 -73
  250. package/docs/xiaohongshu-workflow-summary.md +0 -227
  251. package/modules/container-registry/tests/container-registry.test.ts +0 -16
  252. package/modules/logging/tests/logging.test.ts +0 -38
  253. package/modules/operations/tests/operations.test.ts +0 -22
  254. package/modules/operations/tests/viewport-filter.test.ts +0 -161
  255. package/modules/operations/tests/visible-only.test.ts +0 -250
  256. package/modules/session-manager/tests/session-manager.test.ts +0 -23
  257. package/modules/state/src/atomic-json.test.ts +0 -30
  258. package/modules/state/src/paths.test.ts +0 -59
  259. package/modules/state/src/xiaohongshu-collect-state.test.ts +0 -259
  260. package/modules/workflow/blocks/AnchorVerificationBlock.d.ts.map +0 -1
  261. package/modules/workflow/blocks/AnchorVerificationBlock.js.map +0 -1
  262. package/modules/workflow/blocks/DetectPageStateBlock.d.ts.map +0 -1
  263. package/modules/workflow/blocks/DetectPageStateBlock.js.map +0 -1
  264. package/modules/workflow/blocks/ErrorRecoveryBlock.d.ts.map +0 -1
  265. package/modules/workflow/blocks/ErrorRecoveryBlock.js.map +0 -1
  266. package/modules/workflow/blocks/WaitSearchPermitBlock.d.ts.map +0 -1
  267. package/modules/workflow/blocks/WaitSearchPermitBlock.js.map +0 -1
  268. package/modules/workflow/blocks/helpers/containerAnchors.d.ts.map +0 -1
  269. package/modules/workflow/blocks/helpers/containerAnchors.js.map +0 -1
  270. package/modules/workflow/blocks/helpers/downloadPaths.test.ts +0 -62
  271. package/modules/workflow/blocks/helpers/mergeXhsMarkdown.test.ts +0 -121
  272. package/modules/workflow/blocks/helpers/operationLogger.d.ts.map +0 -1
  273. package/modules/workflow/blocks/helpers/operationLogger.js.map +0 -1
  274. package/modules/workflow/blocks/helpers/persistedNotes.test.ts +0 -268
  275. package/modules/workflow/blocks/helpers/searchPageState.d.ts.map +0 -1
  276. package/modules/workflow/blocks/helpers/searchPageState.js.map +0 -1
  277. package/modules/workflow/blocks/helpers/targetCountMode.test.ts +0 -29
  278. package/modules/workflow/blocks/helpers/xhsCliArgs.test.ts +0 -75
  279. package/modules/workflow/tests/smartReply.test.ts +0 -32
  280. package/modules/xiaohongshu/app/src/blocks/Phase3Interact.matcher.test.ts +0 -33
  281. package/modules/xiaohongshu/app/src/utils/__tests__/checkpoints.test.ts +0 -141
  282. package/modules/xiaohongshu/app/tests/commentMatchDsl.test.ts +0 -50
  283. package/modules/xiaohongshu/app/tests/commentMatcher.test.ts +0 -46
  284. package/modules/xiaohongshu/app/tests/sharding.test.ts +0 -31
  285. package/package-scripts.json +0 -8
  286. package/runtime/infra/utils/README.md +0 -13
  287. package/runtime/infra/utils/scripts/README.md +0 -0
  288. package/runtime/infra/utils/scripts/development/eval-in-session.mjs +0 -40
  289. package/runtime/infra/utils/scripts/development/highlight-search-containers.mjs +0 -35
  290. package/runtime/infra/utils/scripts/service/kill-port.mjs +0 -24
  291. package/runtime/infra/utils/scripts/service/start-api.mjs +0 -39
  292. package/runtime/infra/utils/scripts/service/start-browser-service.mjs +0 -106
  293. package/runtime/infra/utils/scripts/service/stop-api.mjs +0 -18
  294. package/runtime/infra/utils/scripts/service/stop-browser-service.mjs +0 -104
  295. package/runtime/infra/utils/scripts/test-services.mjs +0 -94
  296. package/services/shared/heartbeat.test.ts +0 -102
  297. package/services/unified-api/__tests__/task-state.test.ts +0 -95
  298. package/sitecustomize.py +0 -19
  299. package/tests/README.md +0 -194
  300. package/tests/e2e/workflows/weibo-feed-extraction.test.ts +0 -171
  301. package/tests/fixtures/data/container-definitions.json +0 -67
  302. package/tests/fixtures/pages/simple-page.html +0 -69
  303. package/tests/integration/01-test-container-match.mjs +0 -188
  304. package/tests/integration/02-test-dom-branch.mjs +0 -161
  305. package/tests/integration/03-test-container-operation-system.mjs +0 -91
  306. package/tests/integration/05-test-container-lifecycle-events.mjs +0 -224
  307. package/tests/integration/05-test-container-lifecycle-with-events.mjs +0 -250
  308. package/tests/integration/06-test-container-dom-tree-drawing.mjs +0 -256
  309. package/tests/integration/07-test-weibo-container-lifecycle.mjs +0 -355
  310. package/tests/integration/08-test-weibo-feed-workflow.test.mjs +0 -164
  311. package/tests/integration/10-test-visual-analyzer.mjs +0 -312
  312. package/tests/integration/11-test-visual-loop.mjs +0 -284
  313. package/tests/integration/12-test-simple-visual-loop.mjs +0 -242
  314. package/tests/integration/13-test-visual-robust.mjs +0 -185
  315. package/tests/integration/14-test-visual-highlight-loop.mjs +0 -271
  316. package/tests/integration/inspect-page.mjs +0 -50
  317. package/tests/integration/run-all-tests.mjs +0 -95
  318. package/tests/patch_verification/CODEX_PATCH_TEST.md +0 -103
  319. package/tests/patch_verification/PHASE2_ANALYSIS.md +0 -179
  320. package/tests/patch_verification/PHASE2_OPTIMIZATION_REPORT.md +0 -55
  321. package/tests/patch_verification/PHASE2_TO_PHASE4_SUMMARY.md +0 -126
  322. package/tests/patch_verification/QUICK_TEST_SEQUENCE.md +0 -262
  323. package/tests/patch_verification/README.md +0 -143
  324. package/tests/patch_verification/RUN_TESTS.md +0 -60
  325. package/tests/patch_verification/TEST_EXECUTION.md +0 -99
  326. package/tests/patch_verification/TEST_PLAN.md +0 -328
  327. package/tests/patch_verification/TEST_RESULTS.md +0 -34
  328. package/tests/patch_verification/TOOL_TEST_PLAN.md +0 -48
  329. package/tests/patch_verification/run-tool-test.mjs +0 -121
  330. package/tests/patch_verification/temp_test_files/test01.txt +0 -1
  331. package/tests/patch_verification/temp_test_files/test02.txt +0 -3
  332. package/tests/patch_verification/temp_test_files/test02_gnu.txt +0 -3
  333. package/tests/patch_verification/temp_test_files/test03.txt +0 -1
  334. package/tests/patch_verification/temp_test_files/test03_multiline.txt +0 -5
  335. package/tests/patch_verification/temp_test_files/test04_function.ts +0 -5
  336. package/tests/patch_verification/temp_test_files/test05_import.ts +0 -4
  337. package/tests/patch_verification/temp_test_files/test06_special_chars.txt +0 -4
  338. package/tests/patch_verification/temp_test_files/test07_indentation.ts +0 -5
  339. package/tests/patch_verification/temp_test_files/test08_mismatch.txt +0 -1
  340. package/tests/patch_verification/temp_test_files/test_add_02.txt +0 -3
  341. package/tests/patch_verification/temp_test_files/test_simple.txt +0 -1
  342. package/tests/runner/TestReporter.mjs +0 -57
  343. package/tests/runner/TestRunner.mjs +0 -244
  344. package/tests/unit/commands/profile.test.mjs +0 -10
  345. package/tests/unit/container/change-notifier.test.mjs +0 -181
  346. package/tests/unit/lifecycle/session-registry.test.mjs +0 -135
  347. package/tests/unit/operations/registry.test.ts +0 -73
  348. package/tests/unit/utils/browser-service.test.mjs +0 -153
  349. package/tests/unit/utils/config.test.mjs +0 -166
  350. package/tests/unit/utils/fingerprint.test.mjs +0 -166
  351. package/tsconfig.json +0 -31
  352. package/tsconfig.services.json +0 -26
  353. /package/apps/desktop-console/{src → dist}/renderer/index.html +0 -0
  354. /package/apps/desktop-console/{src/renderer/tabs → dist/renderer}/run.mts +0 -0
@@ -0,0 +1,1004 @@
1
+ /**
2
+ * Workflow Block: XiaohongshuCollectFromLinksBlock
3
+ *
4
+ * Phase3/4(基于 Phase2 links):
5
+ * - 读取 phase2-links.jsonl(searchUrl 严格一致 + keyword 严格一致 + safeUrl 含 xsec_token)
6
+ * - Phase34 多 Tab:最多同时打开 4 个“不同笔记”的详情 tab,按 tab 轮换抓评论
7
+ * - 每次轮到某 tab:最多新增抓取 50 条评论,然后切换到下一个 tab
8
+ * - 当某笔记命中 end_marker 或 empty_state 即视为完成,关闭该 tab 并补充打开下一条笔记
9
+ *
10
+ * 开发阶段:任何异常 fail-fast,并落盘截图/元数据用于复盘。
11
+ */
12
+ import os from 'node:os';
13
+ import path from 'node:path';
14
+ import { promises as fs } from 'node:fs';
15
+ import { urlKeywordEquals } from './helpers/searchPageState.js';
16
+ import { countPersistedNotes } from './helpers/persistedNotes.js';
17
+ import { isDebugArtifactsEnabled } from './helpers/debugArtifacts.js';
18
+ import { execute as extractDetail } from './ExtractDetailBlock.js';
19
+ import { execute as expandComments } from './ExpandCommentsBlock.js';
20
+ import { execute as persistXhsNote } from './PersistXhsNoteBlock.js';
21
+ import { resolveTargetCount } from './helpers/targetCountMode.js';
22
+ import { mergeNotesMarkdown } from './helpers/mergeXhsMarkdown.js';
23
+ import { isDevMode } from './helpers/systemInput.js';
24
+ import { logControllerActionError, logControllerActionResult, logControllerActionStart, } from './helpers/operationLogger.js';
25
/** Ratio used when WEBAUTO_COMMENTS_COVERAGE_RATIO is unset or unusable. */
const DEFAULT_COMMENTS_COVERAGE_RATIO = 0.9;

/**
 * Reads the comments coverage ratio from WEBAUTO_COMMENTS_COVERAGE_RATIO.
 *
 * The variable may be a plain fraction ("0.8") or a percentage ("80%").
 * Anything that does not parse to a finite number in the range (0, 1]
 * falls back to DEFAULT_COMMENTS_COVERAGE_RATIO.
 *
 * @returns {number} A ratio greater than 0 and at most 1.
 */
function resolveCommentsCoverageRatio() {
    const text = String(process.env.WEBAUTO_COMMENTS_COVERAGE_RATIO || '').trim();
    if (text === '') {
        return DEFAULT_COMMENTS_COVERAGE_RATIO;
    }

    let ratio;
    if (text.endsWith('%')) {
        // "80%" -> 0.8
        ratio = Number(text.slice(0, -1)) / 100;
    }
    else {
        ratio = Number(text);
    }

    const usable = Number.isFinite(ratio) && ratio > 0 && ratio <= 1;
    return usable ? ratio : DEFAULT_COMMENTS_COVERAGE_RATIO;
}

/** Ratio resolved once, when the module is loaded. */
const COMMENTS_COVERAGE_RATIO = resolveCommentsCoverageRatio();
36
/**
 * Turns an arbitrary value into a fragment that is safe to embed in a file name.
 *
 * Falsy input becomes an empty string. The text is trimmed, every run of
 * the characters \ / : " * ? < > | is collapsed into one underscore, every
 * run of whitespace is collapsed into one underscore, and the result is cut
 * to at most 80 UTF-16 code units.
 *
 * @param {unknown} value - Value to sanitize; it is stringified first.
 * @returns {string} The sanitized fragment (possibly empty).
 */
function sanitizeFilenamePart(value) {
    const text = String(value || '').trim();
    const withoutReserved = text.replace(/[\\/:"*?<>|]+/g, '_');
    const withoutWhitespace = withoutReserved.replace(/\s+/g, '_');
    return withoutWhitespace.slice(0, 80);
}
43
/**
 * Resolves the root directory under which downloads are stored.
 *
 * Lookup order:
 *   1. WEBAUTO_DOWNLOAD_ROOT
 *   2. WEBAUTO_DOWNLOAD_DIR
 *   3. <home>/.webauto/download, where <home> is HOME, then USERPROFILE,
 *      then os.homedir().
 *
 * A variable that is unset, empty, or whitespace-only is skipped, so a blank
 * WEBAUTO_DOWNLOAD_ROOT no longer hides a valid WEBAUTO_DOWNLOAD_DIR (and a
 * blank HOME no longer hides USERPROFILE). The chosen value is returned
 * exactly as provided; it is not trimmed.
 *
 * @returns {string} Path of the download root directory.
 */
function resolveDownloadRoot() {
    // First candidate that contains something other than whitespace.
    const firstNonBlank = (...candidates) => candidates.find((candidate) => typeof candidate === 'string' && candidate.trim() !== '');

    const custom = firstNonBlank(process.env.WEBAUTO_DOWNLOAD_ROOT, process.env.WEBAUTO_DOWNLOAD_DIR);
    if (custom !== undefined) {
        return custom;
    }

    const home = firstNonBlank(process.env.HOME, process.env.USERPROFILE) ?? os.homedir();
    return path.join(home, '.webauto', 'download');
}
52
/**
 * Pulls the base64 payload out of a screenshot response whose envelope
 * shape is not fixed.
 *
 * The locations below are probed in order and the first one holding a string
 * longer than 10 characters is returned. Previously the first non-nullish
 * location won even when it held an object or a short string, so a valid
 * payload in a later location was discarded.
 *
 * @param {unknown} raw - Response returned by the screenshot action.
 * @returns {string|undefined} The payload string, or undefined when none of
 *   the probed locations holds a long enough string.
 */
function extractBase64FromScreenshotResponse(raw) {
    const candidates = [
        raw?.data?.data,
        raw?.data?.body?.data,
        raw?.body?.data,
        raw?.result?.data,
        raw?.result,
        raw?.data,
        raw,
    ];
    // The length floor rejects empty or placeholder strings.
    return candidates.find((candidate) => typeof candidate === 'string' && candidate.length > 10);
}
62
/**
 * Reads a JSON Lines file and returns its parsed records.
 *
 * Each line is trimmed; blank lines, lines that are not valid JSON, and
 * lines whose parsed value is falsy are dropped. A missing file (ENOENT)
 * yields an empty array; any other read error is rethrown.
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Promise<Array<unknown>>} Parsed records in file order.
 */
async function readJsonl(filePath) {
    let text;
    try {
        text = await fs.readFile(filePath, 'utf-8');
    }
    catch (err) {
        if (err?.code === 'ENOENT') {
            return [];
        }
        throw err;
    }

    const records = [];
    for (const rawLine of text.split('\n')) {
        const line = rawLine.trim();
        if (!line) {
            continue;
        }
        let parsed = null;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Malformed line: skip it and keep reading.
            parsed = null;
        }
        if (parsed) {
            records.push(parsed);
        }
    }
    return records;
}
85
+ export async function execute(input) {
86
+ const { sessionId, keyword, env = 'debug', targetCount, targetCountMode = 'absolute', maxComments, strictTargetCount = true, serviceUrl = 'http://127.0.0.1:7701', } = input;
87
+ const profile = sessionId;
88
+ const controllerUrl = `${serviceUrl}/v1/controller/action`;
89
+ const downloadRoot = resolveDownloadRoot();
90
+ const keywordDir = path.join(downloadRoot, 'xiaohongshu', env, keyword);
91
+ const linksPath = path.join(keywordDir, 'phase2-links.jsonl');
92
+ const debugArtifactsEnabled = isDebugArtifactsEnabled();
93
+ const debugDir = debugArtifactsEnabled ? path.join(keywordDir, '_debug', 'phase34_from_links') : null;
94
+ const failFast = isDevMode();
95
+ const maxCommentsLimit = typeof maxComments === 'number' && Number.isFinite(maxComments) && maxComments > 0
96
+ ? Math.floor(maxComments)
97
+ : null;
98
+ const countCoverageRatio = failFast && !maxCommentsLimit ? COMMENTS_COVERAGE_RATIO : undefined;
99
+ const maxRetryPerNote = Math.max(1, Number(process.env.WEBAUTO_PHASE34_RETRY_MAX || 2));
100
+ async function controllerAction(action, payload = {}) {
101
+ const opId = logControllerActionStart(action, payload, { source: 'XiaohongshuCollectFromLinksBlock' });
102
+ try {
103
+ const res = await fetch(controllerUrl, {
104
+ method: 'POST',
105
+ headers: { 'Content-Type': 'application/json' },
106
+ body: JSON.stringify({ action, payload }),
107
+ signal: AbortSignal.timeout ? AbortSignal.timeout(30000) : undefined,
108
+ });
109
+ const raw = await res.text();
110
+ if (!res.ok)
111
+ throw new Error(`HTTP ${res.status}: ${raw}`);
112
+ let data = {};
113
+ try {
114
+ data = raw ? JSON.parse(raw) : {};
115
+ }
116
+ catch {
117
+ data = { raw };
118
+ }
119
+ const result = data.data || data;
120
+ logControllerActionResult(opId, action, result, { source: 'XiaohongshuCollectFromLinksBlock' });
121
+ return result;
122
+ }
123
+ catch (error) {
124
+ logControllerActionError(opId, action, error, payload, { source: 'XiaohongshuCollectFromLinksBlock' });
125
+ throw error;
126
+ }
127
+ }
128
+ const delay = (ms) => new Promise((r) => setTimeout(r, ms));
129
+ async function getCurrentUrl() {
130
+ const res = await controllerAction('browser:execute', { profile, script: 'window.location.href' });
131
+ return res?.result ?? res?.data?.result ?? '';
132
+ }
133
/**
 * Best-effort debug snapshot: a screenshot (when one can be obtained) plus a
 * JSON record, both written into `debugDir`.
 *
 * Does nothing unless debug artifacts are enabled and a debug directory is set.
 * Every failure is logged as a warning and never propagates to the caller.
 *
 * @param {string} kind - Short label; used in the file name and stored in the record.
 * @param {object} [meta] - Extra fields spread into the JSON record (may override the defaults).
 * @returns {Promise<void>}
 */
async function saveDebug(kind, meta) {
    const enabled = debugArtifactsEnabled && debugDir;
    if (!enabled) {
        return;
    }
    try {
        await fs.mkdir(debugDir, { recursive: true });
        // ':' and '.' are replaced so the timestamp is usable in a file name.
        const ts = new Date().toISOString().replace(/[:.]/g, '-');
        const stem = `${ts}-${sanitizeFilenamePart(kind)}`;
        const pngPath = path.join(debugDir, `${stem}.png`);
        const jsonPath = path.join(debugDir, `${stem}.json`);
        let shot = null;
        try {
            shot = await controllerAction('browser:screenshot', { profileId: profile, fullPage: false });
        }
        catch {
            shot = null;
        }
        const b64 = extractBase64FromScreenshotResponse(shot);
        if (b64) {
            await fs.writeFile(pngPath, Buffer.from(b64, 'base64'));
        }
        let url = '';
        try {
            url = await getCurrentUrl();
        }
        catch {
            url = '';
        }
        const record = {
            ts,
            kind,
            sessionId: profile,
            keyword,
            env,
            url,
            // null when no screenshot was written
            pngPath: b64 ? pngPath : null,
            ...meta,
        };
        await fs.writeFile(jsonPath, JSON.stringify(record, null, 2), 'utf-8');
        console.log(`[Phase34FromLinks][debug] saved ${kind}: ${pngPath}`);
    }
    catch (e) {
        console.warn(`[Phase34FromLinks][debug] save failed (${kind}): ${e?.message || String(e)}`);
    }
}
162
/**
 * Move a note directory into `<keywordDir>/_rejected` and drop a `reject.json`
 * describing why. Entirely best-effort: no error escapes this function.
 *
 * @param {{ noteId: string, reason: string, meta?: object }} options
 * @returns {Promise<void>}
 */
async function moveNoteToRejected(options) {
    const { noteId, reason, meta } = options;
    try {
        const sourceDir = path.join(persistedAtStart.keywordDir, noteId);
        const rejectedRoot = path.join(persistedAtStart.keywordDir, '_rejected');
        await fs.mkdir(rejectedRoot, { recursive: true });
        let targetDir = path.join(rejectedRoot, noteId);
        // If a directory with the same name already exists, append a timestamp
        // and the reason so the earlier rejection is not overwritten.
        try {
            await fs.access(targetDir);
            const stamp = new Date().toISOString().replace(/[:.]/g, '-');
            targetDir = path.join(rejectedRoot, `${noteId}_${stamp}_${sanitizeFilenamePart(reason)}`);
        }
        catch {
            // target name is free
        }
        try {
            await fs.rename(sourceDir, targetDir);
        }
        catch (e) {
            // A failed rename is only recorded; the flow continues to the reject.json write below.
            await saveDebug('move_to_rejected_failed', { noteId, reason, error: e?.message || String(e) });
        }
        const rejectJsonPath = path.join(targetDir, 'reject.json');
        const rejectRecord = {
            ts: new Date().toISOString(),
            noteId,
            reason,
            ...((meta && typeof meta === 'object') ? { meta } : {}),
        };
        try {
            await fs.writeFile(rejectJsonPath, JSON.stringify(rejectRecord, null, 2), 'utf-8');
        }
        catch {
            // ignore: reject.json is informational only
        }
    }
    catch {
        // ignore
    }
}
196
/**
 * Normalize one raw links record and decide whether it is usable.
 *
 * A record is accepted only when `noteId`, `safeUrl` and `searchUrl` are
 * non-empty strings after trimming, `safeUrl` contains `xsec_token=`, and
 * `searchUrl` passes `urlKeywordEquals` for the current keyword.
 *
 * @param {any} raw - Parsed record.
 * @returns {{ noteId: string, safeUrl: string, searchUrl: string, ts: (string|undefined) } | null}
 */
function validateEntry(raw) {
    const trimmedOrEmpty = (value) => (typeof value === 'string' ? value.trim() : '');
    const noteId = trimmedOrEmpty(raw?.noteId);
    const safeUrl = trimmedOrEmpty(raw?.safeUrl);
    const searchUrl = trimmedOrEmpty(raw?.searchUrl);
    const usable = noteId
        && safeUrl
        && searchUrl
        && safeUrl.includes('xsec_token=')
        && urlKeywordEquals(searchUrl, keyword);
    if (!usable) {
        return null;
    }
    const ts = typeof raw?.ts === 'string' ? raw.ts : undefined;
    return { noteId, safeUrl, searchUrl, ts };
}
208
// Start-up phase: count what is already persisted, resolve the target, then load and validate the links.
// A note counts as persisted only with content.md + comments.md, comments marked done,
// and (when countCoverageRatio is set) the minimum comments coverage.
+ const persistedAtStart = await countPersistedNotes({
208
+ platform: 'xiaohongshu',
209
+ env,
210
+ keyword,
211
+ downloadRoot,
212
+ requiredFiles: ['content.md', 'comments.md'],
213
+ requireCommentsDone: true,
214
+ minCommentsCoverageRatio: countCoverageRatio,
215
+ });
216
// Best-effort cleanup of notes whose comments were left unfinished; failures are ignored.
+ await resetIncompleteComments().catch(() => { });
217
+ let persistedCount = persistedAtStart.count;
218
+ const initialPersistedCount = persistedAtStart.count;
219
+ const { targetTotal } = resolveTargetCount({
220
+ targetCount,
221
+ baseCount: initialPersistedCount,
222
+ mode: targetCountMode,
223
+ });
224
// Strict mode: already holding more notes than the target is reported as a failure.
+ if (strictTargetCount && persistedCount > targetTotal) {
225
+ return {
226
+ success: false,
227
+ keywordDir: persistedAtStart.keywordDir,
228
+ linksPath,
229
+ expectedSearchUrl: '',
230
+ initialPersistedCount,
231
+ finalPersistedCount: persistedCount,
232
+ addedCount: 0,
233
+ processedCount: 0,
234
+ targetCount: targetTotal,
235
+ error: `existing_count_exceeds_target: ${persistedCount} > ${targetTotal}`,
236
+ };
237
+ }
238
// Target already met: nothing to collect, only (re)build the merged markdown.
+ if (persistedCount === targetTotal) {
239
+ const merged = await mergeMarkdownIfNeeded();
240
+ return {
241
+ success: true,
242
+ keywordDir: persistedAtStart.keywordDir,
243
+ linksPath,
244
+ expectedSearchUrl: '',
245
+ initialPersistedCount,
246
+ finalPersistedCount: persistedCount,
247
+ addedCount: 0,
248
+ processedCount: 0,
249
+ targetCount: targetTotal,
250
+ ...merged,
251
+ };
252
+ }
253
// Load the links file, keep only entries accepted by validateEntry, and de-duplicate by noteId (first occurrence wins).
+ const rawLinks = await readJsonl(linksPath);
254
+ let links = [];
255
+ const seenLinkNoteIds = new Set();
256
+ for (const r of rawLinks) {
257
+ const e = validateEntry(r);
258
+ if (!e)
259
+ continue;
260
+ if (seenLinkNoteIds.has(e.noteId))
261
+ continue;
262
+ seenLinkNoteIds.add(e.noteId);
263
+ links.push(e);
264
+ }
265
+ if (links.length === 0) {
266
+ await saveDebug('links_empty', { linksPath });
267
+ return {
268
+ success: false,
269
+ keywordDir: persistedAtStart.keywordDir,
270
+ linksPath,
271
+ expectedSearchUrl: '',
272
+ initialPersistedCount,
273
+ finalPersistedCount: persistedCount,
274
+ addedCount: Math.max(0, persistedCount - initialPersistedCount),
275
+ processedCount: 0,
276
+ targetCount: targetTotal,
277
+ error: 'phase2_links_empty',
278
+ };
279
+ }
280
// NOTE(review): validateEntry already drops entries whose searchUrl fails urlKeywordEquals,
// so this second keyword check is expected to find nothing; it is kept as a safety net.
+ const invalidLinks = links.filter((e) => !urlKeywordEquals(e.searchUrl, keyword));
281
+ if (invalidLinks.length > 0) {
282
+ await saveDebug('searchurl_keyword_mismatch', { bad: invalidLinks.slice(0, 5), count: invalidLinks.length });
283
+ if (failFast) {
284
+ const first = invalidLinks[0];
285
+ return {
286
+ success: false,
287
+ keywordDir: persistedAtStart.keywordDir,
288
+ linksPath,
289
+ expectedSearchUrl: first?.searchUrl || '',
290
+ initialPersistedCount,
291
+ finalPersistedCount: persistedCount,
292
+ addedCount: Math.max(0, persistedCount - initialPersistedCount),
293
+ processedCount: 0,
294
+ targetCount: targetTotal,
295
+ error: `searchurl_keyword_mismatch: ${first?.searchUrl || 'unknown'}`,
296
+ };
297
+ }
298
// Without failFast the mismatching entries are simply dropped.
+ links = links.filter((e) => urlKeywordEquals(e.searchUrl, keyword));
299
+ }
300
+ if (links.length === 0) {
301
+ await saveDebug('links_empty_after_filter', { linksPath });
302
+ return {
303
+ success: false,
304
+ keywordDir: persistedAtStart.keywordDir,
305
+ linksPath,
306
+ expectedSearchUrl: '',
307
+ initialPersistedCount,
308
+ finalPersistedCount: persistedCount,
309
+ addedCount: Math.max(0, persistedCount - initialPersistedCount),
310
+ processedCount: 0,
311
+ targetCount: targetTotal,
312
+ error: 'phase2_links_empty_after_filter',
313
+ };
314
+ }
315
// The first valid link's searchUrl is used as the expected search URL for the rest of the run.
+ const expectedSearchUrl = links[0].searchUrl;
317
/**
 * Append one JSON line describing a comments-coverage shortfall to
 * `<keywordDir>/comments-coverage-shortfall.jsonl`. Failures are ignored.
 *
 * @param {object} entry - Shortfall details; a `ts` field in `entry` overrides the generated timestamp.
 * @returns {Promise<void>}
 */
async function appendCoverageShortfall(entry) {
    try {
        await fs.mkdir(keywordDir, { recursive: true });
        const record = { ts: new Date().toISOString(), ...entry };
        const target = path.join(keywordDir, 'comments-coverage-shortfall.jsonl');
        await fs.appendFile(target, `${JSON.stringify(record)}\n`, 'utf-8');
    }
    catch {
        // ignore: the shortfall log is diagnostic only
    }
}
328
// Scheduler state shared by the helpers below and by the main loop.
// Notes that were already persisted at start are treated as processed.
+ const processedNoteIds = new Set(persistedAtStart.noteIds);
328
+ let processedCount = 0;
329
+ let rejectedCount = 0;
330
+ // Phase34: rotate comment harvesting across at most 4 detail tabs, each for a different note (each tab adds at most 50 new comments per turn)
331
+ const MAX_TABS = 4;
332
// Per-turn cap on new comments; lowered to maxCommentsLimit when that limit is below 50.
+ const BATCH = maxCommentsLimit ? Math.min(50, maxCommentsLimit) : 50;
333
// Work queue (links not yet processed), a noteId lookup, per-note retry counters,
// and the set of noteIds currently waiting in the queue.
+ const pendingLinks = links.filter((l) => !processedNoteIds.has(l.noteId));
334
+ const linkById = new Map(links.map((l) => [l.noteId, l]));
335
+ const retryCounts = new Map();
336
+ const queuedNoteIds = new Set(pendingLinks.map((l) => l.noteId));
337
// `cursor` indexes the next entry of pendingLinks; `active` holds in-flight tasks; `rr` is the round-robin position within `active`.
+ let cursor = 0;
338
+ const active = [];
339
+ let rr = 0;
340
// Tab bookkeeping: indexes used by active tasks, and indexes reserved for the search tab.
+ const usedTabIndexes = new Set();
341
+ const reservedTabIndexes = new Set();
342
+ let searchTabIndex = null;
343
+ let searchTabUrl = null;
345
/**
 * Print a one-line progress summary: persisted/processed counters, number of
 * active tasks and the queue cursor.
 *
 * @param {string} stage - Label of the current stage.
 * @param {string} [noteId] - Note being handled; omitted from the line when falsy.
 */
function logProgress(stage, noteId) {
    let noteLabel = '';
    if (noteId) {
        noteLabel = ` noteId=${noteId}`;
    }
    console.log(`[Phase34FromLinks][progress] stage=${stage}${noteLabel} persisted=${persistedCount}/${targetTotal} processed=${processedCount}/${targetTotal} active=${active.length} cursor=${cursor}/${pendingLinks.length}`);
}
349
/**
 * Remove the comment artifacts of one note so a retry starts from a clean state.
 *
 * Deletes `comments.md`, `comments.jsonl` and `comments.done.json` inside the
 * note directory. `comments.jsonl` is included so this cleanup removes the
 * same file set as resetIncompleteComments; previously it was left behind.
 * Missing files and other unlink errors are ignored.
 *
 * @param {string} noteId - Directory name of the note under the keyword directory.
 * @returns {Promise<void>}
 */
async function clearCommentsArtifacts(noteId) {
    const postDir = path.join(persistedAtStart.keywordDir, noteId);
    const artifactNames = ['comments.md', 'comments.jsonl', 'comments.done.json'];
    for (const name of artifactNames) {
        // Best effort: a file that does not exist is not an error here.
        await fs.unlink(path.join(postDir, name)).catch(() => { });
    }
}
356
/**
 * Central failure handler for a note task.
 *
 * Records a debug artifact, then either re-throws (failFast) or resets the
 * note for another attempt: comment artifacts are removed, the in-memory task
 * (if any) is reset, its tab closed and the task dropped from `active`.
 * Once the retry limit is exceeded the note is counted as rejected and moved
 * to `_rejected`; otherwise its link is appended to the pending queue again.
 *
 * @param {{ noteId: string, stage: string, error: any, task?: object|null }} options
 * @returns {Promise<{ recovered: boolean, rejected: boolean }>}
 * @throws The original error when failFast is enabled.
 */
async function handleTaskError(options) {
    const { noteId, stage, error, task } = options;
    const message = error instanceof Error ? error.message : String(error);
    await saveDebug('phase34_task_error', { noteId, stage, error: message });
    if (failFast) {
        throw error;
    }
    const attempts = (retryCounts.get(noteId) || 0) + 1;
    retryCounts.set(noteId, attempts);
    await clearCommentsArtifacts(noteId);
    if (task) {
        // Wipe everything collected so far; a retry restarts from the first batch.
        Object.assign(task, {
            comments: [],
            seenKeys: new Set(),
            reachedEnd: false,
            emptyState: false,
            totalFromHeader: null,
            stoppedByMaxComments: false,
            batches: 0,
            firstRun: true,
        });
        try {
            await closeTaskTab(task);
        }
        catch {
            // closing the tab is best effort
        }
        const activePos = active.findIndex((t) => t.noteId === noteId);
        if (activePos >= 0) {
            active.splice(activePos, 1);
        }
    }
    const retriesExhausted = attempts > maxRetryPerNote;
    if (retriesExhausted) {
        console.warn(`[Phase34FromLinks] noteId=${noteId} exceeded retry limit=${maxRetryPerNote}`);
        rejectedCount += 1;
        if (linkById.get(noteId)) {
            await moveNoteToRejected({ noteId, reason: 'retry_exhausted', meta: { stage, error: message } });
        }
        return { recovered: false, rejected: true };
    }
    const link = linkById.get(noteId);
    const shouldEnqueue = link && !queuedNoteIds.has(noteId);
    if (shouldEnqueue) {
        queuedNoteIds.add(noteId);
        pendingLinks.push(link);
    }
    logProgress('retry_enqueued', noteId);
    return { recovered: true, rejected: false };
}
400
/**
 * Fetch the browser's page list for the current profile.
 *
 * Controller failures are swallowed and yield an empty listing. `pages` is
 * taken from the response (or its `data` field) and forced to an array;
 * `activeIndex` is converted with Number() and is null when not finite.
 *
 * @returns {Promise<{ pages: any[], activeIndex: (number|null) }>}
 */
async function listPagesDetailed() {
    const request = { profileId: profile };
    const response = await controllerAction('browser:page:list', request).catch(() => null);
    const rawPages = response?.pages || response?.data?.pages || [];
    const pages = Array.isArray(rawPages) ? rawPages : [];
    const numericActive = Number(response?.activeIndex ?? response?.data?.activeIndex);
    const activeIndex = Number.isFinite(numericActive) ? numericActive : null;
    return { pages, activeIndex };
}
407
/**
 * Re-locate the search-results tab for the current keyword and update
 * `searchTabIndex`, `searchTabUrl` and `reservedTabIndexes` accordingly.
 *
 * When no matching tab is found the index becomes null and the remembered URL
 * is kept (falling back to `expectedSearchUrl` when nothing was remembered).
 * A log line is printed only when the index actually changed.
 *
 * @param {string} reason - Why the refresh was requested (log only).
 * @returns {Promise<void>}
 */
async function refreshSearchTabIndex(reason) {
    const listing = await listPagesDetailed().catch(() => ({
        pages: [],
        activeIndex: null,
    }));
    const previousIndex = searchTabIndex;
    const searchPage = listing.pages.find((page) => {
        const pageUrl = typeof page?.url === 'string' ? page.url : '';
        return pageUrl.includes('/search_result') && urlKeywordEquals(pageUrl, keyword);
    });
    const hasUsableIndex = Boolean(searchPage) && Number.isFinite(Number(searchPage.index));
    if (hasUsableIndex) {
        searchTabIndex = Number(searchPage.index);
        searchTabUrl = typeof searchPage.url === 'string' ? searchPage.url : searchTabUrl;
    }
    else {
        searchTabIndex = null;
        searchTabUrl = searchTabUrl || expectedSearchUrl;
    }
    // The reserved set always mirrors the current search tab (or is empty).
    reservedTabIndexes.clear();
    if (searchTabIndex !== null) {
        reservedTabIndexes.add(searchTabIndex);
    }
    if (previousIndex !== searchTabIndex) {
        console.log(`[Phase34FromLinks] search tab updated: reason=${reason} index=${searchTabIndex ?? 'n/a'} url=${searchTabUrl ?? ''}`);
    }
}
432
// Bootstrap the search-tab bookkeeping: remember the expected search URL, locate the
// search tab among the open pages, then log the starting progress line.
+ searchTabUrl = expectedSearchUrl;
432
+ await refreshSearchTabIndex('phase34_start');
433
+ logProgress('start');
435
/**
 * Open a new browser tab via the system "new-tab" shortcut and work out which
 * page index it received, never returning the index of the search tab.
 *
 * The index is resolved in this order: the index reported by the shortcut
 * action; the active page after a short wait; a page index that did not exist
 * before; and finally the highest non-reserved index. If none applies a debug
 * artifact is saved and an error is thrown.
 *
 * @param {string} url - Intended destination; only recorded in the debug artifact here.
 * @param {string} reason - Context label for logs and debug output.
 * @returns {Promise<number>} Page index to use.
 * @throws {Error} When no usable page index can be determined.
 */
async function openPageWithFallback(url, reason) {
    const emptyListing = () => ({
        pages: [],
        activeIndex: null,
    });
    await refreshSearchTabIndex(`open_page:${reason}`).catch(() => { });
    const reservedIndex = searchTabIndex;
    const beforeDetail = await listPagesDetailed().catch(emptyListing);
    const beforeIndexes = new Set();
    for (const page of beforeDetail.pages) {
        const n = Number(page?.index);
        if (Number.isFinite(n)) {
            beforeIndexes.add(n);
        }
    }
    // Use system-level shortcut to open new tab in same window (Cmd+T on macOS)
    const created = await controllerAction('system:shortcut', { app: 'camoufox', shortcut: 'new-tab' });
    const createdIndex = Number(created?.index ?? created?.data?.index ?? created?.body?.index);
    if (Number.isFinite(createdIndex) && createdIndex !== reservedIndex) {
        return createdIndex;
    }
    await delay(500);
    const afterDetail = await listPagesDetailed().catch(emptyListing);
    const activeUsable = Number.isFinite(afterDetail.activeIndex) && afterDetail.activeIndex !== reservedIndex;
    if (activeUsable) {
        return Number(afterDetail.activeIndex);
    }
    const isBrandNew = (page) => Number.isFinite(page?.index) &&
        !beforeIndexes.has(Number(page.index)) &&
        Number(page.index) !== reservedIndex;
    const newPage = afterDetail.pages.find(isBrandNew);
    if (newPage && Number.isFinite(newPage.index)) {
        return Number(newPage.index);
    }
    // Last resort: the highest index that is not the search tab.
    let highest = null;
    for (const page of afterDetail.pages) {
        const n = Number(page?.index);
        if (!Number.isFinite(n) || n === reservedIndex) {
            continue;
        }
        if (highest === null || n > highest) {
            highest = n;
        }
    }
    if (highest !== null) {
        return highest;
    }
    const summarize = (pages) => pages.slice(0, 6).map((p) => ({ index: p.index, url: p.url }));
    await saveDebug('page_new_invalid_index', {
        reason,
        url,
        created,
        before: summarize(beforeDetail.pages),
        after: summarize(afterDetail.pages),
        beforeActive: beforeDetail.activeIndex,
        afterActive: afterDetail.activeIndex,
    });
    throw new Error('browser:page:new returned invalid index');
}
479
/**
 * Extract the note id from an `/explore/<noteId>` URL.
 *
 * @param {any} url - Candidate URL; non-strings never match.
 * @returns {string|null} The path segment after `/explore/` (up to `/`, `?` or `#`), or null.
 */
function parseNoteIdFromUrl(url) {
    if (typeof url !== 'string') {
        return null;
    }
    const match = /\/explore\/([^/?#]+)/.exec(url);
    return match === null ? null : String(match[1]);
}
484
/**
 * Find the page index of the detail tab currently showing `noteId`.
 *
 * A page matches when its URL contains both `/explore/` and the note id.
 * Pages whose index does not convert to a finite number are skipped, so the
 * result is always either a finite index or null. Previously a matching page
 * with an unusable index produced NaN, which callers that only test for
 * `=== null` would treat as a valid tab.
 *
 * @param {string} noteId - Note id to look for.
 * @returns {Promise<number|null>} Finite page index, or null when no usable tab is found.
 */
async function resolveDetailTabIndex(noteId) {
    const { pages } = await listPagesDetailed();
    for (const p of pages) {
        const url = typeof p?.url === 'string' ? p.url : '';
        if (!url.includes('/explore/') || !url.includes(noteId)) {
            continue;
        }
        const index = Number(p.index);
        if (Number.isFinite(index)) {
            return index;
        }
    }
    return null;
}
493
/**
 * Recompute `usedTabIndexes` and every active task's `tabIndex` from the
 * current page list, then save a debug snapshot of the result.
 *
 * A task whose detail page cannot be found keeps its place in `active` but
 * gets `tabIndex = null`.
 *
 * @param {string} reason - Context label for logs and debug output.
 * @returns {Promise<void>}
 */
async function rebuildUsedTabIndexes(reason) {
    await refreshSearchTabIndex(`rebuild_tabs:${reason}`).catch(() => { });
    let pages;
    try {
        ({ pages } = await listPagesDetailed());
    }
    catch {
        pages = [];
    }
    usedTabIndexes.clear();
    for (const task of active) {
        const match = pages.find((p) => typeof p?.url === 'string' && p.url.includes('/explore/') && p.url.includes(task.noteId));
        if (match && Number.isFinite(match.index)) {
            const located = Number(match.index);
            task.tabIndex = located;
            usedTabIndexes.add(located);
            continue;
        }
        // keep the task, but mark unknown; will reopen when scheduled
        task.tabIndex = null;
    }
    const snapshot = {
        reason,
        pages: pages.map((p) => ({ index: p?.index, url: p?.url, active: p?.active })),
        active: active.map((t) => ({ noteId: t.noteId, tabIndex: t.tabIndex ?? null })),
        usedTabIndexes: Array.from(usedTabIndexes),
    };
    await saveDebug('tabs_rebuilt', snapshot).catch(() => { });
}
518
/**
 * Choose an already-open tab that can be redirected to a new note.
 *
 * Candidates have a finite numeric, non-negative index, are neither used by an
 * active task nor reserved, and report a non-empty URL (blank pages such as
 * about:blank qualify). Tabs that are not search-result pages are preferred;
 * within the chosen group the lowest index wins.
 *
 * @returns {Promise<number|null>} Index of the tab to reuse, or null when none qualifies.
 */
async function pickReusableTabIndex() {
    const { pages } = await listPagesDetailed();
    let lowestAny = null;
    let lowestNonSearch = null;
    for (const page of pages) {
        if (!Number.isFinite(page?.index)) {
            continue;
        }
        const index = Number(page.index);
        if (usedTabIndexes.has(index) || reservedTabIndexes.has(index)) {
            continue;
        }
        const url = typeof page?.url === 'string' ? page.url : '';
        if (index < 0 || !url) {
            continue;
        }
        if (lowestAny === null || index < lowestAny) {
            lowestAny = index;
        }
        // Prefer leaving the search-results tab alone so it stays available for inspection.
        const isSearchPage = url.includes('/search_result');
        if (!isSearchPage && (lowestNonSearch === null || index < lowestNonSearch)) {
            lowestNonSearch = index;
        }
    }
    return lowestNonSearch !== null ? lowestNonSearch : lowestAny;
}
536
/**
 * Start work on one link: obtain a tab for it, make sure the tab shows the
 * note's detail page, extract the note detail, persist it, and return a fresh
 * task object for comment harvesting.
 *
 * Tab selection order: an existing tab already showing this note, then any
 * reusable tab, then a newly opened tab. The search tab is never used.
 * `processedCount` is incremented on every call.
 *
 * @param {{ noteId: string, safeUrl: string, searchUrl: string }} link
 * @returns {Promise<object>} New task state (empty comments, `firstRun: true`).
 * @throws {Error} When the tab cannot be switched to, the detail URL does not
 *   match, or detail extraction/persistence fails.
 */
async function openNewTask(link) {
    processedCount += 1;
    logProgress('open_tab', link.noteId);
    console.log(`[Phase34FromLinks] open/reuse tab for note ${persistedCount + 1}/${targetTotal}: noteId=${link.noteId}`);
    // refresh tab index bookkeeping (page indices may shift after closePage)
    await rebuildUsedTabIndexes('open_new_task').catch(() => { });
    // Detail tabs left over from an interrupted run may still be open when Phase34 starts.
    // Development-stage requirement: reuse an existing tab and redirect it, instead of opening new tabs without bound.
    let tabIdx = await resolveDetailTabIndex(link.noteId);
    if (tabIdx === null) {
        tabIdx = await pickReusableTabIndex();
    }
    if (tabIdx !== null && tabIdx === searchTabIndex) {
        tabIdx = null;
    }
    const reused = tabIdx !== null;
    if (!reused) {
        tabIdx = await openPageWithFallback(link.safeUrl, `open_new_task:${link.noteId}`);
    }
    usedTabIndexes.add(tabIdx);
    try {
        await controllerAction('browser:page:switch', { profileId: profile, index: tabIdx });
    }
    catch (e) {
        // Page indices can shift; re-resolve by noteId and retry once.
        await saveDebug('page_switch_failed_open_task', { noteId: link.noteId, detailIndex: tabIdx, error: e?.message || String(e) });
        const resolved = await resolveDetailTabIndex(link.noteId);
        if (resolved === null || !Number.isFinite(resolved)) {
            throw e;
        }
        usedTabIndexes.delete(tabIdx);
        tabIdx = resolved;
        usedTabIndexes.add(tabIdx);
        await controllerAction('browser:page:switch', { profileId: profile, index: tabIdx });
    }
    await delay(900);
    const beforeUrl = await getCurrentUrl().catch(() => '');
    const beforeNoteId = parseNoteIdFromUrl(beforeUrl || '');
    // If the tab is not already on the target detail page, navigate to safeUrl inside this tab.
    const alreadyOnTarget = beforeNoteId === link.noteId && beforeUrl.includes('xsec_token=');
    if (!alreadyOnTarget) {
        await controllerAction('browser:goto', { profile, url: link.safeUrl });
        await delay(2200);
    }
    await saveDebug('after_open_detail_tab', {
        noteId: link.noteId,
        detailIndex: tabIdx,
        reused,
        beforeUrl,
    });
    const urlNow = await getCurrentUrl();
    const onDetailPage = urlNow.includes('/explore/') && urlNow.includes('xsec_token=') && urlNow.includes(link.noteId);
    if (!onDetailPage) {
        await saveDebug('detail_url_mismatch', { noteId: link.noteId, expectedSafeUrl: link.safeUrl, beforeUrl, urlNow });
        throw new Error(`detail_url_mismatch: ${urlNow}`);
    }
    // Detail is extracted and written to disk on first open, so later tab rotations do not repeat the heavy work.
    const detail = await extractDetail({ sessionId, serviceUrl });
    if (!detail.success) {
        await saveDebug('extract_detail_failed', { noteId: link.noteId, error: detail.error || null });
        throw new Error(`extract_detail_failed: ${detail.error || 'unknown'}`);
    }
    const persistedDetail = await persistXhsNote({
        sessionId,
        env,
        platform: 'xiaohongshu',
        keyword,
        noteId: link.noteId,
        searchUrl: link.searchUrl,
        detailUrl: urlNow,
        detail: detail.detail,
        persistMode: 'detail',
    });
    if (!persistedDetail.success) {
        await saveDebug('persist_detail_failed', { noteId: link.noteId, error: persistedDetail.error || null });
        throw new Error(`persist_detail_failed: ${persistedDetail.error || 'unknown'}`);
    }
    const { noteId, safeUrl, searchUrl } = link;
    return {
        noteId,
        safeUrl,
        searchUrl,
        detailUrl: urlNow,
        tabIndex: tabIdx,
        startedAt: Date.now(),
        firstRun: true,
        seenKeys: new Set(),
        comments: [],
        reachedEnd: false,
        emptyState: false,
        totalFromHeader: null,
        stoppedByMaxComments: false,
        batches: 0,
    };
}
628
/**
 * Close the detail tab belonging to `task`.
 *
 * The tab is located by note id. If it cannot be found, a debug artifact is
 * saved and the function returns. Otherwise the tab is focused, Escape is
 * pressed (errors ignored), the page is closed, and the tab bookkeeping is
 * rebuilt because indexes may have shifted.
 *
 * @param {object} task - Task whose `noteId` identifies the tab.
 * @returns {Promise<void>}
 * @throws When the controller fails to close the page.
 */
async function closeTaskTab(task) {
    const tabIdx = await resolveDetailTabIndex(task.noteId);
    const tabFound = tabIdx !== null && Number.isFinite(tabIdx);
    if (!tabFound) {
        await saveDebug('close_task_tab_not_found', {
            noteId: task.noteId,
            detailUrl: task.detailUrl,
            tabIndex: task.tabIndex ?? null,
        });
        return;
    }
    try {
        await controllerAction('browser:page:switch', { profileId: profile, index: tabIdx });
        await delay(450);
        await controllerAction('keyboard:press', { profileId: profile, key: 'Escape' });
        await delay(450);
    }
    catch {
        // ignore: focusing and pressing Escape are optional preparation
    }
    try {
        await controllerAction('browser:page:close', { profileId: profile, index: tabIdx });
    }
    catch (e) {
        await saveDebug('page_close_failed', { noteId: task.noteId, detailIndex: tabIdx, error: e?.message || String(e) });
        throw e;
    }
    usedTabIndexes.delete(tabIdx);
    // indices may shift after close; refresh bookkeeping
    await rebuildUsedTabIndexes('close_task_tab').catch(() => { });
    await delay(650);
}
656
/**
 * Harvest one batch of comments for `task` and persist everything collected so far.
 *
 * Steps: locate (or reopen) the note's tab, switch to it, make sure it shows
 * the note's detail URL, expand up to BATCH new comments, merge them into the
 * task, write the comments to disk, and, once the note is finished, log a
 * coverage shortfall if fewer comments were collected than the header total
 * multiplied by COMMENTS_COVERAGE_RATIO.
 *
 * @param {object} task - Mutable task state created by openNewTask.
 * @returns {Promise<{ done: boolean, newCount: number }>} `done` is true when the
 *   end marker or empty state was reached, or the max-comments cap was hit.
 * @throws {Error} On tab switch failure, expand/persist failure, or when a batch
 *   ends early without an end/empty marker.
 */
async function runOneBatch(task) {
    // Always resolve by noteId first; page indices can shift after closePage.
    let tabIdx = await resolveDetailTabIndex(task.noteId);
    const tabMissing = tabIdx === null || !Number.isFinite(tabIdx);
    if (tabMissing) {
        // The tab might have been closed (by us or by the site). Reopen on demand.
        await saveDebug('task_tab_missing_reopen', { noteId: task.noteId, safeUrl: task.safeUrl, detailUrl: task.detailUrl });
        await rebuildUsedTabIndexes('task_tab_missing_reopen').catch(() => { });
        tabIdx = await openPageWithFallback(task.safeUrl, `reopen_task:${task.noteId}`);
    }
    if (tabIdx === searchTabIndex) {
        tabIdx = await openPageWithFallback(task.safeUrl, `reopen_task_reserved:${task.noteId}`);
    }
    task.tabIndex = tabIdx;
    console.log(`[Phase34FromLinks] batch start noteId=${task.noteId} tabIndex=${tabIdx} batchNo=${task.batches + 1} (maxNew=${BATCH})`);
    logProgress('batch_start', task.noteId);
    try {
        await controllerAction('browser:page:switch', { profileId: profile, index: tabIdx });
    }
    catch (e) {
        // The index is stale or shifted; rebuild and retry once by re-resolving.
        await saveDebug('page_switch_failed_run_batch', { noteId: task.noteId, detailIndex: tabIdx, error: e?.message || String(e) });
        await rebuildUsedTabIndexes('page_switch_failed_run_batch').catch(() => { });
        const resolved = await resolveDetailTabIndex(task.noteId);
        if (resolved === null || !Number.isFinite(resolved)) {
            throw e;
        }
        task.tabIndex = resolved;
        tabIdx = resolved;
        await controllerAction('browser:page:switch', { profileId: profile, index: tabIdx });
    }
    await delay(900);
    await saveDebug('before_comments_batch', { noteId: task.noteId, tabIndex: tabIdx, batchNo: task.batches + 1 });
    // Safety: ensure we are still on the correct detail URL; if not, navigate to safeUrl within the same tab.
    try {
        const urlNow = await getCurrentUrl();
        const onDetailPage = urlNow.includes('/explore/') && urlNow.includes(task.noteId) && urlNow.includes('xsec_token=');
        if (!onDetailPage) {
            await saveDebug('run_batch_detail_url_mismatch', { noteId: task.noteId, urlNow, safeUrl: task.safeUrl });
            await controllerAction('browser:goto', { profile, url: task.safeUrl });
            await delay(2200);
        }
    }
    catch {
        // ignore
    }
    const out = await expandComments({
        sessionId,
        serviceUrl,
        maxRounds: 240,
        maxNewComments: BATCH,
        seedSeenKeys: Array.from(task.seenKeys),
        startFromTop: task.firstRun,
        ensureLatestTab: task.firstRun,
    });
    if (!out.success) {
        await saveDebug('expand_comments_failed', { noteId: task.noteId, error: out.error || null });
        throw new Error(`expand_comments_failed: ${out.error || 'unknown'}`);
    }
    let newCount = 0;
    for (const comment of out.comments || []) {
        const key = typeof comment?._key === 'string' ? String(comment._key) : '';
        if (key) {
            task.seenKeys.add(key);
        }
        task.comments.push(comment);
        newCount += 1;
    }
    const capReached = maxCommentsLimit && task.comments.length >= maxCommentsLimit;
    if (capReached) {
        task.comments = task.comments.slice(0, maxCommentsLimit);
        task.stoppedByMaxComments = true;
    }
    task.firstRun = false;
    task.batches += 1;
    const done = Boolean(out.reachedEnd || out.emptyState || task.stoppedByMaxComments);
    task.reachedEnd = Boolean(out.reachedEnd);
    task.emptyState = Boolean(out.emptyState);
    task.totalFromHeader = typeof out?.totalFromHeader === 'number' ? out.totalFromHeader : null;
    // If neither the end nor the empty state was reached, the batch must have hit its cap;
    // otherwise extraction/scrolling misbehaved and the run should stop for investigation.
    if (!done && !out.stoppedByMaxNew) {
        await saveDebug('batch_not_reached', {
            noteId: task.noteId,
            newCount,
            batch: BATCH,
            reachedEnd: out.reachedEnd,
            emptyState: out.emptyState,
        });
        throw new Error('batch_not_reached_but_not_at_end_marker_or_empty_state');
    }
    // Persist incrementally after every batch (comments.md is overwritten), so an interrupted run can be reviewed or resumed.
    const persistedComments = await persistXhsNote({
        sessionId,
        env,
        platform: 'xiaohongshu',
        keyword,
        noteId: task.noteId,
        searchUrl: task.searchUrl,
        detailUrl: task.detailUrl,
        commentsResult: {
            comments: task.comments,
            reachedEnd: task.reachedEnd,
            emptyState: task.emptyState,
            stoppedByMaxComments: task.stoppedByMaxComments,
            maxComments: maxCommentsLimit,
            // Only used for the comments.md header.
            totalFromHeader: task.totalFromHeader,
        },
        persistMode: 'comments',
        downloadImages: false,
    });
    if (!persistedComments.success) {
        await saveDebug('persist_comments_failed', { noteId: task.noteId, error: persistedComments.error || null });
        throw new Error(`persist_comments_failed: ${persistedComments.error || 'unknown'}`);
    }
    // Coverage check against the header total, evaluated only once the note is finished (end / empty state).
    // It does not throw: a shortfall is only recorded (debug artifact plus the shortfall log).
    const coverageCheckApplies = done
        && !task.stoppedByMaxComments
        && task.totalFromHeader !== null
        && task.totalFromHeader > 0;
    if (coverageCheckApplies) {
        const need = Math.ceil(task.totalFromHeader * COMMENTS_COVERAGE_RATIO);
        const got = task.comments.length;
        if (got < need) {
            const replyCount = task.comments.filter((c) => Boolean(c && typeof c === 'object' && c.is_reply)).length;
            const withIdCount = task.comments.filter((c) => {
                const id = c?.comment_id || c?.commentId || c?.id || '';
                return typeof id === 'string' && id.trim().length > 0;
            }).length;
            // Last five comments, trimmed, to help diagnose where harvesting stopped.
            const tail = task.comments.slice(-5).map((c) => ({
                key: typeof c?._key === 'string' ? c._key : null,
                id: c?.comment_id || null,
                user: c?.user_name || null,
                text: typeof c?.text === 'string' ? String(c.text).slice(0, 80) : null,
                is_reply: Boolean(c?.is_reply),
            }));
            let exitReason = 'unknown';
            if (task.reachedEnd) {
                exitReason = 'reached_end';
            }
            else if (task.emptyState) {
                exitReason = 'empty_state';
            }
            else if (task.stoppedByMaxComments) {
                exitReason = 'max_comments';
            }
            const shortfall = {
                noteId: task.noteId,
                safeUrl: task.safeUrl,
                detailUrl: task.detailUrl,
                searchUrl: task.searchUrl,
                got,
                headerTotal: task.totalFromHeader,
                needAtLeast: need,
                reachedEnd: task.reachedEnd,
                emptyState: task.emptyState,
                stoppedByMaxComments: task.stoppedByMaxComments,
                exitReason,
                replyCount,
                withIdCount,
                tail,
            };
            await saveDebug('comments_coverage_shortfall', shortfall);
            await appendCoverageShortfall(shortfall);
        }
    }
    console.log(`[Phase34FromLinks] batch done noteId=${task.noteId} new=${newCount} total=${task.comments.length} reachedEnd=${task.reachedEnd} empty=${task.emptyState}`);
    logProgress('batch_done', task.noteId);
    return { done, newCount };
}
815
// Main scheduling loop: runs until the persisted-note count reaches the target, or no work is left.
// Each iteration either opens ONE new task (while there is tab capacity and queued links remain)
// or runs one more batch for the next active task in round-robin order.
+ while (persistedCount < targetTotal) {
815
+ // Fill: open tabs one at a time, and run the first batch (up to 50) right after opening each
816
+ if (active.length < MAX_TABS && cursor < pendingLinks.length) {
817
+ const link = pendingLinks[cursor];
818
+ cursor += 1;
819
+ queuedNoteIds.delete(link.noteId);
820
+ let task = null;
821
+ try {
822
+ task = await openNewTask(link);
823
+ active.push(task);
824
+ const res = await runOneBatch(task);
825
+ if (res.done) {
826
+ await closeTaskTab(task);
827
// The task pushed above is still the last element of `active` at this point.
+ active.pop();
828
+ processedNoteIds.add(task.noteId);
829
+ logProgress('note_done', task.noteId);
830
// NOTE(review): runOneBatch as written returns only { done, newCount }, so `res.rejected` is never set and this branch does not run.
+ if (res.rejected) {
831
+ rejectedCount += 1;
832
+ await moveNoteToRejected({
833
+ noteId: task.noteId,
834
+ reason: res.rejected.reason,
835
+ meta: res.rejected.meta || {},
836
+ });
837
+ }
838
// Re-count from disk so persistedCount reflects what countPersistedNotes accepts.
+ const persistedAfter = await countPersistedNotes({
839
+ platform: 'xiaohongshu',
840
+ env,
841
+ keyword,
842
+ downloadRoot,
843
+ requiredFiles: ['content.md', 'comments.md'],
844
+ requireCommentsDone: true,
845
+ minCommentsCoverageRatio: countCoverageRatio,
846
+ });
847
+ persistedCount = persistedAfter.count;
848
+ }
849
+ }
850
+ catch (error) {
// `task` is still null here when openNewTask itself failed.
851
+ await handleTaskError({ noteId: link.noteId, stage: 'open_or_batch', error, task });
852
+ }
853
+ continue;
854
+ }
855
// No capacity to fill and nothing in flight: the queue is exhausted.
+ if (active.length === 0)
856
+ break;
857
+ // Rotate: switch to the next task after each batch of 50
858
+ rr = rr % active.length;
859
+ const task = active[rr];
860
+ rr += 1;
861
+ let res = null;
862
+ try {
863
+ res = await runOneBatch(task);
864
+ }
865
+ catch (error) {
866
+ await handleTaskError({ noteId: task.noteId, stage: 'run_batch', error, task });
867
+ continue;
868
+ }
869
+ if (!res)
870
+ continue;
871
+ if (res.done) {
872
+ await closeTaskTab(task);
873
+ const idx = active.findIndex((t) => t.noteId === task.noteId);
874
+ if (idx >= 0)
875
+ active.splice(idx, 1);
876
+ processedNoteIds.add(task.noteId);
877
+ logProgress('note_done', task.noteId);
878
// NOTE(review): same as above, `res.rejected` is not produced by the visible runOneBatch.
+ if (res.rejected) {
879
+ rejectedCount += 1;
880
+ await moveNoteToRejected({
881
+ noteId: task.noteId,
882
+ reason: res.rejected.reason,
883
+ meta: res.rejected.meta || {},
884
+ });
885
+ }
886
+ const persistedAfter = await countPersistedNotes({
887
+ platform: 'xiaohongshu',
888
+ env,
889
+ keyword,
890
+ downloadRoot,
891
+ requiredFiles: ['content.md', 'comments.md'],
892
+ requireCommentsDone: true,
893
+ minCommentsCoverageRatio: countCoverageRatio,
894
+ });
895
+ persistedCount = persistedAfter.count;
896
+ }
897
+ }
899
/**
 * Decide whether comment collection for a note directory is complete.
 *
 * Complete means either `comments.done.json` exists, or `comments.md`
 * contains one of the completion markers.
 *
 * The original markers `empty=鏄?` and `reachedEnd=鏄?` are mis-encoded forms
 * of `empty=是` / `reachedEnd=是` (UTF-8 bytes of "是" read in a legacy
 * encoding), so a correctly encoded comments.md could never match them. The
 * correctly encoded markers are now checked as well; the legacy spellings are
 * kept for files that may contain them.
 * NOTE(review): assumes the comments.md header writes `是` for true; confirm against the writer.
 *
 * @param {string} noteDir - Path to the note directory.
 * @returns {Promise<boolean>} True when the note's comments are considered done;
 *   false when comments.md is missing, unreadable, or has no marker.
 */
async function isCommentsDone(noteDir) {
    try {
        await fs.access(path.join(noteDir, 'comments.done.json'));
        return true;
    }
    catch {
        // no done-marker file; fall back to inspecting comments.md
    }
    let text;
    try {
        text = await fs.readFile(path.join(noteDir, 'comments.md'), 'utf-8');
    }
    catch {
        return false;
    }
    const doneMarkers = [
        'empty=是',
        'reachedEnd=是',
        // legacy mis-encoded spellings of the two markers above
        'empty=鏄?',
        'reachedEnd=鏄?',
        'stoppedByMaxComments=yes',
    ];
    return doneMarkers.some((marker) => text.includes(marker));
}
923
/**
 * Remove comment artifacts from every note directory whose comments were
 * started but never finished, so they are collected again from scratch.
 *
 * Directories whose name starts with `_` are skipped, as are notes without a
 * `comments.md` and notes that isCommentsDone reports as complete.
 *
 * @returns {Promise<string[]>} Names of the note directories that were cleared.
 */
async function resetIncompleteComments() {
    const cleared = [];
    const entries = await fs.readdir(keywordDir, { withFileTypes: true }).catch(() => []);
    for (const ent of entries) {
        const isNoteDir = ent?.isDirectory?.() && !ent.name.startsWith('_');
        if (!isNoteDir) {
            continue;
        }
        const noteDir = path.join(keywordDir, ent.name);
        const commentsPath = path.join(noteDir, 'comments.md');
        let hasComments = true;
        try {
            await fs.access(commentsPath);
        }
        catch {
            hasComments = false;
        }
        if (!hasComments) {
            continue;
        }
        if (await isCommentsDone(noteDir)) {
            continue;
        }
        const stale = [
            commentsPath,
            path.join(noteDir, 'comments.jsonl'),
            path.join(noteDir, 'comments.done.json'),
        ];
        for (const file of stale) {
            // Best effort: a missing file is fine.
            await fs.unlink(file).catch(() => { });
        }
        cleared.push(ent.name);
    }
    if (cleared.length > 0) {
        console.log(`[Phase34FromLinks] cleared incomplete comments: ${cleared.join(', ')}`);
    }
    return cleared;
}
949
/**
 * Run the markdown merge for the current keyword and report where the merged
 * file was written. Never throws: a skipped or failed merge is logged as a
 * warning and yields an empty object.
 *
 * @returns {Promise<{ mergedMarkdownPath?: string, mergedMarkdownNotes?: number }>}
 *   Output path and merged-note count on success, `{}` otherwise.
 */
async function mergeMarkdownIfNeeded() {
    try {
        const outcome = await mergeNotesMarkdown({
            platform: 'xiaohongshu',
            env,
            keyword,
            downloadRoot,
        });
        if (!outcome.success) {
            console.warn(`[Phase34FromLinks] merge markdown skipped: ${outcome.error}`);
            return {};
        }
        console.log(`[Phase34FromLinks] merged markdown: ${outcome.outputPath} (notes=${outcome.mergedNotes})`);
        return {
            mergedMarkdownPath: outcome.outputPath,
            mergedMarkdownNotes: outcome.mergedNotes,
        };
    }
    catch (err) {
        console.warn(`[Phase34FromLinks] merge markdown failed: ${err?.message || String(err)}`);
    }
    return {};
}
971
// Final result: compare the persisted count reached by the loop with the target.
+ const finalPersistedCount = persistedCount;
971
+ const addedCount = Math.max(0, finalPersistedCount - initialPersistedCount);
972
// Any count other than the exact target is reported as failure, after saving a debug artifact.
+ if (finalPersistedCount !== targetTotal) {
973
+ await saveDebug('target_not_reached', { finalPersistedCount, targetCount: targetTotal, expectedSearchUrl });
974
+ return {
975
+ success: false,
976
+ keywordDir: persistedAtStart.keywordDir,
977
+ linksPath,
978
+ expectedSearchUrl,
979
+ initialPersistedCount,
980
+ finalPersistedCount,
981
+ addedCount,
982
+ processedCount,
983
+ rejectedCount,
984
+ targetCount: targetTotal,
985
+ error: `target_not_reached: ${finalPersistedCount}/${targetTotal}`,
986
+ };
987
+ }
988
// Target reached: merge the notes' markdown (best effort) and include its fields in the success result.
+ const merged = await mergeMarkdownIfNeeded();
989
+ return {
990
+ success: true,
991
+ keywordDir: persistedAtStart.keywordDir,
992
+ linksPath,
993
+ expectedSearchUrl,
994
+ initialPersistedCount,
995
+ finalPersistedCount,
996
+ addedCount,
997
+ processedCount,
998
+ rejectedCount,
999
+ targetCount: targetTotal,
1000
+ ...merged,
1001
+ };
1003
+ }
1004
+ //# sourceMappingURL=XiaohongshuCollectFromLinksBlock.js.map