@web-auto/webauto 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/desktop-console/default-settings.json +1 -0
- package/apps/desktop-console/dist/main/index.mjs +1618 -0
- package/apps/desktop-console/{src → dist}/main/preload.mjs +10 -0
- package/apps/desktop-console/dist/renderer/index.js +3063 -0
- package/apps/desktop-console/entry/ui-console.mjs +299 -0
- package/apps/webauto/entry/account.mjs +356 -0
- package/apps/webauto/entry/lib/account-detect.mjs +160 -0
- package/apps/webauto/entry/lib/account-store.mjs +587 -0
- package/apps/webauto/entry/lib/profilepool.mjs +1 -1
- package/apps/webauto/entry/xhs-install.mjs +27 -3
- package/apps/webauto/entry/xhs-status.mjs +152 -0
- package/apps/webauto/entry/xhs-unified.mjs +595 -17
- package/bin/webauto.mjs +247 -12
- package/dist/apps/webauto/server.js +66 -0
- package/dist/modules/camo-backend/src/index.js +575 -0
- package/dist/modules/camo-backend/src/internal/BrowserSession.js +817 -0
- package/dist/modules/camo-backend/src/internal/ElementRegistry.js +61 -0
- package/dist/modules/camo-backend/src/internal/ProfileLock.js +85 -0
- package/dist/modules/camo-backend/src/internal/SessionManager.js +172 -0
- package/dist/modules/camo-backend/src/internal/container-matcher.js +852 -0
- package/dist/modules/camo-backend/src/internal/engine-manager.js +258 -0
- package/dist/modules/camo-backend/src/internal/fingerprint.js +203 -0
- package/dist/modules/camo-backend/src/internal/pageRuntime.js +29 -0
- package/dist/modules/camo-backend/src/internal/runtimeInjector.js +30 -0
- package/dist/modules/camo-backend/src/internal/state-bus.js +46 -0
- package/dist/modules/camo-backend/src/internal/storage-paths.js +36 -0
- package/dist/modules/camo-backend/src/internal/ws-server.js +1202 -0
- package/dist/modules/camo-runtime/src/utils/browser-service.mjs +423 -0
- package/dist/modules/camo-runtime/src/utils/config.mjs +77 -0
- package/dist/modules/container-registry/src/index.js +184 -0
- package/dist/modules/logging/src/index.js +92 -0
- package/dist/modules/operations/src/builtin.js +27 -0
- package/dist/modules/operations/src/container-binding.js +75 -0
- package/dist/modules/operations/src/executor.js +146 -0
- package/dist/modules/operations/src/operations/click.js +167 -0
- package/dist/modules/operations/src/operations/extract.js +204 -0
- package/dist/modules/operations/src/operations/find-child.js +17 -0
- package/dist/modules/operations/src/operations/highlight.js +138 -0
- package/dist/modules/operations/src/operations/key.js +61 -0
- package/dist/modules/operations/src/operations/navigate.js +148 -0
- package/dist/modules/operations/src/operations/scroll.js +126 -0
- package/dist/modules/operations/src/operations/type.js +190 -0
- package/dist/modules/operations/src/queue.js +100 -0
- package/dist/modules/operations/src/registry.js +11 -0
- package/dist/modules/operations/src/system/mouse.js +33 -0
- package/dist/modules/state/src/atomic-json.js +33 -0
- package/dist/modules/workflow/blocks/AnchorVerificationBlock.js +71 -0
- package/dist/modules/workflow/blocks/BehaviorRandomizer.js +26 -0
- package/dist/modules/workflow/blocks/CallWorkflowBlock.js +38 -0
- package/dist/modules/workflow/blocks/CloseDetailBlock.js +209 -0
- package/dist/modules/workflow/blocks/CollectBatch.js +137 -0
- package/dist/modules/workflow/blocks/CollectCommentsBlock.js +415 -0
- package/dist/modules/workflow/blocks/CollectSearchListBlock.js +599 -0
- package/dist/modules/workflow/blocks/CollectWeiboPosts.js +229 -0
- package/dist/modules/workflow/blocks/DetectPageStateBlock.js +259 -0
- package/dist/modules/workflow/blocks/EnsureLoginBlock.js +162 -0
- package/dist/modules/workflow/blocks/EnsureSession.js +426 -0
- package/dist/modules/workflow/blocks/ErrorClassifier.js +164 -0
- package/dist/modules/workflow/blocks/ErrorRecoveryBlock.js +319 -0
- package/dist/modules/workflow/blocks/ExpandCommentsBlock.js +1032 -0
- package/dist/modules/workflow/blocks/ExtractDetailBlock.js +310 -0
- package/dist/modules/workflow/blocks/ExtractPostFields.js +88 -0
- package/dist/modules/workflow/blocks/GenerateSmartReplyBlock.js +68 -0
- package/dist/modules/workflow/blocks/GoToSearchBlock.js +497 -0
- package/dist/modules/workflow/blocks/GracefulFallbackBlock.js +104 -0
- package/dist/modules/workflow/blocks/HighlightBlock.js +66 -0
- package/dist/modules/workflow/blocks/InitAutoScroll.js +65 -0
- package/dist/modules/workflow/blocks/LoadContainerDefinition.js +50 -0
- package/dist/modules/workflow/blocks/LoadContainerIndex.js +43 -0
- package/dist/modules/workflow/blocks/LocateAndGuardBlock.js +176 -0
- package/dist/modules/workflow/blocks/LoginRecoveryBlock.js +242 -0
- package/dist/modules/workflow/blocks/MatchContainers.js +64 -0
- package/dist/modules/workflow/blocks/MonitoringBlock.js +190 -0
- package/dist/modules/workflow/blocks/OpenDetailBlock.js +1240 -0
- package/dist/modules/workflow/blocks/OrganizeXhsNotesBlock.js +117 -0
- package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +270 -0
- package/dist/modules/workflow/blocks/PickSinglePost.js +69 -0
- package/dist/modules/workflow/blocks/ProgressTracker.js +125 -0
- package/dist/modules/workflow/blocks/RecordFixtureBlock.js +44 -0
- package/dist/modules/workflow/blocks/RenderMarkdown.js +48 -0
- package/dist/modules/workflow/blocks/SaveFile.js +54 -0
- package/dist/modules/workflow/blocks/ScrollNextBatch.js +72 -0
- package/dist/modules/workflow/blocks/SessionHealthBlock.js +73 -0
- package/dist/modules/workflow/blocks/StartBrowserService.js +45 -0
- package/dist/modules/workflow/blocks/ValidateContainerDefinition.js +67 -0
- package/dist/modules/workflow/blocks/ValidateExtract.js +35 -0
- package/dist/modules/workflow/blocks/WaitSearchPermitBlock.js +162 -0
- package/dist/modules/workflow/blocks/WaitStable.js +74 -0
- package/dist/modules/workflow/blocks/WarmupCommentsBlock.js +120 -0
- package/dist/modules/workflow/blocks/WorkflowExecutor.js +156 -0
- package/dist/modules/workflow/blocks/XiaohongshuCollectFromLinksBlock.js +1004 -0
- package/dist/modules/workflow/blocks/XiaohongshuCollectLinksBlock.js +1049 -0
- package/dist/modules/workflow/blocks/XiaohongshuFullCollectBlock.js +782 -0
- package/dist/modules/workflow/blocks/helpers/anchorVerify.js +198 -0
- package/dist/modules/workflow/blocks/helpers/asyncWorkQueue.js +53 -0
- package/dist/modules/workflow/blocks/helpers/commentScroller.js +334 -0
- package/dist/modules/workflow/blocks/helpers/commentSectionLocator.js +126 -0
- package/dist/modules/workflow/blocks/helpers/containerAnchors.js +301 -0
- package/dist/modules/workflow/blocks/helpers/debugArtifacts.js +6 -0
- package/dist/modules/workflow/blocks/helpers/downloadPaths.js +29 -0
- package/dist/modules/workflow/blocks/helpers/expandCommentsController.js +53 -0
- package/dist/modules/workflow/blocks/helpers/expandCommentsExtractor.js +129 -0
- package/dist/modules/workflow/blocks/helpers/macosVisionOcrPlugin.js +116 -0
- package/dist/modules/workflow/blocks/helpers/mergeXhsMarkdown.js +109 -0
- package/dist/modules/workflow/blocks/helpers/openDetailController.js +56 -0
- package/dist/modules/workflow/blocks/helpers/openDetailTypes.js +7 -0
- package/dist/modules/workflow/blocks/helpers/openDetailViewport.js +474 -0
- package/dist/modules/workflow/blocks/helpers/openDetailWaiter.js +104 -0
- package/dist/modules/workflow/blocks/helpers/operationLogger.js +195 -0
- package/dist/modules/workflow/blocks/helpers/persistedNotes.js +107 -0
- package/dist/modules/workflow/blocks/helpers/replyExpander.js +260 -0
- package/dist/modules/workflow/blocks/helpers/scrollIntoView.js +138 -0
- package/dist/modules/workflow/blocks/helpers/searchExecutor.js +328 -0
- package/dist/modules/workflow/blocks/helpers/searchGate.js +46 -0
- package/dist/modules/workflow/blocks/helpers/searchPageState.js +164 -0
- package/dist/modules/workflow/blocks/helpers/searchResultWaiter.js +64 -0
- package/dist/modules/workflow/blocks/helpers/simpleAnchor.js +134 -0
- package/dist/modules/workflow/blocks/helpers/smartReply.js +40 -0
- package/dist/modules/workflow/blocks/helpers/systemInput.js +635 -0
- package/dist/modules/workflow/blocks/helpers/targetCountMode.js +9 -0
- package/dist/modules/workflow/blocks/helpers/xhsCliArgs.js +80 -0
- package/dist/modules/workflow/blocks/helpers/xhsCommentDom.js +805 -0
- package/dist/modules/workflow/blocks/helpers/xhsNoteOrganizer.js +140 -0
- package/dist/modules/workflow/blocks/restore/RestorePhaseBlock.js +204 -0
- package/dist/modules/workflow/config/workflowRegistry.js +32 -0
- package/dist/modules/workflow/definitions/batch-collect-workflow.js +63 -0
- package/dist/modules/workflow/definitions/scroll-extract-workflow.js +74 -0
- package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow-v2.js +81 -0
- package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow.js +57 -0
- package/dist/modules/workflow/definitions/xiaohongshu-full-collect-workflow-v3.js +68 -0
- package/dist/modules/workflow/definitions/xiaohongshu-note-collect.js +49 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase1-workflow-v3.js +30 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase2-links-workflow-v3.js +40 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase3-collect-workflow-v1.js +54 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase34-from-links-workflow-v3.js +25 -0
- package/dist/modules/workflow/src/WeiboEventDrivenWorkflowRunner.js +308 -0
- package/dist/modules/workflow/src/context.js +70 -0
- package/dist/modules/workflow/src/index.js +5 -0
- package/dist/modules/workflow/src/orchestrator.js +230 -0
- package/dist/modules/workflow/src/runner.js +55 -0
- package/dist/modules/workflow/src/runtime.js +70 -0
- package/dist/modules/workflow/workflows/WeiboFeedExtractionWorkflow.js +359 -0
- package/dist/modules/workflow/workflows/XiaohongshuLoginWorkflow.js +110 -0
- package/dist/modules/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
- package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
- package/dist/modules/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +42 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
- package/dist/modules/xiaohongshu/app/src/index.js +9 -0
- package/dist/modules/xiaohongshu/app/src/utils/checkpoints.js +222 -0
- package/dist/modules/xiaohongshu/app/src/utils/controllerAction.js +43 -0
- package/dist/services/controller/src/controller.js +1476 -0
- package/dist/services/controller/src/index.js +2 -0
- package/dist/services/controller/src/payload-normalizer.js +129 -0
- package/dist/services/shared/heartbeat.js +120 -0
- package/dist/services/shared/lib/errorHandler.js +2 -0
- package/dist/services/shared/serviceProcessLogger.js +139 -0
- package/dist/services/unified-api/RemoteBrowserSession.js +176 -0
- package/dist/services/unified-api/RemoteSessionManager.js +148 -0
- package/dist/services/unified-api/container-operations-handler.js +115 -0
- package/dist/services/unified-api/server.js +652 -0
- package/dist/services/unified-api/state-registry.js +274 -0
- package/dist/services/unified-api/task-persistence.js +66 -0
- package/dist/services/unified-api/task-state.js +130 -0
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +12 -5
- package/modules/xiaohongshu/app/pnpm-lock.yaml +24 -0
- package/package.json +37 -9
- package/.beads/README.md +0 -81
- package/.beads/config.yaml +0 -67
- package/.beads/interactions.jsonl +0 -0
- package/.beads/issues.jsonl +0 -180
- package/.beads/metadata.json +0 -4
- package/.claude/settings.local.json +0 -10
- package/.github/workflows/ci.yml +0 -55
- package/AGENTS.md +0 -253
- package/apps/desktop-console/README.md +0 -27
- package/apps/desktop-console/package-lock.json +0 -897
- package/apps/desktop-console/package.json +0 -20
- package/apps/desktop-console/scripts/build-and-install.mjs +0 -19
- package/apps/desktop-console/scripts/build.mjs +0 -45
- package/apps/desktop-console/scripts/test-preload.mjs +0 -13
- package/apps/desktop-console/src/main/config.mts +0 -26
- package/apps/desktop-console/src/main/core-daemon-manager.mts +0 -131
- package/apps/desktop-console/src/main/desktop-settings.mts +0 -267
- package/apps/desktop-console/src/main/heartbeat-watchdog.mts +0 -50
- package/apps/desktop-console/src/main/heartbeat-watchdog.test.mts +0 -68
- package/apps/desktop-console/src/main/index-streaming.test.mts +0 -20
- package/apps/desktop-console/src/main/index.mts +0 -980
- package/apps/desktop-console/src/main/profile-store.mts +0 -239
- package/apps/desktop-console/src/main/profile-store.test.mts +0 -54
- package/apps/desktop-console/src/main/state-bridge.mts +0 -114
- package/apps/desktop-console/src/main/task-state-types.ts +0 -32
- package/apps/desktop-console/src/renderer/hooks/use-task-state.mts +0 -120
- package/apps/desktop-console/src/renderer/index.mts +0 -133
- package/apps/desktop-console/src/renderer/index.test.mts +0 -34
- package/apps/desktop-console/src/renderer/path-helpers.mts +0 -46
- package/apps/desktop-console/src/renderer/path-helpers.test.mts +0 -14
- package/apps/desktop-console/src/renderer/tabs/debug.mts +0 -48
- package/apps/desktop-console/src/renderer/tabs/debug.test.mts +0 -22
- package/apps/desktop-console/src/renderer/tabs/logs.mts +0 -421
- package/apps/desktop-console/src/renderer/tabs/logs.test.mts +0 -27
- package/apps/desktop-console/src/renderer/tabs/preflight.mts +0 -486
- package/apps/desktop-console/src/renderer/tabs/preflight.test.mts +0 -33
- package/apps/desktop-console/src/renderer/tabs/profile-pool.mts +0 -213
- package/apps/desktop-console/src/renderer/tabs/results.mts +0 -171
- package/apps/desktop-console/src/renderer/tabs/run.test.mts +0 -63
- package/apps/desktop-console/src/renderer/tabs/runtime.mts +0 -151
- package/apps/desktop-console/src/renderer/tabs/settings.mts +0 -146
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/account-flow.mts +0 -486
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/guide-browser-check.mts +0 -56
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/helpers.mts +0 -262
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/layout-block.mts +0 -430
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/live-stats.mts +0 -847
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/run-flow.mts +0 -443
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu-state.mts +0 -425
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu.mts +0 -497
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu.test.mts +0 -291
- package/apps/desktop-console/src/renderer/ui-components.mts +0 -31
- package/docs/README_camoufox_chinese.md +0 -141
- package/docs/USAGE_V3.md +0 -163
- package/docs/arch/OCR_MACOS_PLUGIN.md +0 -39
- package/docs/arch/PORTS.md +0 -40
- package/docs/arch/REGRESSION_CHECKLIST.md +0 -121
- package/docs/arch/SEARCH_GATE.md +0 -224
- package/docs/arch/VIEWPORT_SAFETY.md +0 -182
- package/docs/arch/XIAOHONGSHU_OFFLINE_MOCK_DESIGN.md +0 -267
- package/docs/xiaohongshu-container-driven-summary.md +0 -221
- package/docs/xiaohongshu-full-collect-runbook.md +0 -134
- package/docs/xiaohongshu-next-steps.md +0 -228
- package/docs/xiaohongshu-quickstart.md +0 -73
- package/docs/xiaohongshu-workflow-summary.md +0 -227
- package/modules/container-registry/tests/container-registry.test.ts +0 -16
- package/modules/logging/tests/logging.test.ts +0 -38
- package/modules/operations/tests/operations.test.ts +0 -22
- package/modules/operations/tests/viewport-filter.test.ts +0 -161
- package/modules/operations/tests/visible-only.test.ts +0 -250
- package/modules/session-manager/tests/session-manager.test.ts +0 -23
- package/modules/state/src/atomic-json.test.ts +0 -30
- package/modules/state/src/paths.test.ts +0 -59
- package/modules/state/src/xiaohongshu-collect-state.test.ts +0 -259
- package/modules/workflow/blocks/AnchorVerificationBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/AnchorVerificationBlock.js.map +0 -1
- package/modules/workflow/blocks/DetectPageStateBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/DetectPageStateBlock.js.map +0 -1
- package/modules/workflow/blocks/ErrorRecoveryBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/ErrorRecoveryBlock.js.map +0 -1
- package/modules/workflow/blocks/WaitSearchPermitBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/WaitSearchPermitBlock.js.map +0 -1
- package/modules/workflow/blocks/helpers/containerAnchors.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/containerAnchors.js.map +0 -1
- package/modules/workflow/blocks/helpers/downloadPaths.test.ts +0 -62
- package/modules/workflow/blocks/helpers/mergeXhsMarkdown.test.ts +0 -121
- package/modules/workflow/blocks/helpers/operationLogger.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/operationLogger.js.map +0 -1
- package/modules/workflow/blocks/helpers/persistedNotes.test.ts +0 -268
- package/modules/workflow/blocks/helpers/searchPageState.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/searchPageState.js.map +0 -1
- package/modules/workflow/blocks/helpers/targetCountMode.test.ts +0 -29
- package/modules/workflow/blocks/helpers/xhsCliArgs.test.ts +0 -75
- package/modules/workflow/tests/smartReply.test.ts +0 -32
- package/modules/xiaohongshu/app/src/blocks/Phase3Interact.matcher.test.ts +0 -33
- package/modules/xiaohongshu/app/src/utils/__tests__/checkpoints.test.ts +0 -141
- package/modules/xiaohongshu/app/tests/commentMatchDsl.test.ts +0 -50
- package/modules/xiaohongshu/app/tests/commentMatcher.test.ts +0 -46
- package/modules/xiaohongshu/app/tests/sharding.test.ts +0 -31
- package/package-scripts.json +0 -8
- package/runtime/infra/utils/README.md +0 -13
- package/runtime/infra/utils/scripts/README.md +0 -0
- package/runtime/infra/utils/scripts/development/eval-in-session.mjs +0 -40
- package/runtime/infra/utils/scripts/development/highlight-search-containers.mjs +0 -35
- package/runtime/infra/utils/scripts/service/kill-port.mjs +0 -24
- package/runtime/infra/utils/scripts/service/start-api.mjs +0 -39
- package/runtime/infra/utils/scripts/service/start-browser-service.mjs +0 -106
- package/runtime/infra/utils/scripts/service/stop-api.mjs +0 -18
- package/runtime/infra/utils/scripts/service/stop-browser-service.mjs +0 -104
- package/runtime/infra/utils/scripts/test-services.mjs +0 -94
- package/services/shared/heartbeat.test.ts +0 -102
- package/services/unified-api/__tests__/task-state.test.ts +0 -95
- package/sitecustomize.py +0 -19
- package/tests/README.md +0 -194
- package/tests/e2e/workflows/weibo-feed-extraction.test.ts +0 -171
- package/tests/fixtures/data/container-definitions.json +0 -67
- package/tests/fixtures/pages/simple-page.html +0 -69
- package/tests/integration/01-test-container-match.mjs +0 -188
- package/tests/integration/02-test-dom-branch.mjs +0 -161
- package/tests/integration/03-test-container-operation-system.mjs +0 -91
- package/tests/integration/05-test-container-lifecycle-events.mjs +0 -224
- package/tests/integration/05-test-container-lifecycle-with-events.mjs +0 -250
- package/tests/integration/06-test-container-dom-tree-drawing.mjs +0 -256
- package/tests/integration/07-test-weibo-container-lifecycle.mjs +0 -355
- package/tests/integration/08-test-weibo-feed-workflow.test.mjs +0 -164
- package/tests/integration/10-test-visual-analyzer.mjs +0 -312
- package/tests/integration/11-test-visual-loop.mjs +0 -284
- package/tests/integration/12-test-simple-visual-loop.mjs +0 -242
- package/tests/integration/13-test-visual-robust.mjs +0 -185
- package/tests/integration/14-test-visual-highlight-loop.mjs +0 -271
- package/tests/integration/inspect-page.mjs +0 -50
- package/tests/integration/run-all-tests.mjs +0 -95
- package/tests/patch_verification/CODEX_PATCH_TEST.md +0 -103
- package/tests/patch_verification/PHASE2_ANALYSIS.md +0 -179
- package/tests/patch_verification/PHASE2_OPTIMIZATION_REPORT.md +0 -55
- package/tests/patch_verification/PHASE2_TO_PHASE4_SUMMARY.md +0 -126
- package/tests/patch_verification/QUICK_TEST_SEQUENCE.md +0 -262
- package/tests/patch_verification/README.md +0 -143
- package/tests/patch_verification/RUN_TESTS.md +0 -60
- package/tests/patch_verification/TEST_EXECUTION.md +0 -99
- package/tests/patch_verification/TEST_PLAN.md +0 -328
- package/tests/patch_verification/TEST_RESULTS.md +0 -34
- package/tests/patch_verification/TOOL_TEST_PLAN.md +0 -48
- package/tests/patch_verification/run-tool-test.mjs +0 -121
- package/tests/patch_verification/temp_test_files/test01.txt +0 -1
- package/tests/patch_verification/temp_test_files/test02.txt +0 -3
- package/tests/patch_verification/temp_test_files/test02_gnu.txt +0 -3
- package/tests/patch_verification/temp_test_files/test03.txt +0 -1
- package/tests/patch_verification/temp_test_files/test03_multiline.txt +0 -5
- package/tests/patch_verification/temp_test_files/test04_function.ts +0 -5
- package/tests/patch_verification/temp_test_files/test05_import.ts +0 -4
- package/tests/patch_verification/temp_test_files/test06_special_chars.txt +0 -4
- package/tests/patch_verification/temp_test_files/test07_indentation.ts +0 -5
- package/tests/patch_verification/temp_test_files/test08_mismatch.txt +0 -1
- package/tests/patch_verification/temp_test_files/test_add_02.txt +0 -3
- package/tests/patch_verification/temp_test_files/test_simple.txt +0 -1
- package/tests/runner/TestReporter.mjs +0 -57
- package/tests/runner/TestRunner.mjs +0 -244
- package/tests/unit/commands/profile.test.mjs +0 -10
- package/tests/unit/container/change-notifier.test.mjs +0 -181
- package/tests/unit/lifecycle/session-registry.test.mjs +0 -135
- package/tests/unit/operations/registry.test.ts +0 -73
- package/tests/unit/utils/browser-service.test.mjs +0 -153
- package/tests/unit/utils/config.test.mjs +0 -166
- package/tests/unit/utils/fingerprint.test.mjs +0 -166
- package/tsconfig.json +0 -31
- package/tsconfig.services.json +0 -26
- /package/apps/desktop-console/{src → dist}/renderer/index.html +0 -0
- /package/apps/desktop-console/{src/renderer/tabs → dist/renderer}/run.mts +0 -0
|
@@ -0,0 +1,1049 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Block: XiaohongshuCollectLinksBlock
|
|
3
|
+
*
|
|
4
|
+
* Phase 2: on the search results page, collect safe links by "clicking into the note detail -> reading the real URL (xsec_token) -> pressing ESC to go back",
|
|
5
|
+
* and write them to: ~/.webauto/download/xiaohongshu/{env}/{keyword}/phase2-links.jsonl
|
|
6
|
+
*
|
|
7
|
+
* Constraints:
|
|
8
|
+
* - Never construct URLs by hand; the real link must be obtained by clicking into the note detail
|
|
9
|
+
* - searchUrl must be strictly equal to one and the same string (used to detect accidental clicks on "related searches" / "everyone is searching")
|
|
10
|
+
* - Development stage: any anomaly (mis-click / captcha / failure to exit the detail) fails fast immediately and keeps the evidence
|
|
11
|
+
*/
|
|
12
|
+
import os from 'node:os';
|
|
13
|
+
import path from 'node:path';
|
|
14
|
+
import { promises as fs } from 'node:fs';
|
|
15
|
+
import { urlKeywordEquals } from './helpers/searchPageState.js';
|
|
16
|
+
import { getPrimarySelectorByContainerId } from './helpers/containerAnchors.js';
|
|
17
|
+
import { execute as collectSearchList } from './CollectSearchListBlock.js';
|
|
18
|
+
import { execute as detectPageState } from './DetectPageStateBlock.js';
|
|
19
|
+
import { execute as openDetail } from './OpenDetailBlock.js';
|
|
20
|
+
import { execute as closeDetail } from './CloseDetailBlock.js';
|
|
21
|
+
import { execute as restorePhase } from './restore/RestorePhaseBlock.js';
|
|
22
|
+
import { resolveTargetCount } from './helpers/targetCountMode.js';
|
|
23
|
+
import { execute as waitSearchPermit } from './WaitSearchPermitBlock.js';
|
|
24
|
+
import { execute as goToSearch } from './GoToSearchBlock.js';
|
|
25
|
+
import { isDevMode } from './helpers/systemInput.js';
|
|
26
|
+
import { logControllerActionError, logControllerActionResult, logControllerActionStart, } from './helpers/operationLogger.js';
|
|
27
|
+
/**
 * Turn an arbitrary value into a string that is safe to embed in a file name.
 *
 * Falsy input becomes the empty string. Runs of reserved path characters
 * (\ / : " * ? < > |) and runs of whitespace are each collapsed to a single
 * underscore, and the result is capped at 80 UTF-16 code units.
 *
 * @param {*} value - Value to sanitise; coerced with String().
 * @returns {string} Sanitised fragment, at most 80 characters long.
 */
function sanitizeFilenamePart(value) {
    const trimmed = String(value || '').trim();
    const withoutReserved = trimmed.replace(/[\\/:"*?<>|]+/g, '_');
    const withoutWhitespace = withoutReserved.replace(/\s+/g, '_');
    return withoutWhitespace.slice(0, 80);
}
|
|
34
|
+
/**
 * Resolve the root directory under which downloaded data is stored.
 *
 * Resolution order:
 *   1. WEBAUTO_DOWNLOAD_ROOT, if it contains any non-whitespace character;
 *   2. WEBAUTO_DOWNLOAD_DIR, under the same condition;
 *   3. `<home>/.webauto/download`, where home is HOME, then USERPROFILE,
 *      then os.homedir().
 *
 * A blank (whitespace-only) WEBAUTO_DOWNLOAD_ROOT no longer hides a usable
 * WEBAUTO_DOWNLOAD_DIR. The chosen override is returned exactly as set
 * (not trimmed), which is what this function has always returned.
 *
 * @returns {string} Download root directory.
 */
function resolveDownloadRoot() {
    const overrides = [process.env.WEBAUTO_DOWNLOAD_ROOT, process.env.WEBAUTO_DOWNLOAD_DIR];
    // Pick the first override that is actually usable rather than the first
    // one that merely happens to be a non-empty string.
    const custom = overrides.find((value) => typeof value === 'string' && value.trim() !== '');
    if (custom !== undefined) {
        return custom;
    }
    const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
    return path.join(home, '.webauto', 'download');
}
|
|
41
|
+
/**
 * Report whether debug artifacts should be written.
 *
 * True when any of WEBAUTO_DEBUG, WEBAUTO_DEBUG_ARTIFACTS or
 * WEBAUTO_DEBUG_SCREENSHOT is set to exactly the string '1'.
 *
 * @returns {boolean} Whether at least one debug switch is on.
 */
function isDebugArtifactsEnabled() {
    const switches = ['WEBAUTO_DEBUG', 'WEBAUTO_DEBUG_ARTIFACTS', 'WEBAUTO_DEBUG_SCREENSHOT'];
    return switches.some((name) => process.env[name] === '1');
}
|
|
46
|
+
/**
 * Return a shuffled copy of `items` (Fisher–Yates, driven by Math.random).
 *
 * The input is copied with `slice()` and never modified. Positions are
 * settled from the last index down to index 1, drawing one random number
 * per position.
 *
 * @param {Array} items - Items to shuffle.
 * @returns {Array} New array holding the same elements in random order.
 */
function shuffleItems(items) {
    const shuffled = items.slice();
    let remaining = shuffled.length;
    while (remaining > 1) {
        const last = remaining - 1;
        const pick = Math.floor(Math.random() * remaining);
        const held = shuffled[last];
        shuffled[last] = shuffled[pick];
        shuffled[pick] = held;
        remaining -= 1;
    }
    return shuffled;
}
|
|
54
|
+
/**
 * Pull the base64 payload out of a screenshot response of unknown shape.
 *
 * The following locations are probed in order and the first one that is
 * neither null nor undefined is taken: data.data, data.body.data,
 * body.data, result.data, result, data, and finally the response itself.
 * The value is returned only if it is a string longer than 10 characters.
 *
 * @param {*} raw - Response returned by the screenshot call.
 * @returns {string|undefined} The payload string, or undefined.
 */
function extractBase64FromScreenshotResponse(raw) {
    const probes = [
        (r) => r?.data?.data,
        (r) => r?.data?.body?.data,
        (r) => r?.body?.data,
        (r) => r?.result?.data,
        (r) => r?.result,
        (r) => r?.data,
        (r) => r,
    ];
    let candidate;
    for (const probe of probes) {
        candidate = probe(raw);
        if (candidate !== null && candidate !== undefined) {
            break;
        }
    }
    if (typeof candidate !== 'string') {
        return undefined;
    }
    return candidate.length > 10 ? candidate : undefined;
}
|
|
64
|
+
/**
 * Read a JSON Lines file into an array of parsed values.
 *
 * Each line is trimmed; blank lines, lines that are not valid JSON, and
 * lines whose parsed value is falsy (null, 0, false, "") are dropped.
 * A missing file (ENOENT) yields an empty array.
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Promise<Array>} Parsed records in file order.
 * @throws Any read error other than ENOENT.
 */
async function readJsonl(filePath) {
    let text;
    try {
        text = await fs.readFile(filePath, 'utf-8');
    }
    catch (err) {
        if (err?.code === 'ENOENT') {
            return [];
        }
        throw err;
    }
    const records = [];
    for (const rawLine of text.split('\n')) {
        const line = rawLine.trim();
        if (!line) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            continue;
        }
        if (parsed) {
            records.push(parsed);
        }
    }
    return records;
}
|
|
87
|
+
/**
 * Append one value to a JSON Lines file as a single line.
 *
 * @param {string} filePath - File to append to.
 * @param {*} value - Value to serialise with JSON.stringify.
 * @returns {Promise<void>} Resolves once the line has been appended.
 */
async function appendJsonl(filePath, value) {
    const serialized = JSON.stringify(value);
    await fs.appendFile(filePath, `${serialized}\n`, 'utf-8');
}
|
|
91
|
+
export async function execute(input) {
|
|
92
|
+
const { sessionId, keyword, env = 'debug', targetCount, targetCountMode = 'absolute', maxScrollRounds = 60, strictTargetCount = true, serviceUrl = 'http://127.0.0.1:7701', } = input;
|
|
93
|
+
const profile = sessionId;
|
|
94
|
+
const controllerUrl = `${serviceUrl}/v1/controller/action`;
|
|
95
|
+
const debugArtifactsEnabled = isDebugArtifactsEnabled();
|
|
96
|
+
const keywordDir = path.join(resolveDownloadRoot(), 'xiaohongshu', env, keyword);
|
|
97
|
+
const linksPath = path.join(keywordDir, 'phase2-links.jsonl');
|
|
98
|
+
const debugDir = path.join(keywordDir, '_debug', 'phase2_links');
|
|
99
|
+
const listContainerId = 'xiaohongshu_search.search_result_list';
|
|
100
|
+
const listSelectorPromise = getPrimarySelectorByContainerId(listContainerId).catch(() => null);
|
|
101
|
+
const failFast = isDevMode();
|
|
102
|
+
const maxRecoverAttempts = Math.max(1, Number(process.env.WEBAUTO_PHASE2_RECOVER_MAX || 3));
|
|
103
|
+
let recoverAttempts = 0;
|
|
104
|
+
/**
 * Send one action to the controller endpoint and return its result.
 *
 * POSTs `{ action, payload }` as JSON to `controllerUrl`, with a 30 s abort
 * signal when `AbortSignal.timeout` is available. A non-OK HTTP status
 * raises `Error("HTTP <status>: <body>")`. The body is parsed as JSON; an
 * empty body becomes `{}` and an unparseable body becomes `{ raw }`.
 * The returned value is the parsed object's `data` property when truthy,
 * otherwise the parsed object itself. Start, result and error are reported
 * through the operation logger.
 *
 * @param {string} action - Controller action name.
 * @param {object} [payload={}] - Action payload.
 * @returns {Promise<*>} Unwrapped controller result.
 * @throws Re-throws any failure after logging it.
 */
async function controllerAction(action, payload = {}) {
    const logContext = () => ({ source: 'XiaohongshuCollectLinksBlock' });
    const opId = logControllerActionStart(action, payload, logContext());
    try {
        const body = JSON.stringify({ action, payload });
        const signal = AbortSignal.timeout ? AbortSignal.timeout(30000) : undefined;
        const response = await fetch(controllerUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body,
            signal,
        });
        const text = await response.text();
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${text}`);
        }
        let parsed;
        if (text) {
            try {
                parsed = JSON.parse(text);
            }
            catch {
                parsed = { raw: text };
            }
        }
        else {
            parsed = {};
        }
        const result = parsed.data || parsed;
        logControllerActionResult(opId, action, result, logContext());
        return result;
    }
    catch (error) {
        logControllerActionError(opId, action, error, payload, logContext());
        throw error;
    }
}
|
|
132
|
+
const delay = (ms) => new Promise((r) => setTimeout(r, ms));
|
|
133
|
+
/**
 * Ask the controller to evaluate `window.location.href` for the current profile.
 *
 * @returns {Promise<*>} `result` from the response, else `data.result`,
 *   else the empty string when both are null/undefined.
 */
async function getCurrentUrl() {
    const response = await controllerAction('browser:execute', { profile, script: 'window.location.href' });
    const direct = response?.result;
    if (direct !== null && direct !== undefined) {
        return direct;
    }
    return response?.data?.result ?? '';
}
|
|
137
|
+
/**
 * Best-effort capture of a debug snapshot (screenshot + JSON metadata).
 *
 * Does nothing unless debug artifacts are enabled. Otherwise it creates
 * `debugDir`, requests a viewport screenshot from the controller, writes
 * `<timestamp>-<kind>.png` when a base64 payload was obtained, and always
 * writes `<timestamp>-<kind>.json` containing the timestamp, kind, session,
 * keyword, env, current URL, the PNG path (or null) and the caller's `meta`
 * (whose keys override the built-in ones). Any failure is reported with
 * console.warn and never propagates.
 *
 * @param {string} kind - Short label for the snapshot; used in the file name.
 * @param {object} [meta] - Extra fields merged into the JSON record.
 * @returns {Promise<void>}
 */
async function saveDebug(kind, meta) {
    if (!debugArtifactsEnabled) {
        return;
    }
    try {
        await fs.mkdir(debugDir, { recursive: true });
        const ts = new Date().toISOString().replace(/[:.]/g, '-');
        const base = `${ts}-${sanitizeFilenamePart(kind)}`;
        const pngPath = path.join(debugDir, `${base}.png`);
        const jsonPath = path.join(debugDir, `${base}.json`);

        // A failed screenshot must not prevent the JSON record from being written.
        let shot;
        try {
            shot = await controllerAction('browser:screenshot', { profileId: profile, fullPage: false });
        }
        catch {
            shot = null;
        }
        const b64 = extractBase64FromScreenshotResponse(shot);
        if (b64) {
            await fs.writeFile(pngPath, Buffer.from(b64, 'base64'));
        }

        let url;
        try {
            url = await getCurrentUrl();
        }
        catch {
            url = '';
        }
        const record = {
            ts,
            kind,
            sessionId: profile,
            keyword,
            env,
            url,
            pngPath: b64 ? pngPath : null,
            ...meta,
        };
        await fs.writeFile(jsonPath, JSON.stringify(record, null, 2), 'utf-8');
        console.log(`[Phase2Links][debug] saved ${kind}: ${pngPath}`);
    }
    catch (e) {
        console.warn(`[Phase2Links][debug] save failed (${kind}): ${e?.message || String(e)}`);
    }
}
|
|
166
|
+
/**
 * Normalise and validate one record read from the links file.
 *
 * `noteId`, `safeUrl` and `searchUrl` must all be non-empty strings after
 * trimming; `safeUrl` must pass isValidSafeUrl and `searchUrl` must pass
 * isValidSearchUrl for the current keyword. A string `ts` is kept as-is,
 * otherwise the current time (ISO 8601) is used.
 *
 * @param {*} raw - Candidate record.
 * @returns {{noteId: string, safeUrl: string, searchUrl: string, ts: string}|null}
 *   The cleaned record, or null when it is not acceptable.
 */
function validateEntry(raw) {
    const trimmedString = (field) => (typeof raw?.[field] === 'string' ? raw[field].trim() : '');
    const noteId = trimmedString('noteId');
    const safeUrl = trimmedString('safeUrl');
    const searchUrl = trimmedString('searchUrl');

    const hasAllFields = Boolean(noteId && safeUrl && searchUrl);
    if (!hasAllFields) {
        return null;
    }
    if (!isValidSafeUrl(safeUrl) || !isValidSearchUrl(searchUrl, keyword)) {
        return null;
    }

    let ts;
    if (typeof raw?.ts === 'string') {
        ts = raw.ts;
    }
    else {
        ts = new Date().toISOString();
    }
    return { noteId, safeUrl, searchUrl, ts };
}
|
|
183
|
+
/**
 * Check that `searchUrl` is a Xiaohongshu search-result URL for the expected keyword.
 *
 * @param {string} searchUrl - URL to validate.
 * @param {string} expectedKeyword - Keyword passed on to `urlKeywordEquals`.
 * @returns {boolean} `true` only when the host is `xiaohongshu.com` or one of its
 *   subdomains, the path contains `/search_result`, and `urlKeywordEquals` accepts
 *   the URL. Unparseable input (or any thrown error) yields `false`.
 */
function isValidSearchUrl(searchUrl, expectedKeyword) {
    try {
        const { hostname, pathname } = new URL(searchUrl);
        // Require a label boundary: a bare suffix match would also accept look-alike
        // hosts such as "evilxiaohongshu.com".
        const onXhsHost = hostname === 'xiaohongshu.com' || hostname.endsWith('.xiaohongshu.com');
        if (!onXhsHost) {
            return false;
        }
        if (!pathname.includes('/search_result')) {
            return false;
        }
        return urlKeywordEquals(searchUrl, expectedKeyword);
    }
    catch {
        return false;
    }
}
|
|
196
|
+
/**
 * Check that `safeUrl` looks like a tokenized Xiaohongshu note-detail URL.
 *
 * @param {string} safeUrl - URL to validate.
 * @returns {boolean} `true` only when the host is `xiaohongshu.com` or one of its
 *   subdomains, the path contains `/explore/<lowercase-hex>`, and a non-empty
 *   `xsec_token` query parameter is present. Unparseable input yields `false`.
 */
function isValidSafeUrl(safeUrl) {
    try {
        const { hostname, pathname, searchParams } = new URL(safeUrl);
        // Require a label boundary: a bare suffix match would also accept look-alike
        // hosts such as "evilxiaohongshu.com".
        const onXhsHost = hostname === 'xiaohongshu.com' || hostname.endsWith('.xiaohongshu.com');
        if (!onXhsHost) {
            return false;
        }
        if (!/\/explore\/[a-f0-9]+/.test(pathname)) {
            return false;
        }
        return Boolean(searchParams.get('xsec_token'));
    }
    catch {
        return false;
    }
}
|
|
211
|
+
await fs.mkdir(keywordDir, { recursive: true });
|
|
212
|
+
// 0) Read existing links (incremental collection)
|
|
213
|
+
const existingRaw = await readJsonl(linksPath);
|
|
214
|
+
const existing = [];
|
|
215
|
+
const byNoteId = new Map();
|
|
216
|
+
for (const r of existingRaw) {
|
|
217
|
+
const e = validateEntry(r);
|
|
218
|
+
if (!e)
|
|
219
|
+
continue;
|
|
220
|
+
if (byNoteId.has(e.noteId))
|
|
221
|
+
continue;
|
|
222
|
+
byNoteId.set(e.noteId, e);
|
|
223
|
+
existing.push(e);
|
|
224
|
+
}
|
|
225
|
+
const initialCount = byNoteId.size;
|
|
226
|
+
const { targetTotal } = resolveTargetCount({
|
|
227
|
+
targetCount,
|
|
228
|
+
baseCount: initialCount,
|
|
229
|
+
mode: targetCountMode,
|
|
230
|
+
});
|
|
231
|
+
if (initialCount > targetTotal) {
|
|
232
|
+
const trimmed = existing.slice(0, targetTotal);
|
|
233
|
+
const body = trimmed.length > 0 ? `${trimmed.map((e) => JSON.stringify(e)).join('\n')}\n` : '';
|
|
234
|
+
await fs.writeFile(linksPath, body, 'utf-8');
|
|
235
|
+
console.log(`[Phase2Links] existing links exceed target, trimmed ${initialCount} -> ${trimmed.length}`);
|
|
236
|
+
const expected = trimmed[0]?.searchUrl || '';
|
|
237
|
+
return {
|
|
238
|
+
success: true,
|
|
239
|
+
keywordDir,
|
|
240
|
+
linksPath,
|
|
241
|
+
expectedSearchUrl: expected,
|
|
242
|
+
initialCount,
|
|
243
|
+
finalCount: trimmed.length,
|
|
244
|
+
addedCount: 0,
|
|
245
|
+
targetCount: targetTotal,
|
|
246
|
+
};
|
|
247
|
+
}
|
|
248
|
+
if (initialCount === targetTotal) {
|
|
249
|
+
const expected = existing[0]?.searchUrl || '';
|
|
250
|
+
return {
|
|
251
|
+
success: true,
|
|
252
|
+
keywordDir,
|
|
253
|
+
linksPath,
|
|
254
|
+
expectedSearchUrl: expected,
|
|
255
|
+
initialCount,
|
|
256
|
+
finalCount: initialCount,
|
|
257
|
+
addedCount: 0,
|
|
258
|
+
targetCount: targetTotal,
|
|
259
|
+
};
|
|
260
|
+
}
|
|
261
|
+
// 1) 记录本次采集的 expectedSearchUrl(允许多次搜索产生不同 URL)
|
|
262
|
+
let expectedSearchUrl = await getCurrentUrl();
|
|
263
|
+
const allowedSearchUrls = new Set();
|
|
264
|
+
const recordSearchUrl = (url) => {
|
|
265
|
+
if (!url || !url.includes('/search_result'))
|
|
266
|
+
return;
|
|
267
|
+
if (!urlKeywordEquals(url, keyword))
|
|
268
|
+
return;
|
|
269
|
+
allowedSearchUrls.add(url);
|
|
270
|
+
expectedSearchUrl = url;
|
|
271
|
+
};
|
|
272
|
+
recordSearchUrl(expectedSearchUrl);
|
|
273
|
+
if (allowedSearchUrls.size === 0) {
|
|
274
|
+
await saveDebug('not_on_expected_search_result', { expectedSearchUrl, keyword });
|
|
275
|
+
if (failFast) {
|
|
276
|
+
return {
|
|
277
|
+
success: false,
|
|
278
|
+
keywordDir,
|
|
279
|
+
linksPath,
|
|
280
|
+
expectedSearchUrl,
|
|
281
|
+
initialCount,
|
|
282
|
+
finalCount: initialCount,
|
|
283
|
+
addedCount: 0,
|
|
284
|
+
targetCount: targetTotal,
|
|
285
|
+
error: `not_on_search_result_or_keyword_mismatch: ${expectedSearchUrl}`,
|
|
286
|
+
};
|
|
287
|
+
}
|
|
288
|
+
const retryOk = await retrySearch('init_search');
|
|
289
|
+
if (!retryOk) {
|
|
290
|
+
return {
|
|
291
|
+
success: false,
|
|
292
|
+
keywordDir,
|
|
293
|
+
linksPath,
|
|
294
|
+
expectedSearchUrl,
|
|
295
|
+
initialCount,
|
|
296
|
+
finalCount: initialCount,
|
|
297
|
+
addedCount: 0,
|
|
298
|
+
targetCount: targetTotal,
|
|
299
|
+
error: `not_on_search_result_or_keyword_mismatch: ${expectedSearchUrl}`,
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
// 1.1) 既有数据的 searchUrl 校验:keyword 一致即可
|
|
304
|
+
for (const e of existing) {
|
|
305
|
+
if (allowedSearchUrls.has(e.searchUrl))
|
|
306
|
+
continue;
|
|
307
|
+
if (!urlKeywordEquals(e.searchUrl, keyword)) {
|
|
308
|
+
await saveDebug('existing_searchurl_mismatch', { expectedSearchUrl, entry: e });
|
|
309
|
+
if (failFast) {
|
|
310
|
+
return {
|
|
311
|
+
success: false,
|
|
312
|
+
keywordDir,
|
|
313
|
+
linksPath,
|
|
314
|
+
expectedSearchUrl,
|
|
315
|
+
initialCount,
|
|
316
|
+
finalCount: initialCount,
|
|
317
|
+
addedCount: 0,
|
|
318
|
+
targetCount: targetTotal,
|
|
319
|
+
error: `existing_searchurl_mismatch: ${e.searchUrl}`,
|
|
320
|
+
};
|
|
321
|
+
}
|
|
322
|
+
continue;
|
|
323
|
+
}
|
|
324
|
+
recordSearchUrl(e.searchUrl);
|
|
325
|
+
}
|
|
326
|
+
/**
 * Verify that the browser is still on one of the recorded search URLs.
 *
 * @param {string} tag - Suffix for the debug snapshot name when the URL drifted.
 * @returns {Promise<true|{url: *}>} `true` when the current URL is in
 *   `allowedSearchUrls`; otherwise an object carrying the unexpected URL
 *   (callers compare the result against `true`).
 */
async function assertSearchUrlStable(tag) {
    const currentUrl = await getCurrentUrl();
    if (allowedSearchUrls.has(currentUrl)) {
        return true;
    }
    await saveDebug(`searchurl_changed_${tag}`, { expectedSearchUrl, urlNow: currentUrl });
    return { url: currentUrl };
}
|
|
334
|
+
/**
 * Probe the page state via `detectPageState` and log a one-line summary.
 *
 * @param {string} tag - Label included in the log line.
 * @returns {Promise<{url: *, stage: *}|null>} The detected URL and stage, or `null`
 *   when detection throws (the failure is logged as a warning).
 */
async function observePageState(tag) {
    try {
        const pageState = await detectPageState({
            sessionId: profile,
            platform: 'xiaohongshu',
            serviceUrl,
        });
        console.log(`[Phase2Links][state:${tag}] success=${pageState.success} stage=${pageState.stage} url=${pageState.url} root=${pageState.rootId || 'n/a'} matches=${(pageState.matchIds || []).join(',')}`);
        const { url, stage } = pageState;
        return { url, stage };
    }
    catch (err) {
        console.warn(`[Phase2Links][state:${tag}] failed: ${err?.message || err}`);
        return null;
    }
}
|
|
349
|
+
/**
 * Re-run the keyword search, consuming one unit of the shared recovery budget.
 *
 * Steps: check `recoverAttempts` against `maxRecoverAttempts`, obtain a search
 * permit, navigate with `goToSearch`, then record the resulting URL.
 *
 * @param {string} tag - Label stored in debug snapshots on failure.
 * @returns {Promise<boolean>} `true` only when the URL reached after the retry is
 *   in `allowedSearchUrls`; `false` when the budget is exhausted, the permit is
 *   not granted, or the search page is not ready.
 */
async function retrySearch(tag) {
    if (recoverAttempts >= maxRecoverAttempts) {
        return false;
    }
    recoverAttempts += 1;
    const permit = await waitSearchPermit({ sessionId: profile, keyword, serviceUrl });
    const permitGranted = permit.success && permit.granted;
    if (!permitGranted) {
        await saveDebug('search_permit_failed', {
            tag,
            keyword,
            reason: permit.error || permit.reason || 'permit_denied',
        });
        return false;
    }
    const navigation = await goToSearch({ sessionId: profile, keyword, env, serviceUrl });
    const searchReady = navigation.success && navigation.searchPageReady;
    if (!searchReady) {
        await saveDebug('search_retry_failed', {
            tag,
            keyword,
            error: navigation.error || 'search_not_ready',
            url: navigation.url || '',
        });
        return false;
    }
    // Prefer the URL reported by the navigation; otherwise ask the browser.
    const landedUrl = navigation.url || (await getCurrentUrl().catch(() => ''));
    if (landedUrl) {
        recordSearchUrl(landedUrl);
    }
    return allowedSearchUrls.has(landedUrl);
}
|
|
386
|
+
/**
 * Measure the bounding rectangle of the element matched by the selector that
 * `listSelectorPromise` resolves to.
 *
 * @returns {Promise<{x: number, y: number, width: number, height: number}|null>}
 *   The rectangle, or `null` when there is no selector or the controller response
 *   does not carry four numeric fields.
 */
async function getListRect() {
    const listSelector = await listSelectorPromise;
    if (!listSelector) {
        return null;
    }
    // Script text sent to the page is kept verbatim; the selector is embedded as a
    // JSON string literal.
    const script = `(() => {
const el = document.querySelector(${JSON.stringify(listSelector)});
if (!el) return null;
const r = el.getBoundingClientRect();
return { x: r.x, y: r.y, width: r.width, height: r.height };
})()`;
    const response = await controllerAction('browser:execute', { profile, script });
    const rect = response?.result ?? response?.data?.result ?? response ?? null;
    const isNumericRect = Boolean(rect) &&
        ['x', 'y', 'width', 'height'].every((field) => typeof rect[field] === 'number');
    return isNumericRect ? rect : null;
}
|
|
409
|
+
/**
 * Count `.note-item` cards on the page and how many have their center inside the
 * viewport minus fixed safety margins (180 top, 140 bottom, 24 left/right).
 *
 * @param {string} tag - Accepted for call-site symmetry; not used by this function.
 * @returns {Promise<{total: number, inViewport: number,
 *   viewport: {width: number, height: number}, listRect: object|null}>}
 *   Counts and viewport size coerced with `Number` (missing values become 0), plus
 *   the list rectangle from `getListRect` (`null` if that call fails).
 */
async function probeViewportCandidates(tag) {
    // Script text sent to the page is kept verbatim.
    const script = `(() => {
const viewportHeight = window.innerHeight || document.documentElement.clientHeight || 0;
const viewportWidth = window.innerWidth || document.documentElement.clientWidth || 0;
const safeTop = 180;
const safeBottom = 140;
const safeLeft = 24;
const safeRight = 24;
const cards = Array.from(document.querySelectorAll('.note-item'));
let inViewport = 0;
for (const card of cards) {
const rect = card.getBoundingClientRect();
if (!rect || rect.width <= 0 || rect.height <= 0) continue;
const cx = rect.x + rect.width / 2;
const cy = rect.y + rect.height / 2;
const ok =
cx >= safeLeft &&
cx <= (viewportWidth - safeRight) &&
cy >= safeTop &&
cy <= (viewportHeight - safeBottom);
if (ok) inViewport += 1;
}
return {
total: cards.length,
inViewport,
viewportHeight,
viewportWidth,
};
})()`;
    const response = await controllerAction('browser:execute', { profile, script });
    const stats = response?.result ?? response?.data?.result ?? response ?? {};
    const listRect = await getListRect().catch(() => null);
    const asNumber = (value) => Number(value ?? 0);
    return {
        total: asNumber(stats?.total),
        inViewport: asNumber(stats?.inViewport),
        viewport: {
            width: asNumber(stats?.viewportWidth),
            height: asNumber(stats?.viewportHeight),
        },
        listRect,
    };
}
|
|
454
|
+
/**
 * Detect captcha / verification interruptions.
 *
 * First checks whether the current URL contains `captcha` or `verify`; then asks
 * the page whether a captcha modal or captcha title element exists.
 *
 * @param {string} tag - Label stored in the debug snapshot when a risk is found.
 * @returns {Promise<{ok: true}|{ok: false, error: string}>}
 */
async function checkRisk(tag) {
    const urlNow = await getCurrentUrl().catch(() => '');
    const urlLooksRisky = ['captcha', 'verify'].some((marker) => urlNow.includes(marker));
    if (urlLooksRisky) {
        await saveDebug('risk_captcha_url', { tag, urlNow });
        return { ok: false, error: `captcha_url_detected: ${urlNow}` };
    }
    // Script text sent to the page is kept verbatim.
    const script = `(() => {
const modal =
document.querySelector('.r-captcha-modal') ||
document.querySelector('.captcha-modal-content') ||
document.querySelector('[class*="captcha-modal"]') ||
document.querySelector('[class*="captcha"][class*="modal"]');
const title =
document.querySelector('.captcha-modal-title') ||
document.querySelector('.captcha-modal__header .text-h6-bold') ||
null;
const modalText = modal ? (modal.textContent || '').trim().slice(0, 120) : '';
const titleText = title ? (title.textContent || '').trim().slice(0, 120) : '';
return {
visible: Boolean(modal || title),
modalClass: modal && modal.className ? String(modal.className) : '',
modalText,
titleText,
};
})()`;
    const response = await controllerAction('browser:execute', { profile, script });
    const modalInfo = response?.result ?? response?.data?.result ?? response ?? {};
    if (!modalInfo?.visible) {
        return { ok: true };
    }
    await saveDebug('risk_captcha_modal', {
        tag,
        urlNow,
        modalClass: modalInfo?.modalClass ?? '',
        modalText: modalInfo?.modalText ?? '',
        titleText: modalInfo?.titleText ?? '',
    });
    return { ok: false, error: 'captcha_modal_detected' };
}
|
|
495
|
+
/**
 * Try to bring the session back to an allowed search-result page.
 *
 * Logs the page state before and after calling `restorePhase` (phase 3). If the
 * page is then in the `search` stage on an allowed URL, recovery succeeded.
 * Otherwise it gives up when `failFast` is set, or falls back to `retrySearch`.
 *
 * @param {string} tag - Label used in log lines and the retry tag.
 * @returns {Promise<boolean>} Whether the session is back on an allowed search URL.
 */
async function recoverToSearch(tag) {
    await observePageState(`${tag}:before`);
    const restore = await restorePhase({ sessionId: profile, phase: 3, serviceUrl });
    console.log(`[Phase2Links][restore:${tag}] success=${restore.success} restored=${restore.restored} stage=${restore.finalStage} url=${restore.url}`);
    const after = await observePageState(`${tag}:after`);
    const backOnSearch = Boolean(after) && after.stage === 'search' && allowedSearchUrls.has(after.url);
    if (backOnSearch) {
        return true;
    }
    return failFast ? false : retrySearch(`recover_${tag}`);
}
|
|
511
|
+
/**
 * Confirm the page is in the `search` stage on an allowed search URL, attempting
 * a recovery via `recoverToSearch` when it is not.
 *
 * @param {string} tag - Label used in logs, debug snapshots and the recovery tag.
 * @returns {Promise<{ok: true}|{ok: false, error: string}>}
 *   `exit_state_probe_failed` when the state cannot be detected;
 *   `exit_state_mismatch` when recovery does not succeed.
 */
async function ensureExitState(tag) {
    const state = await detectPageState({
        sessionId: profile,
        platform: 'xiaohongshu',
        serviceUrl,
    }).catch((err) => {
        console.warn(`[Phase2Links][state:${tag}] failed: ${err?.message || err}`);
        return null;
    });
    if (!state) {
        await saveDebug('exit_state_probe_failed', { tag, expectedSearchUrl });
        return { ok: false, error: 'exit_state_probe_failed' };
    }
    const onAllowedSearch = state.stage === 'search' && allowedSearchUrls.has(state.url);
    if (onAllowedSearch) {
        return { ok: true };
    }
    await saveDebug('exit_state_mismatch', {
        tag,
        expectedSearchUrl,
        stage: state.stage,
        url: state.url,
        rootId: state.rootId,
        matchIds: state.matchIds || [],
    });
    const restored = await recoverToSearch(`exit_state_mismatch_${tag}`);
    return restored ? { ok: true } : { ok: false, error: 'exit_state_mismatch' };
}
|
|
540
|
+
/**
 * Make sure at least one result card is inside the viewport's safe area,
 * escalating through progressively slower recovery steps:
 *   1. probe immediately;
 *   2. two short waits (3s each), re-probing after each;
 *   3. one scroll (up when the list's top edge is above the viewport, else down);
 *   4. a bounce scroll via `bounceScrollOnStuck`;
 *   5. long back-off waits (10s, 20s, 50s), re-probing after each.
 *
 * @param {string} tag - Prefix for probe tags and debug snapshot metadata.
 * @returns {Promise<{ok: true}|{ok: false, error: 'viewport_empty'}>}
 */
async function ensureViewportCandidates(tag) {
    const hasVisibleCards = (result) => result.inViewport > 0;
    // Sleep through each delay in turn and re-probe; true as soon as a card shows up.
    const waitForCards = async (delays, label) => {
        for (const waitMs of delays) {
            await delay(waitMs);
            const recheck = await probeViewportCandidates(`${tag}_${label}_${waitMs}`);
            if (hasVisibleCards(recheck)) {
                return true;
            }
        }
        return false;
    };

    const probe = await probeViewportCandidates(tag);
    if (hasVisibleCards(probe)) {
        return { ok: true };
    }
    if (await waitForCards([3000, 3000], 'wait')) {
        return { ok: true };
    }
    await saveDebug('viewport_empty', {
        tag,
        probe,
        expectedSearchUrl,
    });
    // Scroll up only when the list rectangle is known and its top is above the viewport.
    const shouldScrollUp = probe.listRect &&
        typeof probe.listRect.y === 'number' &&
        probe.viewport.height > 0 &&
        probe.listRect.y < 0;
    await scrollSearchList(shouldScrollUp ? 'up' : 'down', 520 + Math.floor(Math.random() * 200));
    await delay(900);
    const after = await probeViewportCandidates(`${tag}_after_scroll`);
    if (hasVisibleCards(after)) {
        return { ok: true };
    }
    const bounced = await bounceScrollOnStuck();
    if (bounced) {
        await delay(900);
        const afterBounce = await probeViewportCandidates(`${tag}_after_bounce`);
        if (hasVisibleCards(afterBounce)) {
            return { ok: true };
        }
    }
    if (await waitForCards([10000, 20000, 50000], 'backoff')) {
        return { ok: true };
    }
    await saveDebug('viewport_empty_after_recover', {
        tag,
        initial: probe,
        after,
        expectedSearchUrl,
    });
    return { ok: false, error: 'viewport_empty' };
}
|
|
593
|
+
/**
 * Run the three readiness gates in order, stopping at the first failure:
 * risk check, exit-state check, then viewport-candidate check.
 *
 * @param {string} tag - Prefix for the per-gate tags (`:risk`, `:exit`, `:viewport`).
 * @returns {Promise<{ok: true}|{ok: false, error: string}>} The failing gate's
 *   result (risk failures default the error to `risk_detected`), or `{ ok: true }`.
 */
async function ensureSearchExitAndViewport(tag) {
    const risk = await checkRisk(`${tag}:risk`);
    if (!risk.ok) {
        return { ok: false, error: risk.error || 'risk_detected' };
    }
    const exitState = await ensureExitState(`${tag}:exit`);
    if (!exitState.ok) {
        return exitState;
    }
    const viewport = await ensureViewportCandidates(`${tag}:viewport`);
    return viewport.ok ? { ok: true } : viewport;
}
|
|
605
|
+
/**
 * Scroll the search-result list one step.
 *
 * @param {'up'|'down'} direction - Scroll direction.
 * @param {number} amount - Requested distance; floored and clamped to [120, 800].
 * @returns {Promise<boolean>} The success flag reported by the container scroll
 *   operation; if that path throws, `true` when the keyboard fallback was sent and
 *   `false` when the fallback throws as well.
 */
async function scrollSearchList(direction, amount) {
    // System-level scrolling: prefer the container "scroll" operation; if it throws,
    // fall back to pressing PageDown/PageUp.
    try {
        const clampedAmount = Math.min(800, Math.max(120, Math.floor(amount)));
        const op = await controllerAction('container:operation', {
            containerId: listContainerId,
            operationId: 'scroll',
            sessionId: profile,
            config: { direction, amount: clampedAmount },
        });
        const payload = op?.data ?? op;
        const succeeded = Boolean(payload?.success ?? payload?.data?.success ?? op?.success);
        await delay(1100);
        return succeeded;
    }
    catch {
        const fallbackKey = direction === 'up' ? 'PageUp' : 'PageDown';
        try {
            await controllerAction('keyboard:press', { profileId: profile, key: fallbackKey });
            await delay(1300);
            return true;
        }
        catch {
            return false;
        }
    }
}
|
|
630
|
+
/**
 * "Bounce" the list when scrolling appears stuck: up to three attempts, each
 * scrolling up a random 3-6 steps, then down 3 steps, then trying one full
 * (800) downward scroll. A sweep stops early at the first failed scroll.
 *
 * @returns {Promise<boolean>} `true` as soon as the final downward scroll of an
 *   attempt succeeds; `false` after all attempts fail.
 */
async function bounceScrollOnStuck() {
    const MAX_ATTEMPTS = 3;
    const jitter = (base, spread) => base + Math.floor(Math.random() * spread);
    // Scroll `times` steps in one direction with a randomized pause after each step.
    const sweep = async (direction, times, amountSpread, pauseBase, pauseSpread) => {
        for (let step = 0; step < times; step += 1) {
            const scrolled = await scrollSearchList(direction, jitter(520, amountSpread));
            if (!scrolled) {
                return;
            }
            await delay(jitter(pauseBase, pauseSpread));
        }
    };
    let attempt = 1;
    while (attempt <= MAX_ATTEMPTS) {
        await sweep('up', jitter(3, 4), 180, 500, 300);
        await sweep('down', 3, 200, 600, 350);
        if (await scrollSearchList('down', 800)) {
            console.log(`[Phase2Links] bounce scroll succeeded on attempt ${attempt}`);
            return true;
        }
        console.warn(`[Phase2Links] bounce scroll attempt ${attempt} failed`);
        attempt += 1;
    }
    return false;
}
|
|
654
|
+
// 2) Collect screen by screen: handle only the cards inside the current viewport, then scroll to the next screen
|
|
655
|
+
let scrollSteps = 0;
|
|
656
|
+
let added = 0;
|
|
657
|
+
while (byNoteId.size < targetTotal && scrollSteps < maxScrollRounds) {
|
|
658
|
+
const stable = await assertSearchUrlStable('before_collect_list');
|
|
659
|
+
if (stable !== true) {
|
|
660
|
+
const restored = await recoverToSearch('searchurl_changed_before_collect');
|
|
661
|
+
if (!restored) {
|
|
662
|
+
return {
|
|
663
|
+
success: false,
|
|
664
|
+
keywordDir,
|
|
665
|
+
linksPath,
|
|
666
|
+
expectedSearchUrl,
|
|
667
|
+
initialCount,
|
|
668
|
+
finalCount: byNoteId.size,
|
|
669
|
+
addedCount: added,
|
|
670
|
+
targetCount: targetTotal,
|
|
671
|
+
error: `searchurl_changed: ${stable.url}`,
|
|
672
|
+
};
|
|
673
|
+
}
|
|
674
|
+
continue;
|
|
675
|
+
}
|
|
676
|
+
const ready = await ensureSearchExitAndViewport('before_collect_list');
|
|
677
|
+
if (!ready.ok) {
|
|
678
|
+
return {
|
|
679
|
+
success: false,
|
|
680
|
+
keywordDir,
|
|
681
|
+
linksPath,
|
|
682
|
+
expectedSearchUrl,
|
|
683
|
+
initialCount,
|
|
684
|
+
finalCount: byNoteId.size,
|
|
685
|
+
addedCount: added,
|
|
686
|
+
targetCount: targetTotal,
|
|
687
|
+
error: ready.error || 'search_exit_state_or_viewport_failed',
|
|
688
|
+
};
|
|
689
|
+
}
|
|
690
|
+
const remaining = Math.max(0, targetTotal - byNoteId.size);
|
|
691
|
+
const list = await collectSearchList({
|
|
692
|
+
sessionId,
|
|
693
|
+
targetCount: Math.min(remaining, 30),
|
|
694
|
+
maxScrollRounds: 1,
|
|
695
|
+
serviceUrl,
|
|
696
|
+
});
|
|
697
|
+
const stable2 = await assertSearchUrlStable('after_collect_list');
|
|
698
|
+
if (stable2 !== true) {
|
|
699
|
+
const restored = await recoverToSearch('searchurl_changed_after_collect_list');
|
|
700
|
+
if (!restored) {
|
|
701
|
+
return {
|
|
702
|
+
success: false,
|
|
703
|
+
keywordDir,
|
|
704
|
+
linksPath,
|
|
705
|
+
expectedSearchUrl,
|
|
706
|
+
initialCount,
|
|
707
|
+
finalCount: byNoteId.size,
|
|
708
|
+
addedCount: added,
|
|
709
|
+
targetCount: targetTotal,
|
|
710
|
+
error: `searchurl_changed_after_collect_list: ${stable2.url}`,
|
|
711
|
+
};
|
|
712
|
+
}
|
|
713
|
+
continue;
|
|
714
|
+
}
|
|
715
|
+
if (!list.success || !Array.isArray(list.items) || list.items.length === 0) {
|
|
716
|
+
await saveDebug('collect_search_list_failed', { success: Boolean(list.success), error: list.error || null });
|
|
717
|
+
const readyRetry = await ensureSearchExitAndViewport('collect_list_empty_retry');
|
|
718
|
+
if (readyRetry.ok) {
|
|
719
|
+
const retry = await collectSearchList({
|
|
720
|
+
sessionId,
|
|
721
|
+
targetCount: Math.min(remaining, 30),
|
|
722
|
+
maxScrollRounds: 1,
|
|
723
|
+
serviceUrl,
|
|
724
|
+
});
|
|
725
|
+
if (retry.success && Array.isArray(retry.items) && retry.items.length > 0) {
|
|
726
|
+
console.log('[Phase2Links] collect list retry succeeded after viewport recovery');
|
|
727
|
+
list.items = retry.items;
|
|
728
|
+
}
|
|
729
|
+
else {
|
|
730
|
+
await saveDebug('collect_search_list_retry_failed', { success: Boolean(retry.success), error: retry.error || null });
|
|
731
|
+
return {
|
|
732
|
+
success: false,
|
|
733
|
+
keywordDir,
|
|
734
|
+
linksPath,
|
|
735
|
+
expectedSearchUrl,
|
|
736
|
+
initialCount,
|
|
737
|
+
finalCount: byNoteId.size,
|
|
738
|
+
addedCount: added,
|
|
739
|
+
targetCount: targetTotal,
|
|
740
|
+
error: retry.error || 'CollectSearchListBlock returned no items (retry)',
|
|
741
|
+
};
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
else {
|
|
745
|
+
return {
|
|
746
|
+
success: false,
|
|
747
|
+
keywordDir,
|
|
748
|
+
linksPath,
|
|
749
|
+
expectedSearchUrl,
|
|
750
|
+
initialCount,
|
|
751
|
+
finalCount: byNoteId.size,
|
|
752
|
+
addedCount: added,
|
|
753
|
+
targetCount: targetTotal,
|
|
754
|
+
error: readyRetry.error || 'CollectSearchListBlock returned no items',
|
|
755
|
+
};
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
const candidates = shuffleItems(list.items);
|
|
759
|
+
console.log(`[Phase2Links] viewport candidates=${candidates.length} remaining=${remaining}`);
|
|
760
|
+
for (const item of candidates) {
|
|
761
|
+
if (byNoteId.size >= targetTotal)
|
|
762
|
+
break;
|
|
763
|
+
const domIndex = typeof item.domIndex === 'number' ? item.domIndex : undefined;
|
|
764
|
+
const noteId = typeof item.noteId === 'string' ? item.noteId : undefined;
|
|
765
|
+
const clickRect = item.rect &&
|
|
766
|
+
typeof item.rect.x === 'number' &&
|
|
767
|
+
typeof item.rect.y === 'number' &&
|
|
768
|
+
typeof item.rect.width === 'number' &&
|
|
769
|
+
typeof item.rect.height === 'number'
|
|
770
|
+
? item.rect
|
|
771
|
+
: undefined;
|
|
772
|
+
if (noteId && byNoteId.has(noteId))
|
|
773
|
+
continue;
|
|
774
|
+
const stable3 = await assertSearchUrlStable('before_open_detail');
|
|
775
|
+
if (stable3 !== true) {
|
|
776
|
+
const restored = await recoverToSearch('searchurl_changed_before_open_detail');
|
|
777
|
+
if (!restored) {
|
|
778
|
+
return {
|
|
779
|
+
success: false,
|
|
780
|
+
keywordDir,
|
|
781
|
+
linksPath,
|
|
782
|
+
expectedSearchUrl,
|
|
783
|
+
initialCount,
|
|
784
|
+
finalCount: byNoteId.size,
|
|
785
|
+
addedCount: added,
|
|
786
|
+
targetCount: targetTotal,
|
|
787
|
+
error: `searchurl_changed_before_open_detail: ${stable3.url}`,
|
|
788
|
+
};
|
|
789
|
+
}
|
|
790
|
+
continue;
|
|
791
|
+
}
|
|
792
|
+
const riskBeforeOpen = await checkRisk('before_open_detail');
|
|
793
|
+
if (!riskBeforeOpen.ok) {
|
|
794
|
+
return {
|
|
795
|
+
success: false,
|
|
796
|
+
keywordDir,
|
|
797
|
+
linksPath,
|
|
798
|
+
expectedSearchUrl,
|
|
799
|
+
initialCount,
|
|
800
|
+
finalCount: byNoteId.size,
|
|
801
|
+
addedCount: added,
|
|
802
|
+
targetCount: targetTotal,
|
|
803
|
+
error: riskBeforeOpen.error || 'risk_detected_before_open_detail',
|
|
804
|
+
};
|
|
805
|
+
}
|
|
806
|
+
console.log(`[Phase2Links] click candidate noteId=${noteId || 'unknown'} domIndex=${domIndex ?? 'n/a'} rect=${clickRect ? JSON.stringify(clickRect) : 'n/a'}`);
|
|
807
|
+
const opened = await openDetail({
|
|
808
|
+
sessionId,
|
|
809
|
+
containerId: item.containerId || 'xiaohongshu_search.search_result_item',
|
|
810
|
+
domIndex,
|
|
811
|
+
clickRect,
|
|
812
|
+
expectedNoteId: item.noteId,
|
|
813
|
+
expectedHref: item.hrefAttr,
|
|
814
|
+
debugDir,
|
|
815
|
+
serviceUrl,
|
|
816
|
+
});
|
|
817
|
+
if (!opened.success || !opened.safeDetailUrl || !opened.noteId) {
|
|
818
|
+
await saveDebug('open_detail_failed', { domIndex, expectedNoteId: item.noteId, error: opened.error || null });
|
|
819
|
+
const restored = await recoverToSearch('open_detail_failed');
|
|
820
|
+
if (!restored) {
|
|
821
|
+
return {
|
|
822
|
+
success: false,
|
|
823
|
+
keywordDir,
|
|
824
|
+
linksPath,
|
|
825
|
+
expectedSearchUrl,
|
|
826
|
+
initialCount,
|
|
827
|
+
finalCount: byNoteId.size,
|
|
828
|
+
addedCount: added,
|
|
829
|
+
targetCount: targetTotal,
|
|
830
|
+
error: `open_detail_failed: ${opened.error || 'unknown'}`,
|
|
831
|
+
};
|
|
832
|
+
}
|
|
833
|
+
const afterRecover = await ensureSearchExitAndViewport('after_open_detail_failed');
|
|
834
|
+
if (!afterRecover.ok) {
|
|
835
|
+
return {
|
|
836
|
+
success: false,
|
|
837
|
+
keywordDir,
|
|
838
|
+
linksPath,
|
|
839
|
+
expectedSearchUrl,
|
|
840
|
+
initialCount,
|
|
841
|
+
finalCount: byNoteId.size,
|
|
842
|
+
addedCount: added,
|
|
843
|
+
targetCount: targetTotal,
|
|
844
|
+
error: afterRecover.error || 'search_exit_state_failed_after_open_detail',
|
|
845
|
+
};
|
|
846
|
+
}
|
|
847
|
+
await delay(900);
|
|
848
|
+
continue;
|
|
849
|
+
}
|
|
850
|
+
if (byNoteId.has(opened.noteId)) {
|
|
851
|
+
const closedDup = await closeDetail({ sessionId, serviceUrl });
|
|
852
|
+
if (!closedDup.success) {
|
|
853
|
+
await saveDebug('close_detail_failed_after_duplicate', { noteId: opened.noteId, error: closedDup.error || null });
|
|
854
|
+
const restored = await recoverToSearch('close_detail_failed_after_duplicate');
|
|
855
|
+
if (!restored) {
|
|
856
|
+
return {
|
|
857
|
+
success: false,
|
|
858
|
+
keywordDir,
|
|
859
|
+
linksPath,
|
|
860
|
+
expectedSearchUrl,
|
|
861
|
+
initialCount,
|
|
862
|
+
finalCount: byNoteId.size,
|
|
863
|
+
addedCount: added,
|
|
864
|
+
targetCount: targetTotal,
|
|
865
|
+
error: `close_detail_failed: ${closedDup.error || 'unknown'}`,
|
|
866
|
+
};
|
|
867
|
+
}
|
|
868
|
+
const afterRecover = await ensureSearchExitAndViewport('after_close_detail_failed_duplicate');
|
|
869
|
+
if (!afterRecover.ok) {
|
|
870
|
+
return {
|
|
871
|
+
success: false,
|
|
872
|
+
keywordDir,
|
|
873
|
+
linksPath,
|
|
874
|
+
expectedSearchUrl,
|
|
875
|
+
initialCount,
|
|
876
|
+
finalCount: byNoteId.size,
|
|
877
|
+
addedCount: added,
|
|
878
|
+
targetCount: targetTotal,
|
|
879
|
+
error: afterRecover.error || 'search_exit_state_failed_after_close_detail_duplicate',
|
|
880
|
+
};
|
|
881
|
+
}
|
|
882
|
+
await delay(900);
|
|
883
|
+
continue;
|
|
884
|
+
}
|
|
885
|
+
await delay(700);
|
|
886
|
+
continue;
|
|
887
|
+
}
|
|
888
|
+
if (!isValidSafeUrl(opened.safeDetailUrl) || !isValidSearchUrl(expectedSearchUrl, keyword)) {
|
|
889
|
+
await saveDebug('invalid_link', {
|
|
890
|
+
noteId: opened.noteId,
|
|
891
|
+
safeUrl: opened.safeDetailUrl,
|
|
892
|
+
searchUrl: expectedSearchUrl,
|
|
893
|
+
});
|
|
894
|
+
const closedInvalid = await closeDetail({ sessionId, serviceUrl });
|
|
895
|
+
if (!closedInvalid.success) {
|
|
896
|
+
await saveDebug('close_detail_failed_after_invalid', { noteId: opened.noteId, error: closedInvalid.error || null });
|
|
897
|
+
const restored = await recoverToSearch('close_detail_failed_after_invalid');
|
|
898
|
+
if (!restored) {
|
|
899
|
+
return {
|
|
900
|
+
success: false,
|
|
901
|
+
keywordDir,
|
|
902
|
+
linksPath,
|
|
903
|
+
expectedSearchUrl,
|
|
904
|
+
initialCount,
|
|
905
|
+
finalCount: byNoteId.size,
|
|
906
|
+
addedCount: added,
|
|
907
|
+
targetCount: targetTotal,
|
|
908
|
+
error: `close_detail_failed: ${closedInvalid.error || 'unknown'}`,
|
|
909
|
+
};
|
|
910
|
+
}
|
|
911
|
+
const afterRecover = await ensureSearchExitAndViewport('after_close_detail_failed_invalid');
|
|
912
|
+
if (!afterRecover.ok) {
|
|
913
|
+
return {
|
|
914
|
+
success: false,
|
|
915
|
+
keywordDir,
|
|
916
|
+
linksPath,
|
|
917
|
+
expectedSearchUrl,
|
|
918
|
+
initialCount,
|
|
919
|
+
finalCount: byNoteId.size,
|
|
920
|
+
addedCount: added,
|
|
921
|
+
targetCount: targetTotal,
|
|
922
|
+
error: afterRecover.error || 'search_exit_state_failed_after_close_detail_invalid',
|
|
923
|
+
};
|
|
924
|
+
}
|
|
925
|
+
await delay(900);
|
|
926
|
+
continue;
|
|
927
|
+
}
|
|
928
|
+
await delay(700);
|
|
929
|
+
continue;
|
|
930
|
+
}
|
|
931
|
+
const entry = {
|
|
932
|
+
noteId: opened.noteId,
|
|
933
|
+
safeUrl: opened.safeDetailUrl,
|
|
934
|
+
searchUrl: expectedSearchUrl,
|
|
935
|
+
ts: new Date().toISOString(),
|
|
936
|
+
};
|
|
937
|
+
// Append to disk (every successful entry is persisted so an incremental run can resume after a mid-run crash)
|
|
938
|
+
await appendJsonl(linksPath, entry);
|
|
939
|
+
byNoteId.set(entry.noteId, entry);
|
|
940
|
+
added += 1;
|
|
941
|
+
console.log(`[Phase2Links] collected ${byNoteId.size}/${targetTotal}: noteId=${entry.noteId}`);
|
|
942
|
+
const closed = await closeDetail({ sessionId, serviceUrl });
|
|
943
|
+
if (!closed.success) {
|
|
944
|
+
await saveDebug('close_detail_failed', { noteId: entry.noteId, error: closed.error || null });
|
|
945
|
+
const restored = await recoverToSearch('close_detail_failed');
|
|
946
|
+
if (!restored) {
|
|
947
|
+
return {
|
|
948
|
+
success: false,
|
|
949
|
+
keywordDir,
|
|
950
|
+
linksPath,
|
|
951
|
+
expectedSearchUrl,
|
|
952
|
+
initialCount,
|
|
953
|
+
finalCount: byNoteId.size,
|
|
954
|
+
addedCount: added,
|
|
955
|
+
targetCount: targetTotal,
|
|
956
|
+
error: `close_detail_failed: ${closed.error || 'unknown'}`,
|
|
957
|
+
};
|
|
958
|
+
}
|
|
959
|
+
const afterRecover = await ensureSearchExitAndViewport('after_close_detail_failed');
|
|
960
|
+
if (!afterRecover.ok) {
|
|
961
|
+
return {
|
|
962
|
+
success: false,
|
|
963
|
+
keywordDir,
|
|
964
|
+
linksPath,
|
|
965
|
+
expectedSearchUrl,
|
|
966
|
+
initialCount,
|
|
967
|
+
finalCount: byNoteId.size,
|
|
968
|
+
addedCount: added,
|
|
969
|
+
targetCount: targetTotal,
|
|
970
|
+
error: afterRecover.error || 'search_exit_state_failed_after_close_detail',
|
|
971
|
+
};
|
|
972
|
+
}
|
|
973
|
+
await delay(900);
|
|
974
|
+
continue;
|
|
975
|
+
}
|
|
976
|
+
await delay(850);
|
|
977
|
+
const exitReady = await ensureSearchExitAndViewport('after_close_detail');
|
|
978
|
+
if (!exitReady.ok) {
|
|
979
|
+
return {
|
|
980
|
+
success: false,
|
|
981
|
+
keywordDir,
|
|
982
|
+
linksPath,
|
|
983
|
+
expectedSearchUrl,
|
|
984
|
+
initialCount,
|
|
985
|
+
finalCount: byNoteId.size,
|
|
986
|
+
addedCount: added,
|
|
987
|
+
targetCount: targetTotal,
|
|
988
|
+
error: exitReady.error || 'search_exit_state_failed_after_close_detail',
|
|
989
|
+
};
|
|
990
|
+
}
|
|
991
|
+
const stable4 = await assertSearchUrlStable('after_close_detail');
|
|
992
|
+
if (stable4 !== true) {
|
|
993
|
+
const restored = await recoverToSearch('searchurl_changed_after_close_detail');
|
|
994
|
+
if (!restored) {
|
|
995
|
+
return {
|
|
996
|
+
success: false,
|
|
997
|
+
keywordDir,
|
|
998
|
+
linksPath,
|
|
999
|
+
expectedSearchUrl,
|
|
1000
|
+
initialCount,
|
|
1001
|
+
finalCount: byNoteId.size,
|
|
1002
|
+
addedCount: added,
|
|
1003
|
+
targetCount: targetTotal,
|
|
1004
|
+
error: `searchurl_changed_after_close_detail: ${stable4.url}`,
|
|
1005
|
+
};
|
|
1006
|
+
}
|
|
1007
|
+
continue;
|
|
1008
|
+
}
|
|
1009
|
+
}
|
|
1010
|
+
if (byNoteId.size >= targetTotal)
|
|
1011
|
+
break;
|
|
1012
|
+
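// Next screen. NOTE(review): the statement "scrollSteps += 1;" appears to have been swallowed into this comment by an encoding error (lost line break) — confirm and restore it on its own line.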
|
|
1013
|
+
const moved = await scrollSearchList('down', 800);
|
|
1014
|
+
if (!moved) {
|
|
1015
|
+
const bounced = await bounceScrollOnStuck();
|
|
1016
|
+
if (!bounced) {
|
|
1017
|
+
await saveDebug('scroll_failed', { scrollSteps, collected: byNoteId.size, reason: 'bounce_exhausted' });
|
|
1018
|
+
break;
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
await delay(800);
|
|
1022
|
+
}
|
|
1023
|
+
const finalCount = byNoteId.size;
|
|
1024
|
+
if (finalCount !== targetTotal) {
|
|
1025
|
+
await saveDebug('target_not_reached', { finalCount, targetCount: targetTotal, expectedSearchUrl });
|
|
1026
|
+
return {
|
|
1027
|
+
success: false,
|
|
1028
|
+
keywordDir,
|
|
1029
|
+
linksPath,
|
|
1030
|
+
expectedSearchUrl,
|
|
1031
|
+
initialCount,
|
|
1032
|
+
finalCount,
|
|
1033
|
+
addedCount: added,
|
|
1034
|
+
targetCount: targetTotal,
|
|
1035
|
+
error: `target_not_reached: ${finalCount}/${targetTotal}`,
|
|
1036
|
+
};
|
|
1037
|
+
}
|
|
1038
|
+
return {
|
|
1039
|
+
success: true,
|
|
1040
|
+
keywordDir,
|
|
1041
|
+
linksPath,
|
|
1042
|
+
expectedSearchUrl,
|
|
1043
|
+
initialCount,
|
|
1044
|
+
finalCount,
|
|
1045
|
+
addedCount: added,
|
|
1046
|
+
targetCount: targetTotal,
|
|
1047
|
+
};
|
|
1048
|
+
}
|
|
1049
|
+
//# sourceMappingURL=XiaohongshuCollectLinksBlock.js.map
|