@web-auto/webauto 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/desktop-console/default-settings.json +1 -0
- package/apps/desktop-console/dist/main/index.mjs +1618 -0
- package/apps/desktop-console/{src → dist}/main/preload.mjs +10 -0
- package/apps/desktop-console/dist/renderer/index.js +3063 -0
- package/apps/desktop-console/entry/ui-console.mjs +299 -0
- package/apps/webauto/entry/account.mjs +356 -0
- package/apps/webauto/entry/lib/account-detect.mjs +160 -0
- package/apps/webauto/entry/lib/account-store.mjs +587 -0
- package/apps/webauto/entry/lib/profilepool.mjs +1 -1
- package/apps/webauto/entry/xhs-install.mjs +27 -3
- package/apps/webauto/entry/xhs-status.mjs +152 -0
- package/apps/webauto/entry/xhs-unified.mjs +595 -17
- package/bin/webauto.mjs +263 -15
- package/dist/apps/webauto/server.js +66 -0
- package/dist/modules/camo-backend/src/index.js +575 -0
- package/dist/modules/camo-backend/src/internal/BrowserSession.js +817 -0
- package/dist/modules/camo-backend/src/internal/ElementRegistry.js +61 -0
- package/dist/modules/camo-backend/src/internal/ProfileLock.js +85 -0
- package/dist/modules/camo-backend/src/internal/SessionManager.js +172 -0
- package/dist/modules/camo-backend/src/internal/container-matcher.js +852 -0
- package/dist/modules/camo-backend/src/internal/engine-manager.js +258 -0
- package/dist/modules/camo-backend/src/internal/fingerprint.js +203 -0
- package/dist/modules/camo-backend/src/internal/pageRuntime.js +29 -0
- package/dist/modules/camo-backend/src/internal/runtimeInjector.js +30 -0
- package/dist/modules/camo-backend/src/internal/state-bus.js +46 -0
- package/dist/modules/camo-backend/src/internal/storage-paths.js +36 -0
- package/dist/modules/camo-backend/src/internal/ws-server.js +1202 -0
- package/dist/modules/camo-runtime/src/utils/browser-service.mjs +423 -0
- package/dist/modules/camo-runtime/src/utils/config.mjs +77 -0
- package/dist/modules/container-registry/src/index.js +184 -0
- package/dist/modules/logging/src/index.js +92 -0
- package/dist/modules/operations/src/builtin.js +27 -0
- package/dist/modules/operations/src/container-binding.js +75 -0
- package/dist/modules/operations/src/executor.js +146 -0
- package/dist/modules/operations/src/operations/click.js +167 -0
- package/dist/modules/operations/src/operations/extract.js +204 -0
- package/dist/modules/operations/src/operations/find-child.js +17 -0
- package/dist/modules/operations/src/operations/highlight.js +138 -0
- package/dist/modules/operations/src/operations/key.js +61 -0
- package/dist/modules/operations/src/operations/navigate.js +148 -0
- package/dist/modules/operations/src/operations/scroll.js +126 -0
- package/dist/modules/operations/src/operations/type.js +190 -0
- package/dist/modules/operations/src/queue.js +100 -0
- package/dist/modules/operations/src/registry.js +11 -0
- package/dist/modules/operations/src/system/mouse.js +33 -0
- package/dist/modules/state/src/atomic-json.js +33 -0
- package/dist/modules/workflow/blocks/AnchorVerificationBlock.js +71 -0
- package/dist/modules/workflow/blocks/BehaviorRandomizer.js +26 -0
- package/dist/modules/workflow/blocks/CallWorkflowBlock.js +38 -0
- package/dist/modules/workflow/blocks/CloseDetailBlock.js +209 -0
- package/dist/modules/workflow/blocks/CollectBatch.js +137 -0
- package/dist/modules/workflow/blocks/CollectCommentsBlock.js +415 -0
- package/dist/modules/workflow/blocks/CollectSearchListBlock.js +599 -0
- package/dist/modules/workflow/blocks/CollectWeiboPosts.js +229 -0
- package/dist/modules/workflow/blocks/DetectPageStateBlock.js +259 -0
- package/dist/modules/workflow/blocks/EnsureLoginBlock.js +162 -0
- package/dist/modules/workflow/blocks/EnsureSession.js +426 -0
- package/dist/modules/workflow/blocks/ErrorClassifier.js +164 -0
- package/dist/modules/workflow/blocks/ErrorRecoveryBlock.js +319 -0
- package/dist/modules/workflow/blocks/ExpandCommentsBlock.js +1032 -0
- package/dist/modules/workflow/blocks/ExtractDetailBlock.js +310 -0
- package/dist/modules/workflow/blocks/ExtractPostFields.js +88 -0
- package/dist/modules/workflow/blocks/GenerateSmartReplyBlock.js +68 -0
- package/dist/modules/workflow/blocks/GoToSearchBlock.js +497 -0
- package/dist/modules/workflow/blocks/GracefulFallbackBlock.js +104 -0
- package/dist/modules/workflow/blocks/HighlightBlock.js +66 -0
- package/dist/modules/workflow/blocks/InitAutoScroll.js +65 -0
- package/dist/modules/workflow/blocks/LoadContainerDefinition.js +50 -0
- package/dist/modules/workflow/blocks/LoadContainerIndex.js +43 -0
- package/dist/modules/workflow/blocks/LocateAndGuardBlock.js +176 -0
- package/dist/modules/workflow/blocks/LoginRecoveryBlock.js +242 -0
- package/dist/modules/workflow/blocks/MatchContainers.js +64 -0
- package/dist/modules/workflow/blocks/MonitoringBlock.js +190 -0
- package/dist/modules/workflow/blocks/OpenDetailBlock.js +1240 -0
- package/dist/modules/workflow/blocks/OrganizeXhsNotesBlock.js +117 -0
- package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +270 -0
- package/dist/modules/workflow/blocks/PickSinglePost.js +69 -0
- package/dist/modules/workflow/blocks/ProgressTracker.js +125 -0
- package/dist/modules/workflow/blocks/RecordFixtureBlock.js +44 -0
- package/dist/modules/workflow/blocks/RenderMarkdown.js +48 -0
- package/dist/modules/workflow/blocks/SaveFile.js +54 -0
- package/dist/modules/workflow/blocks/ScrollNextBatch.js +72 -0
- package/dist/modules/workflow/blocks/SessionHealthBlock.js +73 -0
- package/dist/modules/workflow/blocks/StartBrowserService.js +45 -0
- package/dist/modules/workflow/blocks/ValidateContainerDefinition.js +67 -0
- package/dist/modules/workflow/blocks/ValidateExtract.js +35 -0
- package/dist/modules/workflow/blocks/WaitSearchPermitBlock.js +162 -0
- package/dist/modules/workflow/blocks/WaitStable.js +74 -0
- package/dist/modules/workflow/blocks/WarmupCommentsBlock.js +120 -0
- package/dist/modules/workflow/blocks/WorkflowExecutor.js +156 -0
- package/dist/modules/workflow/blocks/XiaohongshuCollectFromLinksBlock.js +1004 -0
- package/dist/modules/workflow/blocks/XiaohongshuCollectLinksBlock.js +1049 -0
- package/dist/modules/workflow/blocks/XiaohongshuFullCollectBlock.js +782 -0
- package/dist/modules/workflow/blocks/helpers/anchorVerify.js +198 -0
- package/dist/modules/workflow/blocks/helpers/asyncWorkQueue.js +53 -0
- package/dist/modules/workflow/blocks/helpers/commentScroller.js +334 -0
- package/dist/modules/workflow/blocks/helpers/commentSectionLocator.js +126 -0
- package/dist/modules/workflow/blocks/helpers/containerAnchors.js +301 -0
- package/dist/modules/workflow/blocks/helpers/debugArtifacts.js +6 -0
- package/dist/modules/workflow/blocks/helpers/downloadPaths.js +29 -0
- package/dist/modules/workflow/blocks/helpers/expandCommentsController.js +53 -0
- package/dist/modules/workflow/blocks/helpers/expandCommentsExtractor.js +129 -0
- package/dist/modules/workflow/blocks/helpers/macosVisionOcrPlugin.js +116 -0
- package/dist/modules/workflow/blocks/helpers/mergeXhsMarkdown.js +109 -0
- package/dist/modules/workflow/blocks/helpers/openDetailController.js +56 -0
- package/dist/modules/workflow/blocks/helpers/openDetailTypes.js +7 -0
- package/dist/modules/workflow/blocks/helpers/openDetailViewport.js +474 -0
- package/dist/modules/workflow/blocks/helpers/openDetailWaiter.js +104 -0
- package/dist/modules/workflow/blocks/helpers/operationLogger.js +195 -0
- package/dist/modules/workflow/blocks/helpers/persistedNotes.js +107 -0
- package/dist/modules/workflow/blocks/helpers/replyExpander.js +260 -0
- package/dist/modules/workflow/blocks/helpers/scrollIntoView.js +138 -0
- package/dist/modules/workflow/blocks/helpers/searchExecutor.js +328 -0
- package/dist/modules/workflow/blocks/helpers/searchGate.js +46 -0
- package/dist/modules/workflow/blocks/helpers/searchPageState.js +164 -0
- package/dist/modules/workflow/blocks/helpers/searchResultWaiter.js +64 -0
- package/dist/modules/workflow/blocks/helpers/simpleAnchor.js +134 -0
- package/dist/modules/workflow/blocks/helpers/smartReply.js +40 -0
- package/dist/modules/workflow/blocks/helpers/systemInput.js +635 -0
- package/dist/modules/workflow/blocks/helpers/targetCountMode.js +9 -0
- package/dist/modules/workflow/blocks/helpers/xhsCliArgs.js +80 -0
- package/dist/modules/workflow/blocks/helpers/xhsCommentDom.js +805 -0
- package/dist/modules/workflow/blocks/helpers/xhsNoteOrganizer.js +140 -0
- package/dist/modules/workflow/blocks/restore/RestorePhaseBlock.js +204 -0
- package/dist/modules/workflow/config/workflowRegistry.js +32 -0
- package/dist/modules/workflow/definitions/batch-collect-workflow.js +63 -0
- package/dist/modules/workflow/definitions/scroll-extract-workflow.js +74 -0
- package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow-v2.js +81 -0
- package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow.js +57 -0
- package/dist/modules/workflow/definitions/xiaohongshu-full-collect-workflow-v3.js +68 -0
- package/dist/modules/workflow/definitions/xiaohongshu-note-collect.js +49 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase1-workflow-v3.js +30 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase2-links-workflow-v3.js +40 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase3-collect-workflow-v1.js +54 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase34-from-links-workflow-v3.js +25 -0
- package/dist/modules/workflow/src/WeiboEventDrivenWorkflowRunner.js +308 -0
- package/dist/modules/workflow/src/context.js +70 -0
- package/dist/modules/workflow/src/index.js +5 -0
- package/dist/modules/workflow/src/orchestrator.js +230 -0
- package/dist/modules/workflow/src/runner.js +55 -0
- package/dist/modules/workflow/src/runtime.js +70 -0
- package/dist/modules/workflow/workflows/WeiboFeedExtractionWorkflow.js +359 -0
- package/dist/modules/workflow/workflows/XiaohongshuLoginWorkflow.js +110 -0
- package/dist/modules/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
- package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
- package/dist/modules/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +42 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
- package/dist/modules/xiaohongshu/app/src/index.js +9 -0
- package/dist/modules/xiaohongshu/app/src/utils/checkpoints.js +222 -0
- package/dist/modules/xiaohongshu/app/src/utils/controllerAction.js +43 -0
- package/dist/services/controller/src/controller.js +1476 -0
- package/dist/services/controller/src/index.js +2 -0
- package/dist/services/controller/src/payload-normalizer.js +129 -0
- package/dist/services/shared/heartbeat.js +120 -0
- package/dist/services/shared/lib/errorHandler.js +2 -0
- package/dist/services/shared/serviceProcessLogger.js +139 -0
- package/dist/services/unified-api/RemoteBrowserSession.js +176 -0
- package/dist/services/unified-api/RemoteSessionManager.js +148 -0
- package/dist/services/unified-api/container-operations-handler.js +115 -0
- package/dist/services/unified-api/server.js +652 -0
- package/dist/services/unified-api/state-registry.js +274 -0
- package/dist/services/unified-api/task-persistence.js +66 -0
- package/dist/services/unified-api/task-state.js +130 -0
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +12 -5
- package/modules/xiaohongshu/app/pnpm-lock.yaml +24 -0
- package/package.json +38 -10
- package/.beads/README.md +0 -81
- package/.beads/config.yaml +0 -67
- package/.beads/interactions.jsonl +0 -0
- package/.beads/issues.jsonl +0 -180
- package/.beads/metadata.json +0 -4
- package/.claude/settings.local.json +0 -10
- package/.github/workflows/ci.yml +0 -55
- package/AGENTS.md +0 -253
- package/apps/desktop-console/README.md +0 -27
- package/apps/desktop-console/package-lock.json +0 -897
- package/apps/desktop-console/package.json +0 -20
- package/apps/desktop-console/scripts/build-and-install.mjs +0 -19
- package/apps/desktop-console/scripts/build.mjs +0 -45
- package/apps/desktop-console/scripts/test-preload.mjs +0 -13
- package/apps/desktop-console/src/main/config.mts +0 -26
- package/apps/desktop-console/src/main/core-daemon-manager.mts +0 -131
- package/apps/desktop-console/src/main/desktop-settings.mts +0 -267
- package/apps/desktop-console/src/main/heartbeat-watchdog.mts +0 -50
- package/apps/desktop-console/src/main/heartbeat-watchdog.test.mts +0 -68
- package/apps/desktop-console/src/main/index-streaming.test.mts +0 -20
- package/apps/desktop-console/src/main/index.mts +0 -980
- package/apps/desktop-console/src/main/profile-store.mts +0 -239
- package/apps/desktop-console/src/main/profile-store.test.mts +0 -54
- package/apps/desktop-console/src/main/state-bridge.mts +0 -114
- package/apps/desktop-console/src/main/task-state-types.ts +0 -32
- package/apps/desktop-console/src/renderer/hooks/use-task-state.mts +0 -120
- package/apps/desktop-console/src/renderer/index.mts +0 -133
- package/apps/desktop-console/src/renderer/index.test.mts +0 -34
- package/apps/desktop-console/src/renderer/path-helpers.mts +0 -46
- package/apps/desktop-console/src/renderer/path-helpers.test.mts +0 -14
- package/apps/desktop-console/src/renderer/tabs/debug.mts +0 -48
- package/apps/desktop-console/src/renderer/tabs/debug.test.mts +0 -22
- package/apps/desktop-console/src/renderer/tabs/logs.mts +0 -421
- package/apps/desktop-console/src/renderer/tabs/logs.test.mts +0 -27
- package/apps/desktop-console/src/renderer/tabs/preflight.mts +0 -486
- package/apps/desktop-console/src/renderer/tabs/preflight.test.mts +0 -33
- package/apps/desktop-console/src/renderer/tabs/profile-pool.mts +0 -213
- package/apps/desktop-console/src/renderer/tabs/results.mts +0 -171
- package/apps/desktop-console/src/renderer/tabs/run.test.mts +0 -63
- package/apps/desktop-console/src/renderer/tabs/runtime.mts +0 -151
- package/apps/desktop-console/src/renderer/tabs/settings.mts +0 -146
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/account-flow.mts +0 -486
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/guide-browser-check.mts +0 -56
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/helpers.mts +0 -262
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/layout-block.mts +0 -430
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/live-stats.mts +0 -847
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/run-flow.mts +0 -443
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu-state.mts +0 -425
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu.mts +0 -497
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu.test.mts +0 -291
- package/apps/desktop-console/src/renderer/ui-components.mts +0 -31
- package/docs/README_camoufox_chinese.md +0 -141
- package/docs/USAGE_V3.md +0 -163
- package/docs/arch/OCR_MACOS_PLUGIN.md +0 -39
- package/docs/arch/PORTS.md +0 -40
- package/docs/arch/REGRESSION_CHECKLIST.md +0 -121
- package/docs/arch/SEARCH_GATE.md +0 -224
- package/docs/arch/VIEWPORT_SAFETY.md +0 -182
- package/docs/arch/XIAOHONGSHU_OFFLINE_MOCK_DESIGN.md +0 -267
- package/docs/xiaohongshu-container-driven-summary.md +0 -221
- package/docs/xiaohongshu-full-collect-runbook.md +0 -134
- package/docs/xiaohongshu-next-steps.md +0 -228
- package/docs/xiaohongshu-quickstart.md +0 -73
- package/docs/xiaohongshu-workflow-summary.md +0 -227
- package/modules/container-registry/tests/container-registry.test.ts +0 -16
- package/modules/logging/tests/logging.test.ts +0 -38
- package/modules/operations/tests/operations.test.ts +0 -22
- package/modules/operations/tests/viewport-filter.test.ts +0 -161
- package/modules/operations/tests/visible-only.test.ts +0 -250
- package/modules/session-manager/tests/session-manager.test.ts +0 -23
- package/modules/state/src/atomic-json.test.ts +0 -30
- package/modules/state/src/paths.test.ts +0 -59
- package/modules/state/src/xiaohongshu-collect-state.test.ts +0 -259
- package/modules/workflow/blocks/AnchorVerificationBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/AnchorVerificationBlock.js.map +0 -1
- package/modules/workflow/blocks/DetectPageStateBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/DetectPageStateBlock.js.map +0 -1
- package/modules/workflow/blocks/ErrorRecoveryBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/ErrorRecoveryBlock.js.map +0 -1
- package/modules/workflow/blocks/WaitSearchPermitBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/WaitSearchPermitBlock.js.map +0 -1
- package/modules/workflow/blocks/helpers/containerAnchors.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/containerAnchors.js.map +0 -1
- package/modules/workflow/blocks/helpers/downloadPaths.test.ts +0 -62
- package/modules/workflow/blocks/helpers/mergeXhsMarkdown.test.ts +0 -121
- package/modules/workflow/blocks/helpers/operationLogger.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/operationLogger.js.map +0 -1
- package/modules/workflow/blocks/helpers/persistedNotes.test.ts +0 -268
- package/modules/workflow/blocks/helpers/searchPageState.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/searchPageState.js.map +0 -1
- package/modules/workflow/blocks/helpers/targetCountMode.test.ts +0 -29
- package/modules/workflow/blocks/helpers/xhsCliArgs.test.ts +0 -75
- package/modules/workflow/tests/smartReply.test.ts +0 -32
- package/modules/xiaohongshu/app/src/blocks/Phase3Interact.matcher.test.ts +0 -33
- package/modules/xiaohongshu/app/src/utils/__tests__/checkpoints.test.ts +0 -141
- package/modules/xiaohongshu/app/tests/commentMatchDsl.test.ts +0 -50
- package/modules/xiaohongshu/app/tests/commentMatcher.test.ts +0 -46
- package/modules/xiaohongshu/app/tests/sharding.test.ts +0 -31
- package/package-scripts.json +0 -8
- package/runtime/infra/utils/README.md +0 -13
- package/runtime/infra/utils/scripts/README.md +0 -0
- package/runtime/infra/utils/scripts/development/eval-in-session.mjs +0 -40
- package/runtime/infra/utils/scripts/development/highlight-search-containers.mjs +0 -35
- package/runtime/infra/utils/scripts/service/kill-port.mjs +0 -24
- package/runtime/infra/utils/scripts/service/start-api.mjs +0 -39
- package/runtime/infra/utils/scripts/service/start-browser-service.mjs +0 -106
- package/runtime/infra/utils/scripts/service/stop-api.mjs +0 -18
- package/runtime/infra/utils/scripts/service/stop-browser-service.mjs +0 -104
- package/runtime/infra/utils/scripts/test-services.mjs +0 -94
- package/services/shared/heartbeat.test.ts +0 -102
- package/services/unified-api/__tests__/task-state.test.ts +0 -95
- package/sitecustomize.py +0 -19
- package/tests/README.md +0 -194
- package/tests/e2e/workflows/weibo-feed-extraction.test.ts +0 -171
- package/tests/fixtures/data/container-definitions.json +0 -67
- package/tests/fixtures/pages/simple-page.html +0 -69
- package/tests/integration/01-test-container-match.mjs +0 -188
- package/tests/integration/02-test-dom-branch.mjs +0 -161
- package/tests/integration/03-test-container-operation-system.mjs +0 -91
- package/tests/integration/05-test-container-lifecycle-events.mjs +0 -224
- package/tests/integration/05-test-container-lifecycle-with-events.mjs +0 -250
- package/tests/integration/06-test-container-dom-tree-drawing.mjs +0 -256
- package/tests/integration/07-test-weibo-container-lifecycle.mjs +0 -355
- package/tests/integration/08-test-weibo-feed-workflow.test.mjs +0 -164
- package/tests/integration/10-test-visual-analyzer.mjs +0 -312
- package/tests/integration/11-test-visual-loop.mjs +0 -284
- package/tests/integration/12-test-simple-visual-loop.mjs +0 -242
- package/tests/integration/13-test-visual-robust.mjs +0 -185
- package/tests/integration/14-test-visual-highlight-loop.mjs +0 -271
- package/tests/integration/inspect-page.mjs +0 -50
- package/tests/integration/run-all-tests.mjs +0 -95
- package/tests/patch_verification/CODEX_PATCH_TEST.md +0 -103
- package/tests/patch_verification/PHASE2_ANALYSIS.md +0 -179
- package/tests/patch_verification/PHASE2_OPTIMIZATION_REPORT.md +0 -55
- package/tests/patch_verification/PHASE2_TO_PHASE4_SUMMARY.md +0 -126
- package/tests/patch_verification/QUICK_TEST_SEQUENCE.md +0 -262
- package/tests/patch_verification/README.md +0 -143
- package/tests/patch_verification/RUN_TESTS.md +0 -60
- package/tests/patch_verification/TEST_EXECUTION.md +0 -99
- package/tests/patch_verification/TEST_PLAN.md +0 -328
- package/tests/patch_verification/TEST_RESULTS.md +0 -34
- package/tests/patch_verification/TOOL_TEST_PLAN.md +0 -48
- package/tests/patch_verification/run-tool-test.mjs +0 -121
- package/tests/patch_verification/temp_test_files/test01.txt +0 -1
- package/tests/patch_verification/temp_test_files/test02.txt +0 -3
- package/tests/patch_verification/temp_test_files/test02_gnu.txt +0 -3
- package/tests/patch_verification/temp_test_files/test03.txt +0 -1
- package/tests/patch_verification/temp_test_files/test03_multiline.txt +0 -5
- package/tests/patch_verification/temp_test_files/test04_function.ts +0 -5
- package/tests/patch_verification/temp_test_files/test05_import.ts +0 -4
- package/tests/patch_verification/temp_test_files/test06_special_chars.txt +0 -4
- package/tests/patch_verification/temp_test_files/test07_indentation.ts +0 -5
- package/tests/patch_verification/temp_test_files/test08_mismatch.txt +0 -1
- package/tests/patch_verification/temp_test_files/test_add_02.txt +0 -3
- package/tests/patch_verification/temp_test_files/test_simple.txt +0 -1
- package/tests/runner/TestReporter.mjs +0 -57
- package/tests/runner/TestRunner.mjs +0 -244
- package/tests/unit/commands/profile.test.mjs +0 -10
- package/tests/unit/container/change-notifier.test.mjs +0 -181
- package/tests/unit/lifecycle/session-registry.test.mjs +0 -135
- package/tests/unit/operations/registry.test.ts +0 -73
- package/tests/unit/utils/browser-service.test.mjs +0 -153
- package/tests/unit/utils/config.test.mjs +0 -166
- package/tests/unit/utils/fingerprint.test.mjs +0 -166
- package/tsconfig.json +0 -31
- package/tsconfig.services.json +0 -26
- /package/apps/desktop-console/{src → dist}/renderer/index.html +0 -0
- /package/apps/desktop-console/{src/renderer/tabs → dist/renderer}/run.mts +0 -0
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
# 小红书 Workflow - 下一步任务清单
|
|
2
|
-
|
|
3
|
-
## ✅ 已完成
|
|
4
|
-
|
|
5
|
-
1. **容器库架构** - 完整实现了搜索页、详情页、评论区等所有容器定义
|
|
6
|
-
2. **Workflow Block** - `XiaohongshuCrawlerBlock.ts` 实现了完整采集逻辑
|
|
7
|
-
3. **Debug 脚本** - 创建了 `debug-xhs-status/search/detail.mjs` 三个阶段测试脚本
|
|
8
|
-
4. **AGENTS.md 规则** - 新增"调试脚本必须保持浏览器会话不被破坏"规则
|
|
9
|
-
|
|
10
|
-
## 📋 当前问题分析
|
|
11
|
-
|
|
12
|
-
### 问题 1: 调试脚本频繁重启浏览器 ❌
|
|
13
|
-
**现状**:
|
|
14
|
-
- `scripts/xiaohongshu-test-comments.mjs` 中有 `startBrowserSession()` 逻辑
|
|
15
|
-
- 每次运行都可能调用 `start-headful.mjs` 启动新会话
|
|
16
|
-
- 破坏了现有 session 状态
|
|
17
|
-
|
|
18
|
-
**需要修改**:
|
|
19
|
-
- `xiaohongshu-test-comments.mjs`
|
|
20
|
-
- 其他可能启动新 session 的测试脚本
|
|
21
|
-
|
|
22
|
-
### 问题 2: 频繁导航到同一页面 ❌
|
|
23
|
-
**现状**:
|
|
24
|
-
- `debug-xhs-search.mjs` 中有 `ensureSearchPage()` 会直接跳转
|
|
25
|
-
- 没有先检查当前 URL 是否已经在目标页面
|
|
26
|
-
|
|
27
|
-
**需要修改**:
|
|
28
|
-
- 在导航前先检查 `getCurrentUrl()`
|
|
29
|
-
- 如果已在目标页,只刷新而不重新导航
|
|
30
|
-
|
|
31
|
-
### 问题 3: 脚本不是 unattached 模式 ❌
|
|
32
|
-
**现状**:
|
|
33
|
-
- 测试脚本直接操作浏览器,可能改变会话状态
|
|
34
|
-
- 没有明确的"只读"或"非侵入"模式
|
|
35
|
-
|
|
36
|
-
**需要改进**:
|
|
37
|
-
- 明确标记哪些操作是只读的(如 status 检查)
|
|
38
|
-
- 哪些操作会改变状态(如 search、navigate)
|
|
39
|
-
- 提供恢复机制(如记录初始 URL,测试后恢复)
|
|
40
|
-
|
|
41
|
-
## 🎯 下一步任务(按优先级排序)
|
|
42
|
-
|
|
43
|
-
### Task 1: 修改现有调试脚本为 unattached 模式 ⏳
|
|
44
|
-
|
|
45
|
-
**目标**: 让调试脚本复用现有 session,不重启浏览器
|
|
46
|
-
|
|
47
|
-
**子任务**:
|
|
48
|
-
|
|
49
|
-
1. **修改 `debug-xhs-status.mjs`** ✅
|
|
50
|
-
- ✅ 已经符合要求:仅读取状态,不改变页面
|
|
51
|
-
- ✅ 使用现有 PROFILE,不启动新 session
|
|
52
|
-
|
|
53
|
-
2. **修改 `debug-xhs-search.mjs`** ⏳
|
|
54
|
-
- [ ] `ensureSearchPage()` 改为先检查当前 URL
|
|
55
|
-
- [ ] 如果已在搜索页,优先使用 `location.reload()` 而非重新导航
|
|
56
|
-
- [ ] 记录初始 URL,测试完成后可选恢复
|
|
57
|
-
|
|
58
|
-
3. **修改 `debug-xhs-detail.mjs`** ⏳
|
|
59
|
-
- [ ] 检查当前是否已在搜索页
|
|
60
|
-
- [ ] 测试完成后关闭详情模态,恢复到搜索页
|
|
61
|
-
- [ ] 不强制导航,优先使用现有页面状态
|
|
62
|
-
|
|
63
|
-
4. **修改 `xiaohongshu-test-comments.mjs`** ⏳
|
|
64
|
-
- [ ] 移除 `startBrowserSession()` 自动启动逻辑
|
|
65
|
-
- [ ] 改为在检测到 session 不存在时,提示用户手动启动
|
|
66
|
-
- [ ] 或者提供 `--ensure-session` flag,明确需要时才启动
|
|
67
|
-
|
|
68
|
-
**修改原则**:
|
|
69
|
-
```javascript
|
|
70
|
-
// ❌ 旧方式 - 直接导航
|
|
71
|
-
async function ensureSearchPage() {
|
|
72
|
-
await controllerAction('browser:execute', {
|
|
73
|
-
script: `window.location.href = 'https://www.xiaohongshu.com'`
|
|
74
|
-
});
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
// ✅ 新方式 - 检查后刷新或导航
|
|
78
|
-
async function ensureSearchPage() {
|
|
79
|
-
const url = await getCurrentUrl();
|
|
80
|
-
if (url.includes('xiaohongshu.com/search_result')) {
|
|
81
|
-
console.log(' ✅ 已在搜索页,刷新...');
|
|
82
|
-
await controllerAction('browser:execute', {
|
|
83
|
-
script: 'location.reload()'
|
|
84
|
-
});
|
|
85
|
-
} else if (url.includes('xiaohongshu.com')) {
|
|
86
|
-
console.log(' ⚠️ 在小红书其他页面,导航到搜索...');
|
|
87
|
-
await controllerAction('browser:execute', {
|
|
88
|
-
script: `window.location.href = 'https://www.xiaohongshu.com/search_result?...'`
|
|
89
|
-
});
|
|
90
|
-
} else {
|
|
91
|
-
console.log(' ❌ 不在小红书页面,请先手动导航');
|
|
92
|
-
process.exit(1);
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
### Task 2: 创建 Session 检查脚本 ⏳
|
|
98
|
-
|
|
99
|
-
**目标**: 提供统一的 session 状态检查工具
|
|
100
|
-
|
|
101
|
-
**文件**: `scripts/check-xiaohongshu-session.mjs`
|
|
102
|
-
|
|
103
|
-
**功能**:
|
|
104
|
-
- 检查 `xiaohongshu_fresh` session 是否存在
|
|
105
|
-
- 显示当前 URL
|
|
106
|
-
- 显示登录状态
|
|
107
|
-
- 显示 Cookie 过期时间
|
|
108
|
-
- 给出启动建议(如果 session 不存在)
|
|
109
|
-
|
|
110
|
-
**用法**:
|
|
111
|
-
```bash
|
|
112
|
-
node scripts/check-xiaohongshu-session.mjs
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
### Task 3: 更新测试脚本文档 ⏳
|
|
116
|
-
|
|
117
|
-
**目标**: 明确测试流程和最佳实践
|
|
118
|
-
|
|
119
|
-
**文件**: `docs/testing-xiaohongshu.md`
|
|
120
|
-
|
|
121
|
-
**内容**:
|
|
122
|
-
1. **Session 管理规范**
|
|
123
|
-
- 启动 session:`node scripts/start-headful.mjs --profile xiaohongshu_fresh --url https://www.xiaohongshu.com`
|
|
124
|
-
- 检查 session:`node scripts/check-xiaohongshu-session.mjs`
|
|
125
|
-
- Session 应保持运行,不要频繁重启
|
|
126
|
-
|
|
127
|
-
2. **测试流程**
|
|
128
|
-
```bash
|
|
129
|
-
# 1. 启动 session(仅首次或 session 丢失时)
|
|
130
|
-
node scripts/start-headful.mjs --profile xiaohongshu_fresh --url https://www.xiaohongshu.com
|
|
131
|
-
|
|
132
|
-
# 2. 运行阶段测试(session 保持运行)
|
|
133
|
-
node scripts/debug-xhs-status.mjs # Step 1: 状态诊断
|
|
134
|
-
node scripts/debug-xhs-search.mjs # Step 2: 搜索验证
|
|
135
|
-
node scripts/debug-xhs-detail.mjs # Step 3: 详情页交互
|
|
136
|
-
|
|
137
|
-
# 3. 完整 workflow 测试
|
|
138
|
-
node scripts/run-xiaohongshu-workflow.ts --keyword "oppo小平板" --count 5
|
|
139
|
-
```
|
|
140
|
-
|
|
141
|
-
3. **调试技巧**
|
|
142
|
-
- 使用 Bus 订阅监听事件:`wscat -c ws://127.0.0.1:7701/bus`
|
|
143
|
-
- 查看容器匹配日志:检查 `container:match` 事件
|
|
144
|
-
- 截图调试:在脚本中增加 `takeScreenshot()` 调用
|
|
145
|
-
|
|
146
|
-
### Task 4: 优化 XiaohongshuCrawlerBlock ⏳
|
|
147
|
-
|
|
148
|
-
**目标**: 根据实际测试结果优化 Block 逻辑
|
|
149
|
-
|
|
150
|
-
**待优化点**:
|
|
151
|
-
1. **Context Destroyed 处理**
|
|
152
|
-
- [ ] 增加 retry 机制
|
|
153
|
-
- [ ] 优化 `waitForDetailContext()` 的轮询策略
|
|
154
|
-
|
|
155
|
-
2. **评论展开优化**
|
|
156
|
-
- [ ] 根据实际 DOM 调整滚动距离和次数
|
|
157
|
-
- [ ] 优化 `show_more_button` 的查找逻辑
|
|
158
|
-
|
|
159
|
-
3. **图片下载优化**
|
|
160
|
-
- [ ] 增加并发控制
|
|
161
|
-
- [ ] 优化重试策略
|
|
162
|
-
|
|
163
|
-
4. **性能优化**
|
|
164
|
-
- [ ] 减少不必要的等待时间
|
|
165
|
-
- [ ] 并行处理部分操作
|
|
166
|
-
|
|
167
|
-
### Task 5: 小规模验证测试 ⏳
|
|
168
|
-
|
|
169
|
-
**目标**: 运行完整 workflow,采集 5 条数据验证
|
|
170
|
-
|
|
171
|
-
**步骤**:
|
|
172
|
-
```bash
|
|
173
|
-
# 1. 确保 session 运行
|
|
174
|
-
node scripts/check-xiaohongshu-session.mjs
|
|
175
|
-
|
|
176
|
-
# 2. 运行 workflow(小规模)
|
|
177
|
-
node scripts/run-xiaohongshu-workflow.ts --keyword "手机膜" --count 5
|
|
178
|
-
|
|
179
|
-
# 3. 检查输出
|
|
180
|
-
ls -la ~/.webauto/download/xiaohongshu/手机膜/
|
|
181
|
-
|
|
182
|
-
# 4. 验证数据完整性
|
|
183
|
-
# - Markdown 格式正确
|
|
184
|
-
# - 图片下载成功
|
|
185
|
-
# - 评论数据完整
|
|
186
|
-
```
|
|
187
|
-
|
|
188
|
-
## 🔄 迭代计划
|
|
189
|
-
|
|
190
|
-
### 第一轮:基础功能验证(本周)
|
|
191
|
-
- [x] 创建调试脚本
|
|
192
|
-
- [x] 补充 AGENTS.md 规则
|
|
193
|
-
- [ ] 修改脚本为 unattached 模式
|
|
194
|
-
- [ ] 运行 5 条数据测试
|
|
195
|
-
|
|
196
|
-
### 第二轮:稳定性优化(下周)
|
|
197
|
-
- [ ] 优化 Context Destroyed 问题
|
|
198
|
-
- [ ] 优化评论展开逻辑
|
|
199
|
-
- [ ] 增加错误恢复机制
|
|
200
|
-
- [ ] 运行 50 条数据测试
|
|
201
|
-
|
|
202
|
-
### 第三轮:性能优化(后续)
|
|
203
|
-
- [ ] 并行采集优化
|
|
204
|
-
- [ ] 图片下载优化
|
|
205
|
-
- [ ] 增加增量采集支持
|
|
206
|
-
- [ ] 运行 200+ 条数据测试
|
|
207
|
-
|
|
208
|
-
## 📊 预期成果
|
|
209
|
-
|
|
210
|
-
- ✅ 调试流程清晰,session 状态稳定
|
|
211
|
-
- ✅ 容器匹配成功率 > 95%
|
|
212
|
-
- ✅ 评论展开成功率 > 90%
|
|
213
|
-
- ✅ 图片下载成功率 > 85%
|
|
214
|
-
- ✅ 平均每条数据采集时间 < 30秒
|
|
215
|
-
|
|
216
|
-
## 🚨 风险提示
|
|
217
|
-
|
|
218
|
-
1. **反爬策略变化**:小红书可能随时调整 DOM 结构或增加反爬措施
|
|
219
|
-
2. **评论加载时机**:动态加载的评论可能需要更长等待时间
|
|
220
|
-
3. **图片防盗链**:图片 URL 可能短期有效,需要及时下载
|
|
221
|
-
|
|
222
|
-
## 📝 相关文档
|
|
223
|
-
|
|
224
|
-
- `container-library/xiaohongshu/README.md` - 容器定义
|
|
225
|
-
- `modules/workflow/blocks/XiaohongshuCrawlerBlock.ts` - 采集逻辑
|
|
226
|
-
- `task.md` - 任务追踪
|
|
227
|
-
- `AGENTS.md` - 架构规则
|
|
228
|
-
- `docs/xiaohongshu-workflow-summary.md` - 实施总结
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
# 小红书采集快速开始(新架构)
|
|
2
|
-
|
|
3
|
-
## 1. 安装与检查
|
|
4
|
-
|
|
5
|
-
```bash
|
|
6
|
-
npm install
|
|
7
|
-
npm run build:services
|
|
8
|
-
node scripts/xiaohongshu/install.mjs --check
|
|
9
|
-
```
|
|
10
|
-
|
|
11
|
-
## 2. 登录准备(必须)
|
|
12
|
-
|
|
13
|
-
首次运行建议先在可视模式完成登录。
|
|
14
|
-
|
|
15
|
-
```bash
|
|
16
|
-
node scripts/xiaohongshu/phase1-boot.mjs --profile xiaohongshu-batch-1 --headless false
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
## 3. 执行全流程
|
|
20
|
-
|
|
21
|
-
### 方式 A:编排入口(推荐)
|
|
22
|
-
|
|
23
|
-
```bash
|
|
24
|
-
node scripts/xiaohongshu/phase-orchestrate.mjs \
|
|
25
|
-
--mode phase1-phase2-unified \
|
|
26
|
-
--profile xiaohongshu-batch-1 \
|
|
27
|
-
--keyword "工作服定制" \
|
|
28
|
-
--target 50 \
|
|
29
|
-
--env debug \
|
|
30
|
-
--headless false
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
### 方式 B:仅运行 unified(autoscript)
|
|
34
|
-
|
|
35
|
-
```bash
|
|
36
|
-
node scripts/xiaohongshu/phase-unified-harvest.mjs \
|
|
37
|
-
--profile xiaohongshu-batch-1 \
|
|
38
|
-
--keyword "工作服定制" \
|
|
39
|
-
--max-notes 50 \
|
|
40
|
-
--do-comments true \
|
|
41
|
-
--do-likes true \
|
|
42
|
-
--like-keywords "真敬业" \
|
|
43
|
-
--headless false
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
## 4. 查看进度与状态
|
|
47
|
-
|
|
48
|
-
```bash
|
|
49
|
-
# 状态摘要
|
|
50
|
-
node scripts/xiaohongshu/state.mjs show --keyword "工作服定制" --env debug
|
|
51
|
-
|
|
52
|
-
# 状态 JSON
|
|
53
|
-
node scripts/xiaohongshu/state.mjs show --keyword "工作服定制" --env debug --json
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
## 5. 输出目录
|
|
57
|
-
|
|
58
|
-
```text
|
|
59
|
-
~/.webauto/download/xiaohongshu/{env}/{keyword}/
|
|
60
|
-
├── phase2-links.jsonl
|
|
61
|
-
├── .collect-state.json
|
|
62
|
-
├── run*.log / run-events*.jsonl
|
|
63
|
-
└── {noteId}/
|
|
64
|
-
├── README.md
|
|
65
|
-
├── comments.md
|
|
66
|
-
└── images/
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
## 注意事项
|
|
70
|
-
|
|
71
|
-
- 详情页必须通过页面点击进入,禁止 URL 直跳(避免 `xsec_token` 风险)。
|
|
72
|
-
- 调试阶段建议 `--headless false`,便于观察容器匹配与动画时序。
|
|
73
|
-
- 若需要仅采集链接,可单独运行 `phase2-collect.mjs`。
|
|
@@ -1,227 +0,0 @@
|
|
|
1
|
-
# 小红书 Workflow 实施总结
|
|
2
|
-
|
|
3
|
-
## ✅ 已完成工作
|
|
4
|
-
|
|
5
|
-
### 1. 容器库架构
|
|
6
|
-
根据 `container-library/xiaohongshu/README.md`,完整实现了以下容器:
|
|
7
|
-
|
|
8
|
-
#### 根容器
|
|
9
|
-
- `xiaohongshu_search`: 搜索结果页 (`.feeds-page`)
|
|
10
|
-
- `xiaohongshu_detail`: 笔记详情页 (`.note-detail-mask`)
|
|
11
|
-
- `xiaohongshu_login`: 登录页
|
|
12
|
-
- `xiaohongshu_home`: 主页/推荐流
|
|
13
|
-
|
|
14
|
-
#### 搜索页容器层次
|
|
15
|
-
```
|
|
16
|
-
xiaohongshu_search/
|
|
17
|
-
├── login_anchor/ # 登录锚点
|
|
18
|
-
├── search_bar/ # 搜索框
|
|
19
|
-
└── search_result_list/ # 结果列表
|
|
20
|
-
└── search_result_item/ # 单个结果项
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
**search_result_item 核心能力**:
|
|
24
|
-
- `extract`: 提取 title/link/detail_url/image/text/note_id/xsec_token
|
|
25
|
-
- `navigate`: 直接读取 `a[href*='/explore/']` 的链接,并通过设置 `window.location.href` 跳转
|
|
26
|
-
- `click`: 点击图片获取带 `xsec_token` 的 URL
|
|
27
|
-
|
|
28
|
-
#### 详情页容器层次
|
|
29
|
-
```
|
|
30
|
-
xiaohongshu_detail/
|
|
31
|
-
├── login_anchor/
|
|
32
|
-
└── modal_shell/ # 详情模态框 (.note-detail-mask)
|
|
33
|
-
├── header/ # 作者信息
|
|
34
|
-
├── content/ # 正文
|
|
35
|
-
├── gallery/ # 图片区域
|
|
36
|
-
└── comment_section/ # 评论区域
|
|
37
|
-
├── show_more_button/ # 展开更多回复
|
|
38
|
-
├── comment_item/ # 评论项
|
|
39
|
-
├── end_marker/ # "THE END" 标记
|
|
40
|
-
└── empty_state/ # 无评论状态
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
**comment_section 核心能力**:
|
|
44
|
-
- `scroll`: 向下滚动加载更多评论
|
|
45
|
-
- `find-child`: 查找并触发 `show_more_button` 的自动点击
|
|
46
|
-
- 自动检测 `end_marker` 和 `empty_state` 判断评论加载完成
|
|
47
|
-
|
|
48
|
-
### 2. Workflow Block 实现
|
|
49
|
-
|
|
50
|
-
#### XiaohongshuCrawlerBlock
|
|
51
|
-
完整实现的主采集 Block,位于 `modules/workflow/blocks/XiaohongshuCrawlerBlock.ts`:
|
|
52
|
-
|
|
53
|
-
**核心功能**:
|
|
54
|
-
1. **登录守护**: `ensureLoginState()` - 检测登录页并等待人工登录
|
|
55
|
-
2. **搜索管理**: `runSearch()` + `ensureSearchPageContext()`
|
|
56
|
-
3. **列表采集**: `collectSearchItems()` - 基于容器树提取搜索结果
|
|
57
|
-
4. **详情导航**: `openDetailFromItem()` - 使用 `navigate` operation
|
|
58
|
-
5. **评论展开**: `scrollComments()` - 自动滚动并触发展开按钮
|
|
59
|
-
6. **数据提取**: `collectDetailData()` - 提取 header/content/gallery/comments
|
|
60
|
-
7. **图片下载**: `saveNoteData()` - 保存 Markdown + 图片
|
|
61
|
-
8. **去重机制**: 基于已存在目录的 note_id 去重
|
|
62
|
-
|
|
63
|
-
**数据流**:
|
|
64
|
-
```
|
|
65
|
-
搜索页 → match SEARCH_ROOT → find SEARCH_LIST → extract SEARCH_ITEM
|
|
66
|
-
↓
|
|
67
|
-
navigate → wait DETAIL_ROOT → find MODAL_SHELL
|
|
68
|
-
↓
|
|
69
|
-
extract HEADER/CONTENT/GALLERY → scroll COMMENT_SECTION → extract COMMENT_ITEM
|
|
70
|
-
↓
|
|
71
|
-
save Markdown + images → close modal → back to search
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
### 3. Debug 脚本(新增)
|
|
75
|
-
|
|
76
|
-
创建了 3 个原子化调试脚本:
|
|
77
|
-
|
|
78
|
-
#### scripts/debug-xhs-status.mjs
|
|
79
|
-
- 获取当前 URL
|
|
80
|
-
- 截图当前页面
|
|
81
|
-
- 分析 DOM 摘要(`.note-item`、`#search-input`、登录锚点)
|
|
82
|
-
- 高亮关键元素
|
|
83
|
-
|
|
84
|
-
#### scripts/debug-xhs-search.mjs
|
|
85
|
-
- 确保在小红书页面
|
|
86
|
-
- 高亮搜索框
|
|
87
|
-
- 随机选择关键字(oppo小平板/手机膜/雷军/小米/华为/鸿蒙)
|
|
88
|
-
- 执行搜索并等待结果稳定
|
|
89
|
-
|
|
90
|
-
#### scripts/debug-xhs-detail.mjs
|
|
91
|
-
- 获取列表第一个笔记
|
|
92
|
-
- 高亮并打开详情页
|
|
93
|
-
- 检查详情页加载(Modal/Title/Comments)
|
|
94
|
-
- 自动展开评论(滚动 + 点击展开按钮)
|
|
95
|
-
- 统计评论数量和状态
|
|
96
|
-
|
|
97
|
-
### 4. Workflow 定义
|
|
98
|
-
`modules/workflow/definitions/xiaohongshu-collect-workflow.ts`:
|
|
99
|
-
```typescript
|
|
100
|
-
{
|
|
101
|
-
id: 'xiaohongshu-collect',
|
|
102
|
-
name: '小红书关键词采集',
|
|
103
|
-
steps: [
|
|
104
|
-
{ blockName: 'StartBrowserService', ... },
|
|
105
|
-
{ blockName: 'EnsureSession', ... },
|
|
106
|
-
{ blockName: 'XiaohongshuCrawlerBlock', ... }
|
|
107
|
-
]
|
|
108
|
-
}
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
## 📋 调试计划(按 task.md)
|
|
112
|
-
|
|
113
|
-
### Step 1: 状态诊断 ✅
|
|
114
|
-
```bash
|
|
115
|
-
node scripts/debug-xhs-status.mjs
|
|
116
|
-
```
|
|
117
|
-
验证:
|
|
118
|
-
- 当前 URL
|
|
119
|
-
- DOM 结构
|
|
120
|
-
- 关键元素高亮
|
|
121
|
-
|
|
122
|
-
### Step 2: 搜索验证 ⏳
|
|
123
|
-
```bash
|
|
124
|
-
node scripts/debug-xhs-search.mjs
|
|
125
|
-
```
|
|
126
|
-
验证:
|
|
127
|
-
- 搜索框定位
|
|
128
|
-
- 关键字轮换
|
|
129
|
-
- 结果加载
|
|
130
|
-
|
|
131
|
-
### Step 3: 详情页交互 ⏳
|
|
132
|
-
```bash
|
|
133
|
-
node scripts/debug-xhs-detail.mjs
|
|
134
|
-
```
|
|
135
|
-
验证:
|
|
136
|
-
- 详情页打开
|
|
137
|
-
- 评论展开
|
|
138
|
-
- 数据完整性
|
|
139
|
-
|
|
140
|
-
### Step 4: 完整 Workflow ⏳
|
|
141
|
-
```bash
|
|
142
|
-
# 方式1: 直接调用 Block
|
|
143
|
-
node -e "import('./modules/workflow/blocks/XiaohongshuCrawlerBlock.ts').then(m => m.execute({ sessionId: 'xiaohongshu_fresh', keyword: 'oppo小平板', targetCount: 5 }))"
|
|
144
|
-
|
|
145
|
-
# 方式2: 通过 Workflow Runner
|
|
146
|
-
node scripts/run-xiaohongshu-workflow.ts
|
|
147
|
-
```
|
|
148
|
-
|
|
149
|
-
## 🔧 技术栈
|
|
150
|
-
|
|
151
|
-
- **统一 API**: `http://127.0.0.1:7701` (HTTP/WS/Bus)
|
|
152
|
-
- **Browser Service**: `http://127.0.0.1:7704` + `ws://127.0.0.1:8765`
|
|
153
|
-
- **容器操作**: `/v1/container/<containerId>/execute`
|
|
154
|
-
- **Controller 动作**: `/v1/controller/action`
|
|
155
|
-
- **事件总线**: `ws://127.0.0.1:7701/bus` (订阅 `container:*`/`ui:*`)
|
|
156
|
-
|
|
157
|
-
## 🚨 已知问题与对策
|
|
158
|
-
|
|
159
|
-
### 1. Navigation Context Destroyed
|
|
160
|
-
**问题**: 页面跳转时脚本执行被中断
|
|
161
|
-
**对策**:
|
|
162
|
-
- `waitForDetailContext()` - 轮询等待详情容器出现
|
|
163
|
-
- `ensureSearchPageContext()` - 确保回到搜索页后重新匹配
|
|
164
|
-
|
|
165
|
-
### 2. 评论展开时机
|
|
166
|
-
**问题**: 动态加载的评论需要滚动触发
|
|
167
|
-
**对策**:
|
|
168
|
-
- `scrollComments()` - 多轮滚动 + `find-child` 触发 `show_more_button`
|
|
169
|
-
- 检测 `end_marker` 和 `empty_state` 判断结束
|
|
170
|
-
|
|
171
|
-
### 3. 图片下载反爬
|
|
172
|
-
**问题**: 小红书图片需要 UA + Cookie
|
|
173
|
-
**对策**:
|
|
174
|
-
- `fetchBrowserHeaders()` - 读取浏览器 UA 和 Cookie
|
|
175
|
-
- 重试机制(最多 3 次)
|
|
176
|
-
|
|
177
|
-
## 📁 输出结构
|
|
178
|
-
|
|
179
|
-
```
|
|
180
|
-
~/.webauto/download/xiaohongshu/{keyword}/
|
|
181
|
-
├── {title}_{noteId}/
|
|
182
|
-
│ ├── content.md
|
|
183
|
-
│ └── images/
|
|
184
|
-
│ ├── 1.jpg
|
|
185
|
-
│ ├── 2.jpg
|
|
186
|
-
│ └── ...
|
|
187
|
-
└── ...
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
### Markdown 格式
|
|
191
|
-
```markdown
|
|
192
|
-
# 标题
|
|
193
|
-
|
|
194
|
-
- **关键字**: oppo小平板
|
|
195
|
-
- **作者**: xxx | [主页](link)
|
|
196
|
-
- **Note ID**: 12345
|
|
197
|
-
- **评论统计**: 10 条 / 结尾标记:是 / 空状态:否
|
|
198
|
-
|
|
199
|
-
## 正文
|
|
200
|
-
(正文内容)
|
|
201
|
-
|
|
202
|
-
## 图片
|
|
203
|
-

|
|
204
|
-

|
|
205
|
-
|
|
206
|
-
## 评论(10)
|
|
207
|
-
### 1. 用户名 (userId)
|
|
208
|
-
- 时间:2025-01-05
|
|
209
|
-
|
|
210
|
-
评论内容...
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
## 🎯 下一步
|
|
214
|
-
|
|
215
|
-
1. **运行 Debug 脚本**: 验证当前 Session 状态
|
|
216
|
-
2. **调整容器定义**: 根据实际 DOM 微调选择器
|
|
217
|
-
3. **测试完整流程**: 5 条数据小规模测试
|
|
218
|
-
4. **优化性能**: 减少等待时间、提高提取成功率
|
|
219
|
-
5. **扩展 Block**: 支持更多操作(如批量导出、数据分析)
|
|
220
|
-
|
|
221
|
-
## 📝 参考文档
|
|
222
|
-
|
|
223
|
-
- `container-library/xiaohongshu/README.md` - 容器定义规范
|
|
224
|
-
- `modules/workflow/blocks/XiaohongshuCrawlerBlock.ts` - 主采集逻辑
|
|
225
|
-
- `task.md` - 任务追踪与调试计划
|
|
226
|
-
- `AGENTS.md` - 架构设计原则
|
|
227
|
-
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import test from 'node:test';
|
|
2
|
-
import assert from 'node:assert/strict';
|
|
3
|
-
import { ContainerRegistry } from '../src/index.js';
|
|
4
|
-
|
|
5
|
-
test('getContainersForUrl returns known container', () => {
|
|
6
|
-
const registry = new ContainerRegistry();
|
|
7
|
-
const containers = registry.getContainersForUrl('https://weibo.com/');
|
|
8
|
-
assert.ok(containers.weibo_main_page, 'should include weibo_main_page');
|
|
9
|
-
});
|
|
10
|
-
|
|
11
|
-
test('listSites contains weibo entry', () => {
|
|
12
|
-
const registry = new ContainerRegistry();
|
|
13
|
-
const sites = registry.listSites();
|
|
14
|
-
const hasWeibo = sites.some((site) => site.key.includes('weibo'));
|
|
15
|
-
assert.ok(hasWeibo, 'should list weibo site');
|
|
16
|
-
});
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import test from 'node:test';
|
|
2
|
-
import assert from 'node:assert/strict';
|
|
3
|
-
import os from 'node:os';
|
|
4
|
-
import path from 'node:path';
|
|
5
|
-
import fs from 'node:fs/promises';
|
|
6
|
-
import { run as runCli } from '../src/cli.js';
|
|
7
|
-
|
|
8
|
-
async function withTempLog(content: string, fn: (file: string) => Promise<void>) {
|
|
9
|
-
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'logging-test-'));
|
|
10
|
-
const file = path.join(dir, 'sample.log');
|
|
11
|
-
await fs.writeFile(file, content, 'utf-8');
|
|
12
|
-
try {
|
|
13
|
-
await fn(file);
|
|
14
|
-
} finally {
|
|
15
|
-
await fs.rm(dir, { recursive: true, force: true });
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
test('logging cli stream returns tail lines', async () => {
|
|
20
|
-
await withTempLog('line1\nline2\nline3\n', async (file) => {
|
|
21
|
-
const result = await runCli(['stream', '--file', file, '--lines', '2']);
|
|
22
|
-
assert.equal(result.success, true);
|
|
23
|
-
assert.deepEqual(result.data.lines, ['line2', 'line3']);
|
|
24
|
-
});
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
test('logging cli flush can truncate file', async () => {
|
|
28
|
-
await withTempLog('foo\nbar\n', async (file) => {
|
|
29
|
-
const flush = await runCli(['flush', '--file', file, '--truncate', 'false']);
|
|
30
|
-
assert.equal(flush.success, true);
|
|
31
|
-
assert.equal(flush.data.lines.length, 2);
|
|
32
|
-
|
|
33
|
-
const flushTruncate = await runCli(['flush', '--file', file]);
|
|
34
|
-
assert.equal(flushTruncate.success, true);
|
|
35
|
-
const stats = await fs.stat(file);
|
|
36
|
-
assert.equal(stats.size, 0);
|
|
37
|
-
});
|
|
38
|
-
});
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import test from 'node:test';
|
|
2
|
-
import assert from 'node:assert/strict';
|
|
3
|
-
import { run as runCli } from '../src/cli.js';
|
|
4
|
-
|
|
5
|
-
test('operations cli list returns registered operations', async () => {
|
|
6
|
-
const result = await runCli(['list']);
|
|
7
|
-
assert.equal(result.success, true);
|
|
8
|
-
assert.ok(result.data.some((op: any) => op.id === 'highlight'));
|
|
9
|
-
assert.ok(result.data.some((op: any) => op.id === 'scroll'));
|
|
10
|
-
});
|
|
11
|
-
|
|
12
|
-
test('operations cli run works with mock page context', async () => {
|
|
13
|
-
const result = await runCli(['run', '--op', 'highlight', '--config', '{"selector":"#app"}']);
|
|
14
|
-
assert.equal(result.success, true);
|
|
15
|
-
assert.equal(result.data.mock, true);
|
|
16
|
-
});
|
|
17
|
-
|
|
18
|
-
// Skipped: robotjs has been removed from dependencies
|
|
19
|
-
test.skip('operations cli run supports system mouse', async () => {
|
|
20
|
-
const result = await runCli(['run', '--op', 'system:mouse-move', '--config', '{"x":10,"y":20}']);
|
|
21
|
-
assert.equal(result.success, true);
|
|
22
|
-
});
|