@web-auto/webauto 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/desktop-console/default-settings.json +1 -0
- package/apps/desktop-console/dist/main/index.mjs +1618 -0
- package/apps/desktop-console/{src → dist}/main/preload.mjs +10 -0
- package/apps/desktop-console/dist/renderer/index.js +3063 -0
- package/apps/desktop-console/entry/ui-console.mjs +299 -0
- package/apps/webauto/entry/account.mjs +356 -0
- package/apps/webauto/entry/lib/account-detect.mjs +160 -0
- package/apps/webauto/entry/lib/account-store.mjs +587 -0
- package/apps/webauto/entry/lib/profilepool.mjs +1 -1
- package/apps/webauto/entry/xhs-install.mjs +27 -3
- package/apps/webauto/entry/xhs-status.mjs +152 -0
- package/apps/webauto/entry/xhs-unified.mjs +595 -17
- package/bin/webauto.mjs +263 -15
- package/dist/apps/webauto/server.js +66 -0
- package/dist/modules/camo-backend/src/index.js +575 -0
- package/dist/modules/camo-backend/src/internal/BrowserSession.js +817 -0
- package/dist/modules/camo-backend/src/internal/ElementRegistry.js +61 -0
- package/dist/modules/camo-backend/src/internal/ProfileLock.js +85 -0
- package/dist/modules/camo-backend/src/internal/SessionManager.js +172 -0
- package/dist/modules/camo-backend/src/internal/container-matcher.js +852 -0
- package/dist/modules/camo-backend/src/internal/engine-manager.js +258 -0
- package/dist/modules/camo-backend/src/internal/fingerprint.js +203 -0
- package/dist/modules/camo-backend/src/internal/pageRuntime.js +29 -0
- package/dist/modules/camo-backend/src/internal/runtimeInjector.js +30 -0
- package/dist/modules/camo-backend/src/internal/state-bus.js +46 -0
- package/dist/modules/camo-backend/src/internal/storage-paths.js +36 -0
- package/dist/modules/camo-backend/src/internal/ws-server.js +1202 -0
- package/dist/modules/camo-runtime/src/utils/browser-service.mjs +423 -0
- package/dist/modules/camo-runtime/src/utils/config.mjs +77 -0
- package/dist/modules/container-registry/src/index.js +184 -0
- package/dist/modules/logging/src/index.js +92 -0
- package/dist/modules/operations/src/builtin.js +27 -0
- package/dist/modules/operations/src/container-binding.js +75 -0
- package/dist/modules/operations/src/executor.js +146 -0
- package/dist/modules/operations/src/operations/click.js +167 -0
- package/dist/modules/operations/src/operations/extract.js +204 -0
- package/dist/modules/operations/src/operations/find-child.js +17 -0
- package/dist/modules/operations/src/operations/highlight.js +138 -0
- package/dist/modules/operations/src/operations/key.js +61 -0
- package/dist/modules/operations/src/operations/navigate.js +148 -0
- package/dist/modules/operations/src/operations/scroll.js +126 -0
- package/dist/modules/operations/src/operations/type.js +190 -0
- package/dist/modules/operations/src/queue.js +100 -0
- package/dist/modules/operations/src/registry.js +11 -0
- package/dist/modules/operations/src/system/mouse.js +33 -0
- package/dist/modules/state/src/atomic-json.js +33 -0
- package/dist/modules/workflow/blocks/AnchorVerificationBlock.js +71 -0
- package/dist/modules/workflow/blocks/BehaviorRandomizer.js +26 -0
- package/dist/modules/workflow/blocks/CallWorkflowBlock.js +38 -0
- package/dist/modules/workflow/blocks/CloseDetailBlock.js +209 -0
- package/dist/modules/workflow/blocks/CollectBatch.js +137 -0
- package/dist/modules/workflow/blocks/CollectCommentsBlock.js +415 -0
- package/dist/modules/workflow/blocks/CollectSearchListBlock.js +599 -0
- package/dist/modules/workflow/blocks/CollectWeiboPosts.js +229 -0
- package/dist/modules/workflow/blocks/DetectPageStateBlock.js +259 -0
- package/dist/modules/workflow/blocks/EnsureLoginBlock.js +162 -0
- package/dist/modules/workflow/blocks/EnsureSession.js +426 -0
- package/dist/modules/workflow/blocks/ErrorClassifier.js +164 -0
- package/dist/modules/workflow/blocks/ErrorRecoveryBlock.js +319 -0
- package/dist/modules/workflow/blocks/ExpandCommentsBlock.js +1032 -0
- package/dist/modules/workflow/blocks/ExtractDetailBlock.js +310 -0
- package/dist/modules/workflow/blocks/ExtractPostFields.js +88 -0
- package/dist/modules/workflow/blocks/GenerateSmartReplyBlock.js +68 -0
- package/dist/modules/workflow/blocks/GoToSearchBlock.js +497 -0
- package/dist/modules/workflow/blocks/GracefulFallbackBlock.js +104 -0
- package/dist/modules/workflow/blocks/HighlightBlock.js +66 -0
- package/dist/modules/workflow/blocks/InitAutoScroll.js +65 -0
- package/dist/modules/workflow/blocks/LoadContainerDefinition.js +50 -0
- package/dist/modules/workflow/blocks/LoadContainerIndex.js +43 -0
- package/dist/modules/workflow/blocks/LocateAndGuardBlock.js +176 -0
- package/dist/modules/workflow/blocks/LoginRecoveryBlock.js +242 -0
- package/dist/modules/workflow/blocks/MatchContainers.js +64 -0
- package/dist/modules/workflow/blocks/MonitoringBlock.js +190 -0
- package/dist/modules/workflow/blocks/OpenDetailBlock.js +1240 -0
- package/dist/modules/workflow/blocks/OrganizeXhsNotesBlock.js +117 -0
- package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +270 -0
- package/dist/modules/workflow/blocks/PickSinglePost.js +69 -0
- package/dist/modules/workflow/blocks/ProgressTracker.js +125 -0
- package/dist/modules/workflow/blocks/RecordFixtureBlock.js +44 -0
- package/dist/modules/workflow/blocks/RenderMarkdown.js +48 -0
- package/dist/modules/workflow/blocks/SaveFile.js +54 -0
- package/dist/modules/workflow/blocks/ScrollNextBatch.js +72 -0
- package/dist/modules/workflow/blocks/SessionHealthBlock.js +73 -0
- package/dist/modules/workflow/blocks/StartBrowserService.js +45 -0
- package/dist/modules/workflow/blocks/ValidateContainerDefinition.js +67 -0
- package/dist/modules/workflow/blocks/ValidateExtract.js +35 -0
- package/dist/modules/workflow/blocks/WaitSearchPermitBlock.js +162 -0
- package/dist/modules/workflow/blocks/WaitStable.js +74 -0
- package/dist/modules/workflow/blocks/WarmupCommentsBlock.js +120 -0
- package/dist/modules/workflow/blocks/WorkflowExecutor.js +156 -0
- package/dist/modules/workflow/blocks/XiaohongshuCollectFromLinksBlock.js +1004 -0
- package/dist/modules/workflow/blocks/XiaohongshuCollectLinksBlock.js +1049 -0
- package/dist/modules/workflow/blocks/XiaohongshuFullCollectBlock.js +782 -0
- package/dist/modules/workflow/blocks/helpers/anchorVerify.js +198 -0
- package/dist/modules/workflow/blocks/helpers/asyncWorkQueue.js +53 -0
- package/dist/modules/workflow/blocks/helpers/commentScroller.js +334 -0
- package/dist/modules/workflow/blocks/helpers/commentSectionLocator.js +126 -0
- package/dist/modules/workflow/blocks/helpers/containerAnchors.js +301 -0
- package/dist/modules/workflow/blocks/helpers/debugArtifacts.js +6 -0
- package/dist/modules/workflow/blocks/helpers/downloadPaths.js +29 -0
- package/dist/modules/workflow/blocks/helpers/expandCommentsController.js +53 -0
- package/dist/modules/workflow/blocks/helpers/expandCommentsExtractor.js +129 -0
- package/dist/modules/workflow/blocks/helpers/macosVisionOcrPlugin.js +116 -0
- package/dist/modules/workflow/blocks/helpers/mergeXhsMarkdown.js +109 -0
- package/dist/modules/workflow/blocks/helpers/openDetailController.js +56 -0
- package/dist/modules/workflow/blocks/helpers/openDetailTypes.js +7 -0
- package/dist/modules/workflow/blocks/helpers/openDetailViewport.js +474 -0
- package/dist/modules/workflow/blocks/helpers/openDetailWaiter.js +104 -0
- package/dist/modules/workflow/blocks/helpers/operationLogger.js +195 -0
- package/dist/modules/workflow/blocks/helpers/persistedNotes.js +107 -0
- package/dist/modules/workflow/blocks/helpers/replyExpander.js +260 -0
- package/dist/modules/workflow/blocks/helpers/scrollIntoView.js +138 -0
- package/dist/modules/workflow/blocks/helpers/searchExecutor.js +328 -0
- package/dist/modules/workflow/blocks/helpers/searchGate.js +46 -0
- package/dist/modules/workflow/blocks/helpers/searchPageState.js +164 -0
- package/dist/modules/workflow/blocks/helpers/searchResultWaiter.js +64 -0
- package/dist/modules/workflow/blocks/helpers/simpleAnchor.js +134 -0
- package/dist/modules/workflow/blocks/helpers/smartReply.js +40 -0
- package/dist/modules/workflow/blocks/helpers/systemInput.js +635 -0
- package/dist/modules/workflow/blocks/helpers/targetCountMode.js +9 -0
- package/dist/modules/workflow/blocks/helpers/xhsCliArgs.js +80 -0
- package/dist/modules/workflow/blocks/helpers/xhsCommentDom.js +805 -0
- package/dist/modules/workflow/blocks/helpers/xhsNoteOrganizer.js +140 -0
- package/dist/modules/workflow/blocks/restore/RestorePhaseBlock.js +204 -0
- package/dist/modules/workflow/config/workflowRegistry.js +32 -0
- package/dist/modules/workflow/definitions/batch-collect-workflow.js +63 -0
- package/dist/modules/workflow/definitions/scroll-extract-workflow.js +74 -0
- package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow-v2.js +81 -0
- package/dist/modules/workflow/definitions/xiaohongshu-collect-workflow.js +57 -0
- package/dist/modules/workflow/definitions/xiaohongshu-full-collect-workflow-v3.js +68 -0
- package/dist/modules/workflow/definitions/xiaohongshu-note-collect.js +49 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase1-workflow-v3.js +30 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase2-links-workflow-v3.js +40 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase3-collect-workflow-v1.js +54 -0
- package/dist/modules/workflow/definitions/xiaohongshu-phase34-from-links-workflow-v3.js +25 -0
- package/dist/modules/workflow/src/WeiboEventDrivenWorkflowRunner.js +308 -0
- package/dist/modules/workflow/src/context.js +70 -0
- package/dist/modules/workflow/src/index.js +5 -0
- package/dist/modules/workflow/src/orchestrator.js +230 -0
- package/dist/modules/workflow/src/runner.js +55 -0
- package/dist/modules/workflow/src/runtime.js +70 -0
- package/dist/modules/workflow/workflows/WeiboFeedExtractionWorkflow.js +359 -0
- package/dist/modules/workflow/workflows/XiaohongshuLoginWorkflow.js +110 -0
- package/dist/modules/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
- package/dist/modules/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
- package/dist/modules/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
- package/dist/modules/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +42 -0
- package/dist/modules/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
- package/dist/modules/xiaohongshu/app/src/index.js +9 -0
- package/dist/modules/xiaohongshu/app/src/utils/checkpoints.js +222 -0
- package/dist/modules/xiaohongshu/app/src/utils/controllerAction.js +43 -0
- package/dist/services/controller/src/controller.js +1476 -0
- package/dist/services/controller/src/index.js +2 -0
- package/dist/services/controller/src/payload-normalizer.js +129 -0
- package/dist/services/shared/heartbeat.js +120 -0
- package/dist/services/shared/lib/errorHandler.js +2 -0
- package/dist/services/shared/serviceProcessLogger.js +139 -0
- package/dist/services/unified-api/RemoteBrowserSession.js +176 -0
- package/dist/services/unified-api/RemoteSessionManager.js +148 -0
- package/dist/services/unified-api/container-operations-handler.js +115 -0
- package/dist/services/unified-api/server.js +652 -0
- package/dist/services/unified-api/state-registry.js +274 -0
- package/dist/services/unified-api/task-persistence.js +66 -0
- package/dist/services/unified-api/task-state.js +130 -0
- package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +12 -5
- package/modules/xiaohongshu/app/pnpm-lock.yaml +24 -0
- package/package.json +38 -10
- package/.beads/README.md +0 -81
- package/.beads/config.yaml +0 -67
- package/.beads/interactions.jsonl +0 -0
- package/.beads/issues.jsonl +0 -180
- package/.beads/metadata.json +0 -4
- package/.claude/settings.local.json +0 -10
- package/.github/workflows/ci.yml +0 -55
- package/AGENTS.md +0 -253
- package/apps/desktop-console/README.md +0 -27
- package/apps/desktop-console/package-lock.json +0 -897
- package/apps/desktop-console/package.json +0 -20
- package/apps/desktop-console/scripts/build-and-install.mjs +0 -19
- package/apps/desktop-console/scripts/build.mjs +0 -45
- package/apps/desktop-console/scripts/test-preload.mjs +0 -13
- package/apps/desktop-console/src/main/config.mts +0 -26
- package/apps/desktop-console/src/main/core-daemon-manager.mts +0 -131
- package/apps/desktop-console/src/main/desktop-settings.mts +0 -267
- package/apps/desktop-console/src/main/heartbeat-watchdog.mts +0 -50
- package/apps/desktop-console/src/main/heartbeat-watchdog.test.mts +0 -68
- package/apps/desktop-console/src/main/index-streaming.test.mts +0 -20
- package/apps/desktop-console/src/main/index.mts +0 -980
- package/apps/desktop-console/src/main/profile-store.mts +0 -239
- package/apps/desktop-console/src/main/profile-store.test.mts +0 -54
- package/apps/desktop-console/src/main/state-bridge.mts +0 -114
- package/apps/desktop-console/src/main/task-state-types.ts +0 -32
- package/apps/desktop-console/src/renderer/hooks/use-task-state.mts +0 -120
- package/apps/desktop-console/src/renderer/index.mts +0 -133
- package/apps/desktop-console/src/renderer/index.test.mts +0 -34
- package/apps/desktop-console/src/renderer/path-helpers.mts +0 -46
- package/apps/desktop-console/src/renderer/path-helpers.test.mts +0 -14
- package/apps/desktop-console/src/renderer/tabs/debug.mts +0 -48
- package/apps/desktop-console/src/renderer/tabs/debug.test.mts +0 -22
- package/apps/desktop-console/src/renderer/tabs/logs.mts +0 -421
- package/apps/desktop-console/src/renderer/tabs/logs.test.mts +0 -27
- package/apps/desktop-console/src/renderer/tabs/preflight.mts +0 -486
- package/apps/desktop-console/src/renderer/tabs/preflight.test.mts +0 -33
- package/apps/desktop-console/src/renderer/tabs/profile-pool.mts +0 -213
- package/apps/desktop-console/src/renderer/tabs/results.mts +0 -171
- package/apps/desktop-console/src/renderer/tabs/run.test.mts +0 -63
- package/apps/desktop-console/src/renderer/tabs/runtime.mts +0 -151
- package/apps/desktop-console/src/renderer/tabs/settings.mts +0 -146
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/account-flow.mts +0 -486
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/guide-browser-check.mts +0 -56
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/helpers.mts +0 -262
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/layout-block.mts +0 -430
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/live-stats.mts +0 -847
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu/run-flow.mts +0 -443
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu-state.mts +0 -425
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu.mts +0 -497
- package/apps/desktop-console/src/renderer/tabs/xiaohongshu.test.mts +0 -291
- package/apps/desktop-console/src/renderer/ui-components.mts +0 -31
- package/docs/README_camoufox_chinese.md +0 -141
- package/docs/USAGE_V3.md +0 -163
- package/docs/arch/OCR_MACOS_PLUGIN.md +0 -39
- package/docs/arch/PORTS.md +0 -40
- package/docs/arch/REGRESSION_CHECKLIST.md +0 -121
- package/docs/arch/SEARCH_GATE.md +0 -224
- package/docs/arch/VIEWPORT_SAFETY.md +0 -182
- package/docs/arch/XIAOHONGSHU_OFFLINE_MOCK_DESIGN.md +0 -267
- package/docs/xiaohongshu-container-driven-summary.md +0 -221
- package/docs/xiaohongshu-full-collect-runbook.md +0 -134
- package/docs/xiaohongshu-next-steps.md +0 -228
- package/docs/xiaohongshu-quickstart.md +0 -73
- package/docs/xiaohongshu-workflow-summary.md +0 -227
- package/modules/container-registry/tests/container-registry.test.ts +0 -16
- package/modules/logging/tests/logging.test.ts +0 -38
- package/modules/operations/tests/operations.test.ts +0 -22
- package/modules/operations/tests/viewport-filter.test.ts +0 -161
- package/modules/operations/tests/visible-only.test.ts +0 -250
- package/modules/session-manager/tests/session-manager.test.ts +0 -23
- package/modules/state/src/atomic-json.test.ts +0 -30
- package/modules/state/src/paths.test.ts +0 -59
- package/modules/state/src/xiaohongshu-collect-state.test.ts +0 -259
- package/modules/workflow/blocks/AnchorVerificationBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/AnchorVerificationBlock.js.map +0 -1
- package/modules/workflow/blocks/DetectPageStateBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/DetectPageStateBlock.js.map +0 -1
- package/modules/workflow/blocks/ErrorRecoveryBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/ErrorRecoveryBlock.js.map +0 -1
- package/modules/workflow/blocks/WaitSearchPermitBlock.d.ts.map +0 -1
- package/modules/workflow/blocks/WaitSearchPermitBlock.js.map +0 -1
- package/modules/workflow/blocks/helpers/containerAnchors.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/containerAnchors.js.map +0 -1
- package/modules/workflow/blocks/helpers/downloadPaths.test.ts +0 -62
- package/modules/workflow/blocks/helpers/mergeXhsMarkdown.test.ts +0 -121
- package/modules/workflow/blocks/helpers/operationLogger.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/operationLogger.js.map +0 -1
- package/modules/workflow/blocks/helpers/persistedNotes.test.ts +0 -268
- package/modules/workflow/blocks/helpers/searchPageState.d.ts.map +0 -1
- package/modules/workflow/blocks/helpers/searchPageState.js.map +0 -1
- package/modules/workflow/blocks/helpers/targetCountMode.test.ts +0 -29
- package/modules/workflow/blocks/helpers/xhsCliArgs.test.ts +0 -75
- package/modules/workflow/tests/smartReply.test.ts +0 -32
- package/modules/xiaohongshu/app/src/blocks/Phase3Interact.matcher.test.ts +0 -33
- package/modules/xiaohongshu/app/src/utils/__tests__/checkpoints.test.ts +0 -141
- package/modules/xiaohongshu/app/tests/commentMatchDsl.test.ts +0 -50
- package/modules/xiaohongshu/app/tests/commentMatcher.test.ts +0 -46
- package/modules/xiaohongshu/app/tests/sharding.test.ts +0 -31
- package/package-scripts.json +0 -8
- package/runtime/infra/utils/README.md +0 -13
- package/runtime/infra/utils/scripts/README.md +0 -0
- package/runtime/infra/utils/scripts/development/eval-in-session.mjs +0 -40
- package/runtime/infra/utils/scripts/development/highlight-search-containers.mjs +0 -35
- package/runtime/infra/utils/scripts/service/kill-port.mjs +0 -24
- package/runtime/infra/utils/scripts/service/start-api.mjs +0 -39
- package/runtime/infra/utils/scripts/service/start-browser-service.mjs +0 -106
- package/runtime/infra/utils/scripts/service/stop-api.mjs +0 -18
- package/runtime/infra/utils/scripts/service/stop-browser-service.mjs +0 -104
- package/runtime/infra/utils/scripts/test-services.mjs +0 -94
- package/services/shared/heartbeat.test.ts +0 -102
- package/services/unified-api/__tests__/task-state.test.ts +0 -95
- package/sitecustomize.py +0 -19
- package/tests/README.md +0 -194
- package/tests/e2e/workflows/weibo-feed-extraction.test.ts +0 -171
- package/tests/fixtures/data/container-definitions.json +0 -67
- package/tests/fixtures/pages/simple-page.html +0 -69
- package/tests/integration/01-test-container-match.mjs +0 -188
- package/tests/integration/02-test-dom-branch.mjs +0 -161
- package/tests/integration/03-test-container-operation-system.mjs +0 -91
- package/tests/integration/05-test-container-lifecycle-events.mjs +0 -224
- package/tests/integration/05-test-container-lifecycle-with-events.mjs +0 -250
- package/tests/integration/06-test-container-dom-tree-drawing.mjs +0 -256
- package/tests/integration/07-test-weibo-container-lifecycle.mjs +0 -355
- package/tests/integration/08-test-weibo-feed-workflow.test.mjs +0 -164
- package/tests/integration/10-test-visual-analyzer.mjs +0 -312
- package/tests/integration/11-test-visual-loop.mjs +0 -284
- package/tests/integration/12-test-simple-visual-loop.mjs +0 -242
- package/tests/integration/13-test-visual-robust.mjs +0 -185
- package/tests/integration/14-test-visual-highlight-loop.mjs +0 -271
- package/tests/integration/inspect-page.mjs +0 -50
- package/tests/integration/run-all-tests.mjs +0 -95
- package/tests/patch_verification/CODEX_PATCH_TEST.md +0 -103
- package/tests/patch_verification/PHASE2_ANALYSIS.md +0 -179
- package/tests/patch_verification/PHASE2_OPTIMIZATION_REPORT.md +0 -55
- package/tests/patch_verification/PHASE2_TO_PHASE4_SUMMARY.md +0 -126
- package/tests/patch_verification/QUICK_TEST_SEQUENCE.md +0 -262
- package/tests/patch_verification/README.md +0 -143
- package/tests/patch_verification/RUN_TESTS.md +0 -60
- package/tests/patch_verification/TEST_EXECUTION.md +0 -99
- package/tests/patch_verification/TEST_PLAN.md +0 -328
- package/tests/patch_verification/TEST_RESULTS.md +0 -34
- package/tests/patch_verification/TOOL_TEST_PLAN.md +0 -48
- package/tests/patch_verification/run-tool-test.mjs +0 -121
- package/tests/patch_verification/temp_test_files/test01.txt +0 -1
- package/tests/patch_verification/temp_test_files/test02.txt +0 -3
- package/tests/patch_verification/temp_test_files/test02_gnu.txt +0 -3
- package/tests/patch_verification/temp_test_files/test03.txt +0 -1
- package/tests/patch_verification/temp_test_files/test03_multiline.txt +0 -5
- package/tests/patch_verification/temp_test_files/test04_function.ts +0 -5
- package/tests/patch_verification/temp_test_files/test05_import.ts +0 -4
- package/tests/patch_verification/temp_test_files/test06_special_chars.txt +0 -4
- package/tests/patch_verification/temp_test_files/test07_indentation.ts +0 -5
- package/tests/patch_verification/temp_test_files/test08_mismatch.txt +0 -1
- package/tests/patch_verification/temp_test_files/test_add_02.txt +0 -3
- package/tests/patch_verification/temp_test_files/test_simple.txt +0 -1
- package/tests/runner/TestReporter.mjs +0 -57
- package/tests/runner/TestRunner.mjs +0 -244
- package/tests/unit/commands/profile.test.mjs +0 -10
- package/tests/unit/container/change-notifier.test.mjs +0 -181
- package/tests/unit/lifecycle/session-registry.test.mjs +0 -135
- package/tests/unit/operations/registry.test.ts +0 -73
- package/tests/unit/utils/browser-service.test.mjs +0 -153
- package/tests/unit/utils/config.test.mjs +0 -166
- package/tests/unit/utils/fingerprint.test.mjs +0 -166
- package/tsconfig.json +0 -31
- package/tsconfig.services.json +0 -26
- /package/apps/desktop-console/{src → dist}/renderer/index.html +0 -0
- /package/apps/desktop-console/{src/renderer/tabs → dist/renderer}/run.mts +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Block: OrganizeXhsNotesBlock
|
|
3
|
+
*
|
|
4
|
+
* 采集完成后的整理:
|
|
5
|
+
* 1) OCR 扫描每个 note 的 images/,输出 ocr.md
|
|
6
|
+
* 2) 合并 ocr + 正文 + 评论,输出 merged.md
|
|
7
|
+
* 3) 合并所有帖子,输出 ALL.md(编号 + 链接 + 合并正文)
|
|
8
|
+
*
|
|
9
|
+
* 注意:
|
|
10
|
+
* - 只做本地落盘整理(~/.webauto/download),不做任何浏览器操作。
|
|
11
|
+
*/
|
|
12
|
+
import os from 'node:os';
|
|
13
|
+
import path from 'node:path';
|
|
14
|
+
import { promises as fs } from 'node:fs';
|
|
15
|
+
import { countPersistedNotes } from './helpers/persistedNotes.js';
|
|
16
|
+
import { organizeOneNote } from './helpers/xhsNoteOrganizer.js';
|
|
17
|
+
/**
 * Turn an arbitrary label into a string usable as a single path segment.
 *
 * Each run of characters that are illegal in file names on common platforms
 * (backslash, slash, colon, double quote, asterisk, question mark, angle
 * brackets, pipe) is collapsed into one underscore, then surrounding
 * whitespace is trimmed.
 *
 * @param {unknown} name - Label to sanitize; non-string values are stringified.
 * @returns {string} The sanitized segment, or '' when `name` is falsy.
 */
function sanitizeForPath(name) {
    if (!name)
        return '';
    // Coerce first: a truthy non-string (e.g. a numeric keyword) has no .replace().
    return String(name).replace(/[\\/:"*?<>|]+/g, '_').trim();
}
|
|
22
|
+
/**
 * Resolve the root directory under which downloads are stored.
 *
 * Precedence:
 *   1. WEBAUTO_DOWNLOAD_ROOT
 *   2. WEBAUTO_DOWNLOAD_DIR
 *   3. <home>/.webauto/download, where <home> is HOME, USERPROFILE or os.homedir()
 *
 * An environment value that is empty or whitespace-only is ignored, and the
 * chosen value is returned trimmed so stray spaces never end up in a path.
 *
 * @returns {string} The download root directory.
 */
function resolveDownloadRoot() {
    const candidates = [process.env.WEBAUTO_DOWNLOAD_ROOT, process.env.WEBAUTO_DOWNLOAD_DIR];
    for (const candidate of candidates) {
        const trimmed = (candidate || '').trim();
        if (trimmed)
            return trimmed;
    }
    const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
    return path.join(home, '.webauto', 'download');
}
|
|
29
|
+
/**
 * Organize the notes already persisted on disk for one keyword and build a
 * combined `ALL-<keyword>.md` file in the keyword directory.
 *
 * For every note id reported by `countPersistedNotes` (sorted), the note is
 * handed to `organizeOneNote`; the merged markdown it points at is then read
 * and appended to the combined file as a numbered section. A note that fails
 * is recorded in `processed` with `ok: false` and contributes nothing to the
 * combined file; the remaining notes are still processed.
 *
 * @param {object} input
 * @param {string} [input.platform='xiaohongshu'] - Platform passed to `countPersistedNotes`.
 * @param {string} [input.env='debug'] - Environment name passed to `countPersistedNotes`.
 * @param {string} input.keyword - Search keyword (required; trimmed).
 * @param {string} [input.ocrLanguages] - OCR language spec; defaults to 'chi_sim+eng'.
 * @param {boolean} [input.runOcr] - Only a strict `true` requests OCR; any other
 *   value asks `organizeOneNote` to require an already existing OCR result.
 * @returns {Promise<object>} `{ success, keywordDir, noteCount, ocrLanguagesUsed,
 *   allPath, processed, error? }`. `success` is true only when no note failed.
 *   When the keyword is missing, a failure result with `error: 'missing_keyword'`
 *   is returned and nothing is read or written.
 */
export async function execute(input) {
    const platform = input.platform || 'xiaohongshu';
    const env = input.env || 'debug';
    const keyword = String(input.keyword || '').trim();
    if (!keyword) {
        return { success: false, keywordDir: '', noteCount: 0, ocrLanguagesUsed: 'eng', processed: [], error: 'missing_keyword' };
    }
    const persisted = await countPersistedNotes({
        platform,
        env,
        keyword,
        downloadRoot: resolveDownloadRoot(),
        requiredFiles: ['content.md', 'comments.md'],
        requireCommentsDone: true,
    });
    const keywordDir = persisted.keywordDir;
    const noteIds = persisted.noteIds.slice().sort();
    const ocrLang = String(input.ocrLanguages || '').trim() || 'chi_sim+eng';
    const processed = [];
    // Header of the combined ALL file.
    const allLines = [];
    allLines.push(`# 合并汇总(${keyword})`);
    allLines.push('');
    allLines.push(`- 关键词: ${keyword}`);
    allLines.push(`- 环境: ${env}`);
    allLines.push(`- 目录: ${keywordDir}`);
    allLines.push(`- 数量: ${noteIds.length}`);
    allLines.push(`- OCR 语言: ${ocrLang}`);
    allLines.push('');
    for (let i = 0; i < noteIds.length; i += 1) {
        const noteId = noteIds[i];
        const noteDir = path.join(keywordDir, noteId);
        try {
            const res = await organizeOneNote({
                noteDir,
                noteId,
                keyword,
                ocrLanguages: ocrLang,
                runOcr: input.runOcr === true,
                requireExistingOcr: input.runOcr !== true,
            });
            // Read the merged file BEFORE touching allLines: if the read throws,
            // the note is reported as failed and must not leave a dangling
            // heading/link behind in the combined file.
            const merged = await fs.readFile(res.mergedPath, 'utf-8');
            allLines.push(`## ${i + 1}. ${noteId}`);
            allLines.push('');
            if (res.link)
                allLines.push(`- 链接: ${res.link}`);
            allLines.push('');
            allLines.push(merged.trim());
            allLines.push('');
            allLines.push('---');
            allLines.push('');
            processed.push({
                noteId,
                ocrPath: res.ocrPath,
                mergedPath: res.mergedPath,
                imageCount: res.imageCount,
                ocrErrors: res.ocrErrors,
                ok: true,
            });
        }
        catch (e) {
            // A failing note must not block the remaining notes.
            processed.push({
                noteId,
                imageCount: 0,
                ocrErrors: 0,
                ok: false,
                error: e?.message || String(e),
            });
        }
    }
    const safeKeyword = sanitizeForPath(keyword) || 'unknown';
    const allPath = path.join(keywordDir, `ALL-${safeKeyword}.md`);
    await fs.writeFile(allPath, `${allLines.join('\n')}\n`, 'utf-8');
    const failed = processed.filter((p) => !p.ok);
    return {
        success: failed.length === 0,
        keywordDir,
        noteCount: noteIds.length,
        ocrLanguagesUsed: ocrLang,
        allPath,
        processed,
        ...(failed.length
            ? { error: `organize_failed_notes: ${failed.length}/${noteIds.length}` }
            : {}),
    };
}
|
|
117
|
+
//# sourceMappingURL=OrganizeXhsNotesBlock.js.map
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PersistXhsNoteBlock
|
|
3
|
+
*
|
|
4
|
+
* 将小红书帖子详情 + 评论持久化到本地目录:
|
|
5
|
+
* ~/.webauto/download/xiaohongshu/{env}/{keyword}/{noteId}/
|
|
6
|
+
* - content.md
|
|
7
|
+
* - images/{index}.jpg
|
|
8
|
+
*/
|
|
9
|
+
import os from 'node:os';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { promises as fs } from 'node:fs';
|
|
12
|
+
/**
 * Turn an arbitrary label into a string usable as a single path segment.
 *
 * Each run of characters that are illegal in file names on common platforms
 * (backslash, slash, colon, double quote, asterisk, question mark, angle
 * brackets, pipe) is collapsed into one underscore, then surrounding
 * whitespace is trimmed.
 *
 * @param {unknown} name - Label to sanitize; non-string values are stringified.
 * @returns {string} The sanitized segment, or '' when `name` is falsy.
 */
function sanitizeForPath(name) {
    if (!name)
        return '';
    // Coerce first: a truthy non-string (e.g. a numeric keyword) has no .replace().
    return String(name).replace(/[\\/:"*?<>|]+/g, '_').trim();
}
|
|
17
|
+
/**
 * Resolve the root directory under which downloads are stored.
 *
 * Precedence:
 *   1. WEBAUTO_DOWNLOAD_ROOT
 *   2. WEBAUTO_DOWNLOAD_DIR
 *   3. <home>/.webauto/download, where <home> is HOME, USERPROFILE or os.homedir()
 *
 * An environment value that is empty or whitespace-only is ignored, and the
 * chosen value is returned trimmed so stray spaces never end up in a path.
 *
 * @returns {string} The download root directory.
 */
function resolveDownloadRoot() {
    const candidates = [process.env.WEBAUTO_DOWNLOAD_ROOT, process.env.WEBAUTO_DOWNLOAD_DIR];
    for (const candidate of candidates) {
        const trimmed = (candidate || '').trim();
        if (trimmed)
            return trimmed;
    }
    const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
    return path.join(home, '.webauto', 'download');
}
|
|
24
|
+
/**
 * Shorten a URL so it can be embedded in a log line.
 *
 * @param {unknown} url - Value to print; a falsy value is treated as ''.
 * @param {number} [maxLen=180] - Maximum number of characters kept.
 * @returns {string} The text unchanged when it fits, otherwise its first
 *   `maxLen` characters followed by an ellipsis.
 */
function formatUrlForLog(url, maxLen = 180) {
    const text = (url || '').toString();
    const fits = text.length <= maxLen;
    return fits ? text : `${text.slice(0, maxLen)}…`;
}
|
|
30
|
+
/**
 * Create `dir`, including any missing parent directories.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
    const mkdirOptions = { recursive: true };
    await fs.mkdir(dir, mkdirOptions);
}
|
|
33
|
+
/**
 * Download one image and store it in `imagesDir` as a zero-padded `NN.jpg`
 * file (the `.jpg` name is used regardless of the actual image format).
 *
 * The URL is skipped (returns null) without any network request when it is
 * empty, is a `data:` / `blob:` URL, matches the avatar or static-asset host
 * patterns below, or is not http(s). Protocol-relative URLs (`//host/...`) are
 * fetched over https. Responses smaller than 20 KiB are discarded as likely
 * avatars or icons. Every failure is logged with console.warn and reported as
 * null; this function never throws.
 *
 * @param {string} url - Image URL.
 * @param {string} imagesDir - Existing directory the file is written into.
 * @param {number} index - Image index used to build the file name.
 * @param {number} [timeoutMs=30000] - Deadline for the whole request (headers
 *   and body). A non-finite or non-positive value disables the deadline.
 * @returns {Promise<string|null>} Relative path `images/NN.jpg` on success,
 *   otherwise null.
 */
async function downloadImage(url, imagesDir, index, timeoutMs = 30000) {
    if (!url)
        return null;
    // Minimum size (bytes) below which a download is assumed to be an avatar/icon.
    const MIN_IMAGE_BYTES = 20 * 1024; // about 20KB
    let normalized = String(url).trim();
    if (!normalized)
        return null;
    if (normalized.startsWith('//')) {
        normalized = `https:${normalized}`;
    }
    // Avatars and static icons are not note images; skip them up front to
    // avoid a large number of pointless fetches.
    const skipByUrl = /sns-avatar-[^/]+\.xhscdn\.com\/avatar\//i.test(normalized) ||
        /picasso-static\.xiaohongshu\.com\/fe-platform\//i.test(normalized) ||
        normalized.startsWith('data:') ||
        normalized.startsWith('blob:');
    if (skipByUrl) {
        return null;
    }
    if (!/^https?:/i.test(normalized)) {
        console.warn(`[PersistXhsNote] Skip non-http image url: ${formatUrlForLog(normalized)}`);
        return null;
    }
    try {
        // Bound the request so one stalled response cannot hold up the caller;
        // an expired deadline rejects the fetch and is handled by the catch below.
        const signal = Number.isFinite(timeoutMs) && timeoutMs > 0
            ? AbortSignal.timeout(timeoutMs)
            : undefined;
        const res = await fetch(normalized, { signal });
        if (!res.ok) {
            console.warn(`[PersistXhsNote] Image fetch failed: ${formatUrlForLog(normalized)} status=${res.status}`);
            return null;
        }
        const buf = Buffer.from(await res.arrayBuffer());
        // Rough byte-size filter for avatars / small icons.
        if (buf.length < MIN_IMAGE_BYTES) {
            console.warn(`[PersistXhsNote] Skip tiny image (${buf.length}B < ${MIN_IMAGE_BYTES}B): ${formatUrlForLog(normalized)}`);
            return null;
        }
        const filename = `${String(index).padStart(2, '0')}.jpg`;
        const filepath = path.join(imagesDir, filename);
        await fs.writeFile(filepath, buf);
        return path.join('images', filename);
    }
    catch (err) {
        console.warn(`[PersistXhsNote] Image download error: ${formatUrlForLog(normalized)} - ${err?.message || err}`);
        return null;
    }
}
|
|
78
|
+
/**
 * Persist one Xiaohongshu note to disk: detail markdown, downloaded images and comments.
 *
 * Files are written under
 * `<resolveDownloadRoot()>/<platform>/<env>/<sanitized keyword>/<noteId>/`:
 * - `content.md`          note body (written only when it does not exist yet)
 * - `images/NN.jpg`       downloaded gallery images (skipped when the directory already has entries)
 * - `comments.md`         comment list (rewritten on every call that carries any comment signal)
 * - `comments.done.json`  marker written when comment collection finished, removed otherwise
 *
 * @param {object} input
 * @param {string} input.env - Environment name; used as a path segment.
 * @param {string} [input.platform='xiaohongshu'] - Platform name; used as a path segment.
 * @param {string} input.keyword - Search keyword; passed through `sanitizeForPath` for the directory name.
 * @param {string} input.noteId - Note identifier; used as the post directory name.
 * @param {string} [input.searchUrl] - Recorded in the markdown headers when present.
 * @param {string} [input.detailUrl] - Recorded in the markdown headers when present.
 * @param {object} [input.detail] - Extracted detail data (title / author / content / gallery.images).
 * @param {object} [input.commentsResult] - Comment extraction result (comments, reachedEnd, emptyState, ...).
 * @param {'detail'|'comments'|'both'} [input.persistMode='both'] - Which parts to persist.
 * @param {boolean} [input.downloadImages=true] - Whether gallery images are downloaded.
 * @param {number} [input.maxImagesToDownload] - Image cap; defaults to 6 when not a finite number.
 * @returns {Promise<{success: boolean, outputDir?: string, contentPath?: string, imagesDir?: string, error?: string}>}
 *   Failures are reported through `success: false` and `error`; the function does not throw.
 */
export async function execute(input) {
    const { env, platform = 'xiaohongshu', keyword, noteId, searchUrl, detailUrl, detail, commentsResult, persistMode = 'both', downloadImages = true, maxImagesToDownload, } = input;
    if (!env || !keyword || !noteId) {
        return {
            success: false,
            error: 'Missing env, keyword or noteId',
        };
    }
    try {
        const baseDir = path.join(resolveDownloadRoot(), platform, env);
        const safeKeyword = sanitizeForPath(keyword) || 'unknown';
        const keywordDir = path.join(baseDir, safeKeyword);
        const postDir = path.join(keywordDir, noteId);
        const imagesDir = path.join(postDir, 'images');
        await ensureDir(keywordDir);
        await ensureDir(postDir);
        if (downloadImages) {
            await ensureDir(imagesDir);
        }
        const wantDetail = persistMode === 'detail' || persistMode === 'both';
        const wantComments = persistMode === 'comments' || persistMode === 'both';
        const detailData = detail || {};
        // The extractor may expose the same field under several shapes; take the first non-empty one.
        const titleFromDetail = detailData.title ||
            detailData.note_title ||
            detailData.header?.title ||
            detailData.content?.title ||
            '';
        const title = titleFromDetail || '无标题';
        const author = detailData.author ||
            detailData.header?.author ||
            detailData.header?.user_name ||
            detailData.header?.nickname ||
            '';
        const contentText = detailData.contentText ||
            detailData.content?.text ||
            detailData.content?.desc ||
            detailData.content?.content ||
            '';
        const detailPath = path.join(postDir, 'content.md');
        const commentsPath = path.join(postDir, 'comments.md');
        const commentsDonePath = path.join(postDir, 'comments.done.json');
        // 1) Detail (body text / images)
        if (wantDetail) {
            const existingDetail = await fs.stat(detailPath).catch(() => null);
            // Image download: when the images directory already has entries, do not download again.
            let hasAnyImage = false;
            if (downloadImages) {
                const existing = await fs.readdir(imagesDir).catch(() => []);
                hasAnyImage = Array.isArray(existing) && existing.length > 0;
            }
            const localImages = [];
            const images = Array.isArray(detailData?.gallery?.images)
                ? detailData.gallery.images
                : [];
            const maxImages = typeof maxImagesToDownload === 'number' && Number.isFinite(maxImagesToDownload)
                ? Math.max(0, Math.floor(maxImagesToDownload))
                : 6;
            if (downloadImages && !hasAnyImage && images.length > 0 && maxImages > 0) {
                let imgIndex = 0;
                for (const url of images) {
                    imgIndex += 1;
                    if (imgIndex > maxImages)
                        break;
                    // Sequential on purpose: one request at a time; skipped images leave gaps in numbering.
                    const rel = await downloadImage(url, imagesDir, imgIndex);
                    if (rel)
                        localImages.push(rel);
                }
            }
            // content.md is only created once; an existing file is never overwritten.
            if (!existingDetail || !existingDetail.isFile()) {
                const lines = [];
                lines.push(`# ${title}`);
                lines.push('');
                lines.push(`- Note ID: ${noteId || '未知'}`);
                lines.push(`- 关键词: ${keyword || '未知'}`);
                if (searchUrl)
                    lines.push(`- Search URL: ${searchUrl}`);
                if (detailUrl)
                    lines.push(`- 链接: ${detailUrl}`);
                if (author)
                    lines.push(`- 作者: ${author}`);
                lines.push(`- 采集时间: ${new Date().toISOString()}`);
                lines.push('');
                lines.push('## 正文');
                lines.push('');
                lines.push(contentText || '(无正文)');
                lines.push('');
                // Only images downloaded during this call are rendered.
                if (localImages.length > 0) {
                    lines.push('## 图片');
                    lines.push('');
                    for (const rel of localImages) {
                        // Markdown links need forward slashes even when path.join produced backslashes.
                        const safeRel = rel.replace(/\\/g, '/');
                        lines.push(`![](${safeRel})`);
                    }
                    lines.push('');
                }
                await fs.writeFile(detailPath, lines.join('\n'), 'utf-8');
            }
        }
        // 2) Comments (separate file so later runs can update it incrementally)
        if (wantComments) {
            const comments = Array.isArray(commentsResult?.comments) ? commentsResult.comments : [];
            const headerTotal = typeof commentsResult?.totalFromHeader === 'number' ? commentsResult.totalFromHeader : null;
            const headerPositive = typeof headerTotal === 'number' && Number.isFinite(headerTotal) && headerTotal > 0;
            // Coverage compares collected comments with the count shown in the page header (90% needed).
            const coverageRatio = headerPositive ? comments.length / headerTotal : null;
            const coverageNeed = headerPositive ? Math.ceil(headerTotal * 0.9) : null;
            const coverageOk = headerPositive ? comments.length >= (coverageNeed || 0) : null;
            const maxComments = typeof commentsResult?.maxComments === 'number' && Number.isFinite(commentsResult.maxComments)
                ? Math.floor(commentsResult.maxComments)
                : null;
            const stoppedByMaxComments = Boolean(commentsResult?.stoppedByMaxComments);
            // With no comments and no explicit reachedEnd / emptyState / header signal, do not write comments.md.
            const hasAnySignal = comments.length > 0 ||
                typeof commentsResult?.reachedEnd === 'boolean' ||
                typeof commentsResult?.emptyState === 'boolean' ||
                typeof headerTotal === 'number';
            if (hasAnySignal) {
                const lines = [];
                lines.push(`# 评论(${comments.length})`);
                lines.push('');
                lines.push(`- Note ID: ${noteId || '未知'}`);
                lines.push(`- 关键词: ${keyword || '未知'}`);
                if (searchUrl)
                    lines.push(`- Search URL: ${searchUrl}`);
                if (detailUrl)
                    lines.push(`- 链接: ${detailUrl}`);
                lines.push(`- 采集时间: ${new Date().toISOString()}`);
                lines.push(`- 评论统计: 抓取=${comments.length}, header=${headerTotal !== null ? headerTotal : '未知'}(reachedEnd=${commentsResult?.reachedEnd ? '是' : '否'}, empty=${commentsResult?.emptyState ? '是' : '否'}${headerPositive
                    ? `, coverage=${Math.round((coverageRatio || 0) * 100)}% (need>=${coverageNeed}, ok=${coverageOk ? '是' : '否'})`
                    : ''})`);
                lines.push('');
                if (comments.length === 0) {
                    lines.push('(无评论)');
                }
                else {
                    for (const c of comments) {
                        if (!c)
                            continue;
                        const user = c.user_name || c.username || '未知用户';
                        const uid = c.user_id || '';
                        const ts = c.timestamp || '';
                        const text = c.text || '';
                        const idPart = uid ? ` (${uid})` : '';
                        const tsPart = ts ? ` [${ts}]` : '';
                        lines.push(`- **${user}**${idPart}${tsPart}:${text}`);
                    }
                }
                if (stoppedByMaxComments && maxComments !== null) {
                    lines.push(`- MaxComments: ${maxComments} (stoppedByMaxComments=yes)`);
                }
                await fs.writeFile(commentsPath, lines.join('\n'), 'utf-8');
            }
            const done = Boolean(commentsResult?.reachedEnd === true ||
                commentsResult?.emptyState === true ||
                commentsResult?.stoppedByMaxComments === true);
            if (done) {
                await fs.writeFile(commentsDonePath, JSON.stringify({
                    noteId,
                    keyword,
                    done: true,
                    reachedEnd: Boolean(commentsResult?.reachedEnd),
                    emptyState: Boolean(commentsResult?.emptyState),
                    stoppedByMaxComments: Boolean(commentsResult?.stoppedByMaxComments),
                    totalComments: comments.length,
                    headerTotal,
                    ts: new Date().toISOString(),
                }, null, 2), 'utf-8');
            }
            else {
                // Not finished: remove a stale marker from an earlier run; a missing file is fine.
                await fs.unlink(commentsDonePath).catch(() => { });
            }
        }
        return {
            success: true,
            outputDir: postDir,
            contentPath: detailPath,
            imagesDir,
        };
    }
    catch (err) {
        return {
            success: false,
            error: err?.message || String(err),
        };
    }
}
|
|
270
|
+
//# sourceMappingURL=PersistXhsNoteBlock.js.map
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Block: PickSinglePost
|
|
3
|
+
*
|
|
4
|
+
* 在 snapshot 中定位单条帖子容器
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Locate a single post container in the page and return a short summary of it.
 *
 * Posts an `evaluate` command to `${serviceUrl}/command`. The evaluated script runs
 * `document.querySelectorAll(containerSelector)` and summarises the element at `index`
 * (tag name, class list, first 200 characters of text, first 500 characters of HTML).
 *
 * @param {object} input
 * @param {string} input.sessionId - Sent as `profileId` of the evaluate command.
 * @param {string} input.containerSelector - CSS selector for the post containers.
 * @param {number} [input.index=0] - Zero-based position of the container to pick.
 * @param {string} [input.serviceUrl='http://127.0.0.1:7704'] - Base URL of the command service.
 * @returns {Promise<{element: object|null, index: number, containerId: string, error?: string}>}
 *   On failure `element` is null, `index` is 0, `containerId` is '' and `error` is set;
 *   the function does not throw.
 */
export async function execute(input) {
    const { sessionId, containerSelector, index = 0, serviceUrl = 'http://127.0.0.1:7704' } = input;
    const commandUrl = `${serviceUrl}/command`;
    // Emit both values as JSON literals so quotes or backslashes in the selector cannot
    // break out of the generated script. A non-numeric index serialises to null and is
    // rejected by the range check inside the script.
    const selectorLiteral = JSON.stringify(String(containerSelector));
    const indexLiteral = JSON.stringify(Number(index));
    try {
        const evalRes = await fetch(commandUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                action: 'evaluate',
                args: {
                    profileId: sessionId,
                    script: `
          (() => {
            const containers = document.querySelectorAll(${selectorLiteral});
            if (!containers || containers.length === 0) {
              return { error: 'No containers found' };
            }
            const index = ${indexLiteral};
            if (!Number.isInteger(index) || index < 0 || index >= containers.length) {
              return { error: 'Index out of range' };
            }
            const element = containers[index];
            return {
              tag: element.tagName,
              classes: Array.from(element.classList),
              text: element.textContent?.slice(0, 200),
              html: element.outerHTML.slice(0, 500)
            };
          })()
        `
                }
            })
        });
        const evalData = await evalRes.json();
        if (!evalData.success || evalData.data?.error) {
            return {
                element: null,
                index: 0,
                containerId: '',
                error: evalData.data?.error || 'Failed to pick element'
            };
        }
        return {
            element: evalData.data,
            index,
            containerId: `post-${index}`
        };
    }
    catch (error) {
        return {
            element: null,
            index: 0,
            containerId: '',
            error: `Pick error: ${error?.message ?? error}`
        };
    }
}
|
|
69
|
+
//# sourceMappingURL=PickSinglePost.js.map
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ProgressTracker - 任务进度持久化与恢复
|
|
3
|
+
*
|
|
4
|
+
* 功能:
|
|
5
|
+
* 1. 保存采集进度(已采集 noteId、关键词索引、搜索轮次)
|
|
6
|
+
* - P2.1: 增加容器维度去重(noteId + containerId),避免多容器路径指向同一 note 时重复采集
|
|
7
|
+
* - 向后兼容:旧版本进度文件只有 seenNoteIds,新版本增加 seenKeys
|
|
8
|
+
* 2. 支持断点续采(进程崩溃后可恢复)
|
|
9
|
+
* 3. 提供去重依据(seenNoteIds 集合)
|
|
10
|
+
* 4. 成功完成后自动清理进度文件
|
|
11
|
+
*/
|
|
12
|
+
import { promises as fs } from 'node:fs';
|
|
13
|
+
import path from 'node:path';
|
|
14
|
+
import { atomicWriteJson, readJsonMaybe } from '../../state/src/atomic-json.js';
|
|
15
|
+
/**
 * Persists and restores collection progress in `<dataDir>/.progress_<sessionId>.json`.
 */
export class ProgressTracker {
    dataDir;
    sessionId;
    progressPath;
    /**
     * @param {string} dataDir - Directory that holds the progress file.
     * @param {string} sessionId - Session identifier embedded in the file name.
     */
    constructor(dataDir, sessionId) {
        this.dataDir = dataDir;
        this.sessionId = sessionId;
        this.progressPath = path.join(dataDir, `.progress_${sessionId}.json`);
    }
    /**
     * Save the current progress through `atomicWriteJson`.
     *
     * `version` and `updatedAt` are always stamped by this method, even when `state`
     * carries its own values (for example an object previously returned by `load()`),
     * so the file never keeps a stale timestamp.
     *
     * @param {object} state - Progress fields (collectedCount, seenKeys / seenNoteIds, keywordIndex, searchRound, ...).
     * @returns {Promise<void>}
     */
    async save(state) {
        const fullState = {
            ...state,
            version: 1,
            updatedAt: new Date().toISOString()
        };
        await atomicWriteJson(this.progressPath, fullState);
        console.log(`[ProgressTracker] 进度已保存: ${state.collectedCount} 条, ` +
            `keys=${state.seenKeys?.length || state.seenNoteIds?.length || 0}, ` +
            `keywordIndex=${state.keywordIndex}, searchRound=${state.searchRound}`);
    }
    /**
     * Load previously saved progress.
     *
     * Backward compatibility: a file that has `seenNoteIds` but no `seenKeys` gets
     * `seenKeys` derived from the note ids with an empty container id.
     *
     * @returns {Promise<object|null>} The stored state, or null when `readJsonMaybe`
     *   yields nothing, the version is not 1, or loading fails.
     */
    async load() {
        try {
            const state = await readJsonMaybe(this.progressPath);
            if (!state)
                return null;
            // Only version 1 files are understood.
            if (state.version !== 1) {
                console.warn(`[ProgressTracker] 不支持的进度版本: ${state.version}`);
                return null;
            }
            // Older files: synthesise seenKeys from seenNoteIds.
            if (!state.seenKeys && state.seenNoteIds) {
                state.seenKeys = state.seenNoteIds.map(noteId => `${noteId}||`);
                console.log(`[ProgressTracker] 兼容旧版本进度文件,从 seenNoteIds 生成 ${state.seenKeys.length} 个 seenKeys`);
            }
            console.log(`[ProgressTracker] 发现保存的进度: ${state.collectedCount} 条, 最后更新: ${state.updatedAt}`);
            return state;
        }
        catch (err) {
            console.warn(`[ProgressTracker] 加载进度失败: ${err.message}`);
            return null;
        }
    }
    /**
     * Build a dedupe key of the form `noteId||containerId`.
     *
     * @param {string} noteId - Note identifier.
     * @param {string} [containerId] - Container identifier; a falsy value becomes ''.
     * @returns {string} The dedupe key.
     */
    static makeDedupeKey(noteId, containerId) {
        return `${noteId}||${containerId || ''}`;
    }
    /**
     * Split a dedupe key back into its parts.
     *
     * The key is split at the FIRST `||` only, so a container id that itself contains
     * `||` survives a `makeDedupeKey` -> `parseDedupeKey` round trip.
     *
     * @param {string} key - Dedupe key.
     * @returns {{noteId: string, containerId: string, key: string}} Parsed entry.
     */
    static parseDedupeKey(key) {
        const separatorAt = key.indexOf('||');
        const noteId = separatorAt === -1 ? key : key.slice(0, separatorAt);
        const containerId = separatorAt === -1 ? '' : key.slice(separatorAt + 2);
        return {
            noteId: noteId || '',
            containerId: containerId || '',
            key
        };
    }
    /**
     * Delete the progress file (intended to be called after a successful run).
     * A missing file is ignored; other failures are logged, not thrown.
     *
     * @returns {Promise<void>}
     */
    async cleanup() {
        try {
            await fs.unlink(this.progressPath);
            console.log('[ProgressTracker] 进度文件已清理');
        }
        catch (err) {
            if (err.code !== 'ENOENT') {
                console.warn(`[ProgressTracker] 清理进度文件失败: ${err.message}`);
            }
        }
    }
    /**
     * Check whether a progress file is accessible.
     *
     * @returns {Promise<boolean>} True when `fs.access` succeeds on the progress path.
     */
    async exists() {
        try {
            await fs.access(this.progressPath);
            return true;
        }
        catch {
            return false;
        }
    }
}
|
|
119
|
+
/**
 * Convenience factory: build a ProgressTracker for the given data directory and session.
 *
 * @param {string} dataDir - Forwarded to the ProgressTracker constructor.
 * @param {string} sessionId - Forwarded to the ProgressTracker constructor.
 * @returns {ProgressTracker} A new tracker instance.
 */
export function createProgressTracker(dataDir, sessionId) {
    const tracker = new ProgressTracker(dataDir, sessionId);
    return tracker;
}
|
|
125
|
+
//# sourceMappingURL=ProgressTracker.js.map
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RecordFixtureBlock
|
|
3
|
+
*
|
|
4
|
+
* 通用 fixture 录制 Block,将结构化数据写入用户目录:
|
|
5
|
+
* ~/.webauto/fixtures/{platform}/{category}-{id}.json
|
|
6
|
+
*/
|
|
7
|
+
import os from 'node:os';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
import { promises as fs } from 'node:fs';
|
|
10
|
+
/**
 * Record a fixture as JSON under `~/.webauto/fixtures/<platform>/<category>-<id>.json`.
 *
 * The file contains `{ platform, category, id, capturedAt, data }`, where `capturedAt`
 * is the ISO timestamp taken at write time. An existing file is overwritten.
 *
 * @param {object} input
 * @param {string} input.platform - Platform name; also the sub-directory name.
 * @param {string} input.category - Fixture category; first part of the file name.
 * @param {string} input.id - Fixture id; second part of the file name.
 * @param {*} [input.data] - Payload stored under `data`.
 * @returns {Promise<{success: boolean, path?: string, error?: string}>}
 *   `path` on success, `error` on failure; the function does not throw.
 */
export async function execute(input) {
    const { platform, category, id, data } = input;
    const missingRequired = [platform, category, id].some((value) => !value);
    if (missingRequired) {
        return { success: false, error: 'platform, category and id are required' };
    }
    try {
        const fixtureDir = path.join(os.homedir(), '.webauto', 'fixtures', platform);
        await fs.mkdir(fixtureDir, { recursive: true });
        const target = path.join(fixtureDir, `${category}-${id}.json`);
        const serialized = JSON.stringify({
            platform,
            category,
            id,
            capturedAt: new Date().toISOString(),
            data,
        }, null, 2);
        await fs.writeFile(target, serialized, 'utf-8');
        return { success: true, path: target };
    }
    catch (err) {
        return { success: false, error: err?.message || String(err) };
    }
}
|
|
44
|
+
//# sourceMappingURL=RecordFixtureBlock.js.map
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Block: RenderMarkdown
|
|
3
|
+
*
|
|
4
|
+
* 渲染 Markdown 输出
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Render collected posts as a Markdown document.
 *
 * The document starts with a fixed title, the render timestamp and the total count,
 * followed by one numbered section per post (author, time, first post link, content).
 * Entries that are null or undefined are rendered as a section with placeholder
 * content instead of raising a TypeError. `input.template` is not read.
 *
 * @param {object} input
 * @param {Array<object|null>} input.posts - Posts with optional `author`, `time`, `postLinks`, `content`.
 * @returns {Promise<{markdown: string, count: number, error?: string}>}
 *   `count` equals `posts.length`; when `posts` is not an array the result is
 *   `{ markdown: '', count: 0, error: 'Invalid posts data' }`.
 */
export async function execute(input) {
    const { posts } = input;
    if (!Array.isArray(posts)) {
        return {
            markdown: '',
            count: 0,
            error: 'Invalid posts data'
        };
    }
    const lines = [];
    lines.push(`# 微博采集结果`);
    lines.push(`采集时间: ${new Date().toISOString()}`);
    lines.push(`总计: ${posts.length} 条`);
    lines.push('');
    posts.forEach((entry, index) => {
        // Keep the section (and the numbering) even for a missing entry.
        const post = entry ?? {};
        lines.push(`## ${index + 1}`);
        if (post.author) {
            lines.push(`**作者**: ${post.author}`);
        }
        if (post.time) {
            lines.push(`**时间**: ${post.time}`);
        }
        if (post.postLinks && post.postLinks.length > 0) {
            lines.push(`**帖子链接**: ${post.postLinks[0]}`);
        }
        lines.push('');
        lines.push(post.content || '(无内容)');
        lines.push('');
        lines.push('---');
        lines.push('');
    });
    return {
        markdown: lines.join('\n'),
        count: posts.length
    };
}
|
|
48
|
+
//# sourceMappingURL=RenderMarkdown.js.map
|