@jackwener/opencli 1.6.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/CONTRIBUTING.md +1 -1
- package/README.md +27 -45
- package/README.zh-CN.md +32 -34
- package/autoresearch/browse-tasks.json +18 -20
- package/autoresearch/commands/debug.ts +163 -0
- package/autoresearch/commands/fix.ts +145 -0
- package/autoresearch/commands/plan.ts +88 -0
- package/autoresearch/commands/run.ts +138 -0
- package/autoresearch/config.ts +82 -0
- package/autoresearch/engine.ts +359 -0
- package/autoresearch/eval-all.ts +127 -0
- package/autoresearch/eval-browse.ts +1 -1
- package/autoresearch/eval-publish.ts +238 -0
- package/autoresearch/eval-save.ts +249 -0
- package/autoresearch/eval-skill.ts +14 -8
- package/autoresearch/eval-v2ex.ts +220 -0
- package/autoresearch/eval-zhihu.ts +230 -0
- package/autoresearch/logger.ts +69 -0
- package/autoresearch/presets/combined-reliability.ts +27 -0
- package/autoresearch/presets/index.ts +23 -0
- package/autoresearch/presets/operate-reliability.ts +24 -0
- package/autoresearch/presets/save-reliability.ts +26 -0
- package/autoresearch/presets/skill-quality.ts +20 -0
- package/autoresearch/presets/v2ex-reliability.ts +24 -0
- package/autoresearch/presets/zhihu-reliability.ts +25 -0
- package/autoresearch/publish-tasks.json +345 -0
- package/autoresearch/run-save.sh +11 -0
- package/autoresearch/save-adapters/xhs-explore-deep.ts +64 -0
- package/autoresearch/save-adapters/xhs-note-comments.ts +61 -0
- package/autoresearch/save-adapters/xhs-search-full.ts +62 -0
- package/autoresearch/save-adapters/zhihu-hot-detail.ts +52 -0
- package/autoresearch/save-adapters/zhihu-question-full.ts +57 -0
- package/autoresearch/save-adapters/zhihu-search-detail.ts +53 -0
- package/autoresearch/save-tasks.json +281 -0
- package/autoresearch/v2ex-tasks.json +899 -0
- package/autoresearch/zhihu-tasks.json +848 -0
- package/bun.lock +615 -0
- package/dist/browser/base-page.d.ts +4 -2
- package/dist/browser/base-page.js +37 -4
- package/dist/browser/bridge.js +10 -8
- package/dist/browser/cdp.js +2 -6
- package/dist/browser/daemon-client.d.ts +11 -1
- package/dist/browser/daemon-client.js +3 -0
- package/dist/browser/dom-helpers.d.ts +4 -2
- package/dist/browser/dom-helpers.js +42 -31
- package/dist/browser/dom-snapshot.js +23 -1
- package/dist/browser/page.d.ts +7 -2
- package/dist/browser/page.js +112 -30
- package/dist/browser.test.js +1 -1
- package/dist/build-manifest.d.ts +1 -0
- package/dist/build-manifest.js +1 -0
- package/dist/cli-manifest.json +1133 -182
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +48 -7
- package/dist/cli.test.d.ts +1 -0
- package/dist/cli.test.js +88 -0
- package/dist/clis/1688/item.d.ts +70 -0
- package/dist/clis/1688/item.js +187 -0
- package/dist/clis/1688/item.test.d.ts +1 -0
- package/dist/clis/1688/item.test.js +67 -0
- package/dist/clis/1688/search.d.ts +56 -0
- package/dist/clis/1688/search.js +309 -0
- package/dist/clis/1688/search.test.d.ts +1 -0
- package/dist/clis/1688/search.test.js +75 -0
- package/dist/clis/1688/shared.d.ts +112 -0
- package/dist/clis/1688/shared.js +514 -0
- package/dist/clis/1688/shared.test.d.ts +1 -0
- package/dist/clis/1688/shared.test.js +57 -0
- package/dist/clis/1688/store.d.ts +45 -0
- package/dist/clis/1688/store.js +226 -0
- package/dist/clis/1688/store.test.d.ts +1 -0
- package/dist/clis/1688/store.test.js +62 -0
- package/dist/clis/amazon/bestsellers.d.ts +0 -20
- package/dist/clis/amazon/bestsellers.js +6 -129
- package/dist/clis/amazon/bestsellers.test.js +12 -3
- package/dist/clis/amazon/movers-shakers.d.ts +1 -0
- package/dist/clis/amazon/movers-shakers.js +7 -0
- package/dist/clis/amazon/new-releases.d.ts +1 -0
- package/dist/clis/amazon/new-releases.js +7 -0
- package/dist/clis/amazon/rankings.d.ts +59 -0
- package/dist/clis/amazon/rankings.js +226 -0
- package/dist/clis/amazon/rankings.test.d.ts +1 -0
- package/dist/clis/amazon/rankings.test.js +41 -0
- package/dist/clis/amazon/shared.d.ts +11 -0
- package/dist/clis/amazon/shared.js +121 -11
- package/dist/clis/amazon/shared.test.js +11 -0
- package/dist/clis/bilibili/comments.js +2 -2
- package/dist/clis/bilibili/comments.test.js +3 -2
- package/dist/clis/bilibili/download.js +2 -1
- package/dist/clis/bilibili/subtitle.js +4 -3
- package/dist/clis/bilibili/subtitle.test.js +2 -1
- package/dist/clis/bilibili/utils.d.ts +5 -0
- package/dist/clis/bilibili/utils.js +30 -0
- package/dist/clis/bilibili/utils.test.d.ts +1 -0
- package/dist/clis/bilibili/utils.test.js +17 -0
- package/dist/clis/douban/marks.js +1 -1
- package/dist/clis/douban/subject.yaml +50 -19
- package/dist/clis/doubao/utils.js +32 -12
- package/dist/clis/douyin/_shared/browser-fetch.test.js +0 -1
- package/dist/clis/douyin/_shared/transcode.test.js +0 -2
- package/dist/clis/douyin/draft.test.js +0 -2
- package/dist/clis/facebook/search.test.js +0 -2
- package/dist/clis/gemini/ask.js +9 -3
- package/dist/clis/gemini/ask.test.d.ts +1 -0
- package/dist/clis/gemini/ask.test.js +100 -0
- package/dist/clis/gemini/reply-state.test.d.ts +1 -0
- package/dist/clis/gemini/reply-state.test.js +641 -0
- package/dist/clis/gemini/utils.d.ts +44 -1
- package/dist/clis/gemini/utils.js +528 -61
- package/dist/clis/gemini/utils.test.js +149 -2
- package/dist/clis/hupu/detail.d.ts +1 -0
- package/dist/clis/hupu/detail.js +72 -0
- package/dist/clis/hupu/hot.yaml +43 -0
- package/dist/clis/hupu/like.d.ts +1 -0
- package/dist/clis/hupu/like.js +75 -0
- package/dist/clis/hupu/reply.d.ts +1 -0
- package/dist/clis/hupu/reply.js +71 -0
- package/dist/clis/hupu/search.d.ts +1 -0
- package/dist/clis/hupu/search.js +59 -0
- package/dist/clis/hupu/unlike.d.ts +1 -0
- package/dist/clis/hupu/unlike.js +75 -0
- package/dist/clis/hupu/utils.d.ts +20 -0
- package/dist/clis/hupu/utils.js +319 -0
- package/dist/clis/instagram/_shared/private-publish.d.ts +138 -0
- package/dist/clis/instagram/_shared/private-publish.js +1030 -0
- package/dist/clis/instagram/_shared/private-publish.test.d.ts +1 -0
- package/dist/clis/instagram/_shared/private-publish.test.js +705 -0
- package/dist/clis/instagram/_shared/protocol-capture.d.ts +26 -0
- package/dist/clis/instagram/_shared/protocol-capture.js +282 -0
- package/dist/clis/instagram/_shared/protocol-capture.test.d.ts +1 -0
- package/dist/clis/instagram/_shared/protocol-capture.test.js +114 -0
- package/dist/clis/instagram/_shared/runtime-info.d.ts +9 -0
- package/dist/clis/instagram/_shared/runtime-info.js +81 -0
- package/dist/clis/instagram/note.d.ts +1 -0
- package/dist/clis/instagram/note.js +222 -0
- package/dist/clis/instagram/note.test.d.ts +1 -0
- package/dist/clis/instagram/note.test.js +81 -0
- package/dist/clis/instagram/post.d.ts +4 -0
- package/dist/clis/instagram/post.js +1496 -0
- package/dist/clis/instagram/post.test.d.ts +1 -0
- package/dist/clis/instagram/post.test.js +1647 -0
- package/dist/clis/instagram/reel.d.ts +1 -0
- package/dist/clis/instagram/reel.js +826 -0
- package/dist/clis/instagram/reel.test.d.ts +1 -0
- package/dist/clis/instagram/reel.test.js +167 -0
- package/dist/clis/instagram/story.d.ts +1 -0
- package/dist/clis/instagram/story.js +115 -0
- package/dist/clis/instagram/story.test.d.ts +1 -0
- package/dist/clis/instagram/story.test.js +167 -0
- package/dist/clis/sinafinance/stock-rank.d.ts +4 -0
- package/dist/clis/sinafinance/stock-rank.js +65 -0
- package/dist/clis/substack/utils.test.js +0 -2
- package/dist/clis/twitter/post.js +72 -45
- package/dist/clis/twitter/post.test.d.ts +1 -0
- package/dist/clis/twitter/post.test.js +116 -0
- package/dist/clis/twitter/reply.d.ts +12 -0
- package/dist/clis/twitter/reply.js +257 -35
- package/dist/clis/twitter/reply.test.d.ts +1 -0
- package/dist/clis/twitter/reply.test.js +151 -0
- package/dist/clis/twitter/search.js +67 -5
- package/dist/clis/twitter/search.test.js +83 -5
- package/dist/clis/xianyu/chat.d.ts +7 -0
- package/dist/clis/xianyu/chat.js +146 -0
- package/dist/clis/xianyu/chat.test.d.ts +1 -0
- package/dist/clis/xianyu/chat.test.js +15 -0
- package/dist/clis/xianyu/item.d.ts +7 -0
- package/dist/clis/xianyu/item.js +152 -0
- package/dist/clis/xianyu/item.test.d.ts +1 -0
- package/dist/clis/xianyu/item.test.js +56 -0
- package/dist/clis/xianyu/search.d.ts +10 -0
- package/dist/clis/xianyu/search.js +134 -0
- package/dist/clis/xianyu/search.test.d.ts +1 -0
- package/dist/clis/xianyu/search.test.js +17 -0
- package/dist/clis/xianyu/utils.d.ts +1 -0
- package/dist/clis/xianyu/utils.js +8 -0
- package/dist/clis/xiaoe/catalog.yaml +129 -0
- package/dist/clis/xiaoe/content.yaml +43 -0
- package/dist/clis/xiaoe/courses.yaml +73 -0
- package/dist/clis/xiaoe/detail.yaml +39 -0
- package/dist/clis/xiaoe/play-url.yaml +124 -0
- package/dist/clis/xiaohongshu/comments.test.js +0 -2
- package/dist/clis/xiaohongshu/creator-note-detail.test.js +0 -2
- package/dist/clis/xiaohongshu/creator-notes.test.js +0 -2
- package/dist/clis/xiaohongshu/download.test.js +0 -2
- package/dist/clis/xiaohongshu/note.test.js +0 -2
- package/dist/clis/xiaohongshu/publish.test.js +0 -2
- package/dist/clis/xiaohongshu/search.js +29 -20
- package/dist/clis/xiaohongshu/search.test.js +56 -48
- package/dist/clis/yuanbao/ask.d.ts +21 -0
- package/dist/clis/yuanbao/ask.js +427 -0
- package/dist/clis/yuanbao/ask.test.d.ts +1 -0
- package/dist/clis/yuanbao/ask.test.js +124 -0
- package/dist/clis/yuanbao/new.d.ts +1 -0
- package/dist/clis/yuanbao/new.js +70 -0
- package/dist/clis/yuanbao/new.test.d.ts +1 -0
- package/dist/clis/yuanbao/new.test.js +30 -0
- package/dist/clis/yuanbao/shared.d.ts +13 -0
- package/dist/clis/yuanbao/shared.js +49 -0
- package/dist/clis/zhihu/question.js +30 -19
- package/dist/clis/zhihu/question.test.js +34 -16
- package/dist/commanderAdapter.js +8 -4
- package/dist/commanderAdapter.test.js +42 -0
- package/dist/completion.js +3 -1
- package/dist/completion.test.d.ts +1 -0
- package/dist/completion.test.js +23 -0
- package/dist/doctor.js +1 -1
- package/dist/electron-apps.d.ts +2 -0
- package/dist/electron-apps.js +7 -1
- package/dist/errors.js +1 -1
- package/dist/execution.js +25 -35
- package/dist/explore.js +1 -1
- package/dist/launcher.d.ts +4 -0
- package/dist/launcher.js +64 -8
- package/dist/launcher.test.js +88 -7
- package/dist/output.d.ts +2 -0
- package/dist/output.js +10 -1
- package/dist/output.test.d.ts +0 -3
- package/dist/output.test.js +59 -92
- package/dist/pipeline/executor.test.js +0 -2
- package/dist/pipeline/steps/download.test.js +0 -2
- package/dist/registry.d.ts +2 -0
- package/dist/serialization.d.ts +1 -0
- package/dist/serialization.js +1 -0
- package/dist/types.d.ts +9 -2
- package/docs/.vitepress/config.mts +4 -0
- package/docs/adapters/browser/1688.md +52 -0
- package/docs/adapters/browser/36kr.md +2 -1
- package/docs/adapters/browser/doubao.md +5 -1
- package/docs/adapters/browser/hupu.md +53 -0
- package/docs/adapters/browser/sinafinance.md +32 -2
- package/docs/adapters/browser/weibo.md +6 -1
- package/docs/adapters/browser/wikipedia.md +2 -0
- package/docs/adapters/browser/xianyu.md +42 -0
- package/docs/adapters/browser/xiaoe.md +44 -0
- package/docs/adapters/browser/yuanbao.md +64 -0
- package/docs/adapters/index.md +14 -5
- package/docs/comparison.md +1 -1
- package/docs/developer/ai-workflow.md +2 -2
- package/docs/developer/contributing.md +1 -1
- package/docs/developer/testing.md +2 -0
- package/docs/guide/plugins.md +1 -0
- package/docs/guide/troubleshooting.md +11 -0
- package/docs/superpowers/specs/2026-04-03-v2ex-autoresearch-design.md +41 -0
- package/docs/zh/guide/plugins.md +1 -0
- package/extension/dist/background.js +1127 -0
- package/extension/src/background.test.ts +39 -0
- package/extension/src/background.ts +223 -34
- package/extension/src/cdp.ts +194 -4
- package/extension/src/protocol.ts +22 -1
- package/package.json +3 -2
- package/scripts/postinstall.js +1 -1
- package/skills/opencli-explorer/SKILL.md +1 -1
- package/skills/opencli-oneshot/SKILL.md +2 -2
- package/skills/opencli-operate/SKILL.md +120 -27
- package/skills/opencli-usage/SKILL.md +31 -20
- package/skills/opencli-usage/browser.md +114 -16
- package/skills/opencli-usage/public-api.md +32 -3
- package/skills/smart-search/SKILL.md +156 -0
- package/skills/smart-search/references/sources-ai.md +74 -0
- package/skills/smart-search/references/sources-info.md +43 -0
- package/skills/smart-search/references/sources-media.md +50 -0
- package/skills/smart-search/references/sources-other.md +42 -0
- package/skills/smart-search/references/sources-shopping.md +31 -0
- package/skills/smart-search/references/sources-social.md +51 -0
- package/skills/smart-search/references/sources-tech.md +42 -0
- package/skills/smart-search/references/sources-travel.md +20 -0
- package/src/browser/base-page.ts +41 -6
- package/src/browser/bridge.ts +11 -8
- package/src/browser/cdp.ts +1 -8
- package/src/browser/daemon-client.ts +11 -1
- package/src/browser/dom-helpers.ts +43 -31
- package/src/browser/dom-snapshot.ts +23 -1
- package/src/browser/page.ts +115 -31
- package/src/browser.test.ts +1 -1
- package/src/build-manifest.ts +2 -0
- package/src/cli.test.ts +133 -0
- package/src/cli.ts +73 -11
- package/src/clis/1688/item.test.ts +69 -0
- package/src/clis/1688/item.ts +282 -0
- package/src/clis/1688/search.test.ts +81 -0
- package/src/clis/1688/search.ts +402 -0
- package/src/clis/1688/shared.test.ts +75 -0
- package/src/clis/1688/shared.ts +623 -0
- package/src/clis/1688/store.test.ts +69 -0
- package/src/clis/1688/store.ts +300 -0
- package/src/clis/amazon/bestsellers.test.ts +12 -3
- package/src/clis/amazon/bestsellers.ts +6 -178
- package/src/clis/amazon/movers-shakers.ts +8 -0
- package/src/clis/amazon/new-releases.ts +8 -0
- package/src/clis/amazon/rankings.test.ts +47 -0
- package/src/clis/amazon/rankings.ts +312 -0
- package/src/clis/amazon/shared.test.ts +16 -0
- package/src/clis/amazon/shared.ts +134 -12
- package/src/clis/bilibili/comments.test.ts +4 -3
- package/src/clis/bilibili/comments.ts +2 -2
- package/src/clis/bilibili/download.ts +2 -1
- package/src/clis/bilibili/subtitle.test.ts +2 -1
- package/src/clis/bilibili/subtitle.ts +4 -3
- package/src/clis/bilibili/utils.test.ts +21 -0
- package/src/clis/bilibili/utils.ts +27 -0
- package/src/clis/douban/marks.ts +1 -1
- package/src/clis/douban/subject.yaml +50 -19
- package/src/clis/doubao/utils.ts +32 -12
- package/src/clis/douyin/_shared/browser-fetch.test.ts +0 -1
- package/src/clis/douyin/_shared/transcode.test.ts +0 -2
- package/src/clis/douyin/draft.test.ts +0 -2
- package/src/clis/facebook/search.test.ts +0 -2
- package/src/clis/gemini/ask.test.ts +116 -0
- package/src/clis/gemini/ask.ts +10 -3
- package/src/clis/gemini/reply-state.test.ts +708 -0
- package/src/clis/gemini/utils.test.ts +184 -2
- package/src/clis/gemini/utils.ts +588 -60
- package/src/clis/hupu/detail.ts +126 -0
- package/src/clis/hupu/hot.yaml +43 -0
- package/src/clis/hupu/like.ts +76 -0
- package/src/clis/hupu/reply.ts +76 -0
- package/src/clis/hupu/search.ts +95 -0
- package/src/clis/hupu/unlike.ts +76 -0
- package/src/clis/hupu/utils.ts +381 -0
- package/src/clis/instagram/_shared/private-publish.test.ts +827 -0
- package/src/clis/instagram/_shared/private-publish.ts +1303 -0
- package/src/clis/instagram/_shared/protocol-capture.test.ts +148 -0
- package/src/clis/instagram/_shared/protocol-capture.ts +321 -0
- package/src/clis/instagram/_shared/runtime-info.ts +91 -0
- package/src/clis/instagram/note.test.ts +96 -0
- package/src/clis/instagram/note.ts +254 -0
- package/src/clis/instagram/post.test.ts +1716 -0
- package/src/clis/instagram/post.ts +1620 -0
- package/src/clis/instagram/reel.test.ts +191 -0
- package/src/clis/instagram/reel.ts +886 -0
- package/src/clis/instagram/story.test.ts +191 -0
- package/src/clis/instagram/story.ts +151 -0
- package/src/clis/sinafinance/stock-rank.ts +68 -0
- package/src/clis/substack/utils.test.ts +0 -2
- package/src/clis/twitter/post.test.ts +157 -0
- package/src/clis/twitter/post.ts +82 -48
- package/src/clis/twitter/reply.test.ts +177 -0
- package/src/clis/twitter/reply.ts +285 -39
- package/src/clis/twitter/search.test.ts +88 -5
- package/src/clis/twitter/search.ts +68 -5
- package/src/clis/xianyu/chat.test.ts +20 -0
- package/src/clis/xianyu/chat.ts +175 -0
- package/src/clis/xianyu/item.test.ts +67 -0
- package/src/clis/xianyu/item.ts +172 -0
- package/src/clis/xianyu/search.test.ts +22 -0
- package/src/clis/xianyu/search.ts +151 -0
- package/src/clis/xianyu/utils.ts +9 -0
- package/src/clis/xiaoe/catalog.yaml +129 -0
- package/src/clis/xiaoe/content.yaml +43 -0
- package/src/clis/xiaoe/courses.yaml +73 -0
- package/src/clis/xiaoe/detail.yaml +39 -0
- package/src/clis/xiaoe/play-url.yaml +124 -0
- package/src/clis/xiaohongshu/comments.test.ts +0 -2
- package/src/clis/xiaohongshu/creator-note-detail.test.ts +0 -2
- package/src/clis/xiaohongshu/creator-notes.test.ts +0 -2
- package/src/clis/xiaohongshu/download.test.ts +0 -2
- package/src/clis/xiaohongshu/note.test.ts +0 -2
- package/src/clis/xiaohongshu/publish.test.ts +0 -2
- package/src/clis/xiaohongshu/search.test.ts +59 -48
- package/src/clis/xiaohongshu/search.ts +31 -21
- package/src/clis/yuanbao/ask.test.ts +156 -0
- package/src/clis/yuanbao/ask.ts +522 -0
- package/src/clis/yuanbao/new.test.ts +36 -0
- package/src/clis/yuanbao/new.ts +81 -0
- package/src/clis/yuanbao/shared.ts +57 -0
- package/src/clis/zhihu/question.test.ts +42 -17
- package/src/clis/zhihu/question.ts +31 -26
- package/src/commanderAdapter.test.ts +51 -0
- package/src/commanderAdapter.ts +8 -4
- package/src/completion.test.ts +30 -0
- package/src/completion.ts +3 -1
- package/src/doctor.ts +1 -1
- package/src/electron-apps.ts +9 -1
- package/src/errors.ts +1 -1
- package/src/execution.ts +26 -30
- package/src/explore.ts +1 -1
- package/src/launcher.test.ts +121 -7
- package/src/launcher.ts +87 -9
- package/src/output.test.ts +50 -90
- package/src/output.ts +10 -1
- package/src/pipeline/executor.test.ts +0 -2
- package/src/pipeline/steps/download.test.ts +0 -2
- package/src/registry.ts +2 -0
- package/src/serialization.ts +2 -0
- package/src/types.ts +9 -2
- package/tests/e2e/browser-auth.test.ts +9 -0
- package/CLI-EXPLORER.md +0 -724
- package/CLI-ONESHOT.md +0 -216
- package/SKILL.md +0 -59
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { CommandExecutionError } from '../../errors.js';
|
|
2
|
+
import { Strategy } from '../../registry.js';
|
|
3
|
+
import { assertUsableState, buildProvenance, cleanText, extractAsin, extractCategoryNodeId, extractReviewCountFromCardText, firstMeaningfulLine, gotoAndReadState, isRankingPaginationUrl, normalizeProductUrl, parsePriceText, parseRatingValue, parseReviewCount, resolveRankingUrl, toAbsoluteAmazonUrl, uniqueNonEmpty, } from './shared.js';
|
|
4
|
+
/**
 * Extract a positive rank number from scraped badge text (e.g. "#3").
 *
 * @param {*} rawRank - Raw rank text from a ranking card; passed through cleanText first.
 * @param {number} fallback - Value to return when no usable rank can be read.
 * @returns {number} The first run of 1-4 digits as a positive integer, otherwise `fallback`.
 */
function parseRank(rawRank, fallback) {
    const digits = cleanText(rawRank).match(/(\d{1,4})/);
    if (!digits) {
        return fallback;
    }
    const rank = Number.parseInt(digits[1], 10);
    if (Number.isFinite(rank) && rank > 0) {
        return rank;
    }
    // A zero rank (e.g. "#0") is not meaningful, so the caller's fallback wins.
    return fallback;
}
|
|
12
|
+
/**
 * Clean up scraped category links and drop duplicates.
 *
 * Each entry is reduced to `{ title, url, node_id }`. Entries lacking a title
 * or a resolvable URL are discarded, and when several entries resolve to the
 * same URL only the first one is kept (original order is preserved).
 *
 * @param {Array<{title?: string, url?: string, node_id?: string}>|null|undefined} links
 *   Raw link records; a nullish value is treated as an empty list.
 * @returns {Array<{title: string, url: string, node_id: string|null}>} Normalized, de-duplicated links.
 */
function normalizeVisibleCategoryLinks(links) {
    const candidates = (links ?? []).map((raw) => {
        const title = cleanText(raw?.title);
        const url = toAbsoluteAmazonUrl(raw?.url) ?? '';
        // Prefer an explicit node id; otherwise try to derive one from the URL.
        const node_id = cleanText(raw?.node_id) || extractCategoryNodeId(raw?.url) || null;
        return { title, url, node_id };
    });
    // Map keeps insertion order, so the first link seen for a URL is the one retained.
    const firstByUrl = new Map();
    for (const link of candidates) {
        if (!link.title || !link.url) {
            continue;
        }
        if (!firstByUrl.has(link.url)) {
            firstByUrl.set(link.url, link);
        }
    }
    return Array.from(firstByUrl.values());
}
|
|
30
|
+
/**
 * Convert one scraped ranking card into the flat record shape returned by the
 * ranking commands.
 *
 * @param {object} candidate - Raw card fields (`rank_text`, `asin`, `title`, `href`,
 *   `price_text`, `rating_text`, `review_count_text`, `card_text`).
 * @param {object} context - Page-level data shared by every card: `listType`,
 *   `rankFallback`, `listTitle`, `sourceUrl`, `categoryTitle`, `categoryUrl`,
 *   `categoryPath`, `visibleCategoryLinks`.
 * @returns {object} Normalized record; the fields produced by `buildProvenance`
 *   are spread in last and therefore take precedence on any key collision.
 */
export function normalizeRankingCandidate(candidate, context) {
    const cardText = candidate.card_text;
    const productUrl = normalizeProductUrl(candidate.href);
    // The explicit ASIN attribute wins; otherwise try to recover it from the product URL.
    const asin = extractAsin(candidate.asin ?? '') ?? extractAsin(productUrl ?? '') ?? null;
    // When a dedicated field is empty, fall back to the card's full text.
    const title = cleanText(candidate.title) || firstMeaningfulLine(cardText);
    const price = parsePriceText(cleanText(candidate.price_text) || cardText);
    const ratingText = cleanText(candidate.rating_text) || null;
    const reviewCountText = cleanText(candidate.review_count_text)
        || extractReviewCountFromCardText(cardText)
        || null;
    const provenance = buildProvenance(context.sourceUrl);
    const categoryUrl = context.categoryUrl || context.sourceUrl;

    // Grouped for readability; the spread order below keeps the output key order stable.
    const identity = {
        list_type: context.listType,
        rank: parseRank(candidate.rank_text, context.rankFallback),
        asin,
        title: title || null,
        product_url: productUrl,
    };
    const pricing = {
        price_text: price.price_text,
        price_value: price.price_value,
        currency: price.currency,
    };
    const reviews = {
        rating_text: ratingText,
        rating_value: parseRatingValue(ratingText),
        review_count_text: reviewCountText,
        review_count: parseReviewCount(reviewCountText),
    };
    const category = {
        list_title: context.listTitle,
        category_title: context.categoryTitle,
        category_url: categoryUrl,
        category_node_id: extractCategoryNodeId(categoryUrl),
        category_path: context.categoryPath,
        visible_category_links: context.visibleCategoryLinks,
    };
    return { ...identity, ...pricing, ...reviews, ...category, ...provenance };
}
|
|
63
|
+
/**
 * Navigate to a ranking page and scrape its raw contents from the DOM.
 *
 * The page is opened through `gotoAndReadState` and checked with
 * `assertUsableState` before a self-invoking script is evaluated in the page.
 * The script returns plain data only: `href`, `title`, `list_title`,
 * `category_title`, `category_path`, `cards`, `page_links` and
 * `visible_category_links`.
 *
 * @param {object} page - Browser page handle exposing `evaluate(script)`.
 * @param {string} listType - Ranking list identifier forwarded to the shared helpers.
 * @param {string} url - Ranking page URL to open.
 * @returns {Promise<object>} Raw, un-normalized payload produced by the in-page script.
 */
async function readRankingPage(page, listType, url) {
    // NOTE(review): 2500 is presumably a settle/wait time in ms — confirm in shared.js.
    const state = await gotoAndReadState(page, url, 2500, listType);
    assertUsableState(state, listType);
    // Each field tries several selectors in priority order and falls back to ''.
    const extractionScript = `
    (() => ({
      href: window.location.href,
      title: document.title || '',
      list_title:
        document.querySelector('#zg_banner_text')?.textContent
        || document.querySelector('h1')?.textContent
        || '',
      category_title:
        document.querySelector('#zg_browseRoot .zg_selected')?.textContent
        || document.querySelector('#wayfinding-breadcrumbs_feature_div ul li:last-child')?.textContent
        || document.querySelector('#wayfinding-breadcrumbs_container ul li:last-child')?.textContent
        || '',
      category_path: Array.from(document.querySelectorAll(
        '#zg_browseRoot ul li a, #zg_browseRoot ul li span, ' +
        '#wayfinding-breadcrumbs_feature_div ul li a, #wayfinding-breadcrumbs_feature_div ul li span.a-list-item, ' +
        '#wayfinding-breadcrumbs_container ul li a, #wayfinding-breadcrumbs_container ul li span.a-list-item'
      ))
        .map((entry) => (entry.textContent || '').trim())
        .filter(Boolean),
      cards: Array.from(document.querySelectorAll(
        '.p13n-sc-uncoverable-faceout, .zg-grid-general-faceout, [data-asin][class*="p13n"]'
      )).map((card) => ({
        rank_text:
          card.querySelector('.zg-bdg-text')?.textContent
          || card.querySelector('[class*="rank"]')?.textContent
          || '',
        asin:
          card.getAttribute('data-asin')
          || card.getAttribute('id')
          || '',
        title:
          card.querySelector('[class*="line-clamp"]')?.textContent
          || card.querySelector('img')?.getAttribute('alt')
          || card.querySelector('a[href*="/dp/"]')?.textContent
          || '',
        href:
          card.querySelector('a[href*="/dp/"], a[href*="/gp/product/"]')?.href
          || '',
        price_text:
          card.querySelector('.a-price .a-offscreen')?.textContent
          || card.querySelector('.a-color-price')?.textContent
          || '',
        rating_text:
          card.querySelector('[aria-label*="out of 5 stars"]')?.getAttribute('aria-label')
          || '',
        review_count_text:
          card.querySelector('a[href*="#customerReviews"]')?.textContent
          || card.querySelector('.a-size-small')?.textContent
          || '',
        card_text: card.innerText || '',
      })),
      page_links: Array.from(document.querySelectorAll('.a-pagination a[href], li.a-normal a[href], li.a-selected a[href]'))
        .map((anchor) => anchor.href || '')
        .filter(Boolean),
      visible_category_links: Array.from(document.querySelectorAll(
        '#zg_browseRoot a[href], #zg-left-col a[href], [class*="zg-browse"] a[href]'
      )).map((anchor) => ({
        title: (anchor.textContent || '').trim(),
        url: anchor.href || '',
        node_id:
          anchor.getAttribute('data-node-id')
          || anchor.dataset?.nodeid
          || '',
      }))
        .filter((entry) => entry.title && entry.url),
    }))()
  `;
    return await page.evaluate(extractionScript);
}
|
|
135
|
+
/**
 * Build the troubleshooting hint attached to the "no ranked items" error.
 *
 * @param {string} commandName - Command name interpolated into the first sentence.
 * @returns {string} Two sentences separated by a single space.
 */
function createEmptyResultHint(commandName) {
    const verifyStep = `Open the same Amazon ${commandName} page in shared Chrome and verify ranked items are visible.`;
    const robotCheckStep = 'If the page shows a robot check, clear it manually and retry.';
    return `${verifyStep} ${robotCheckStep}`;
}
|
|
141
|
+
/**
 * Build the CLI command descriptor for one Amazon ranking list.
 *
 * @param {object} definition - Ranking command definition; this function reads
 *   `commandName`, `description` and `listType` from it.
 * @returns {object} Command options (`site`, `name`, `args`, `columns`, `func`, ...).
 *   `func(page, kwargs)` walks the ranking page and any pagination links it
 *   exposes, collecting up to `kwargs.limit` normalized items.
 * @throws {CommandExecutionError} From `func`, when no ranked items were collected.
 */
export function createRankingCliOptions(definition) {
    const inputArg = {
        name: 'input',
        positional: true,
        help: 'Ranking URL or supported Amazon path. Omit to use the list root.',
    };
    const limitArg = {
        name: 'limit',
        type: 'int',
        default: 100,
        help: 'Maximum number of ranked items to return (default 100)',
    };

    const func = async (page, kwargs) => {
        // Missing, zero or non-numeric limits fall back to 100; the floor is 1.
        const limit = Math.max(1, Number(kwargs.limit) || 100);
        const requestedInput = typeof kwargs.input === 'string' ? kwargs.input : undefined;
        // Pages are visited first-in, first-out, starting from the resolved ranking URL.
        const pendingUrls = [resolveRankingUrl(definition.listType, requestedInput)];
        const visitedUrls = new Set();
        const seenProducts = new Set();
        const results = [];
        let listTitle = null;

        while (pendingUrls.length > 0 && results.length < limit) {
            const pageUrl = pendingUrls.shift();
            if (visitedUrls.has(pageUrl)) {
                continue;
            }
            visitedUrls.add(pageUrl);

            const payload = await readRankingPage(page, definition.listType, pageUrl);
            const sourceUrl = cleanText(payload.href) || pageUrl;
            // Keep the previous page's title if this page does not expose one.
            listTitle = cleanText(payload.list_title) || cleanText(payload.title) || listTitle;
            const categoryPath = uniqueNonEmpty(payload.category_path ?? []);
            const lastPathEntry = categoryPath.length > 0 ? categoryPath[categoryPath.length - 1] : '';
            const categoryTitle = cleanText(payload.category_title) || lastPathEntry;
            const visibleCategoryLinks = normalizeVisibleCategoryLinks(payload.visible_category_links);

            for (const card of payload.cards ?? []) {
                const item = normalizeRankingCandidate(card, {
                    listType: definition.listType,
                    rankFallback: results.length + 1,
                    listTitle,
                    sourceUrl,
                    categoryTitle: categoryTitle || null,
                    categoryUrl: sourceUrl,
                    categoryPath,
                    visibleCategoryLinks,
                });
                // De-duplicate by ASIN, else by product URL; items with neither are always kept.
                const entityKey = cleanText(String(item.asin ?? ''))
                    || cleanText(String(item.product_url ?? ''));
                if (entityKey) {
                    if (seenProducts.has(entityKey)) {
                        continue;
                    }
                    seenProducts.add(entityKey);
                }
                results.push(item);
                if (results.length >= limit) {
                    break;
                }
            }

            // Queue pagination links for this list that are neither visited nor already pending.
            for (const href of uniqueNonEmpty(payload.page_links ?? [])) {
                const absolute = toAbsoluteAmazonUrl(href);
                const isFollowable = Boolean(absolute)
                    && isRankingPaginationUrl(definition.listType, absolute);
                if (isFollowable && !visitedUrls.has(absolute) && !pendingUrls.includes(absolute)) {
                    pendingUrls.push(absolute);
                }
            }
        }

        if (results.length === 0) {
            throw new CommandExecutionError(`amazon ${definition.commandName} did not expose any ranked items`, createEmptyResultHint(definition.commandName));
        }
        return results.slice(0, limit);
    };

    return {
        site: 'amazon',
        name: definition.commandName,
        description: definition.description,
        domain: 'amazon.com',
        strategy: Strategy.COOKIE,
        navigateBefore: false,
        args: [inputArg, limitArg],
        columns: ['list_type', 'rank', 'asin', 'title', 'price_text', 'rating_value', 'review_count'],
        func,
    };
}
|
|
222
|
+
/** Internal helpers exposed so the unit tests can call them directly. */
export const __test__ = { parseRank, normalizeVisibleCategoryLinks, normalizeRankingCandidate };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { __test__ } from './rankings.js';
|
|
3
|
+
// Unit tests for the pure helpers exported from rankings.js via __test__.
describe('amazon rankings helpers', () => {
    const { normalizeRankingCandidate, normalizeVisibleCategoryLinks, parseRank } = __test__;

    it('normalizes ranking candidates with unified schema', () => {
        const storageLink = { title: 'Storage', url: 'https://www.amazon.com/gp/new-releases/storage', node_id: null };
        const card = {
            rank_text: '#3',
            asin: 'B0DR31GC3D',
            title: 'Desk Shelves Desktop Organizer',
            href: 'https://www.amazon.com/dp/B0DR31GC3D/ref=zg_bs',
            price_text: '$25.92',
            rating_text: '4.3 out of 5 stars',
            review_count_text: '435',
        };
        const pageContext = {
            listType: 'new_releases',
            rankFallback: 3,
            listTitle: 'Amazon New Releases',
            sourceUrl: 'https://www.amazon.com/gp/new-releases',
            categoryTitle: 'Home & Kitchen',
            categoryUrl: 'https://www.amazon.com/gp/new-releases/home-garden',
            categoryPath: ['Home & Kitchen'],
            visibleCategoryLinks: [storageLink],
        };

        const normalized = normalizeRankingCandidate(card, pageContext);

        expect(normalized.list_type).toBe('new_releases');
        expect(normalized.rank).toBe(3);
        expect(normalized.asin).toBe('B0DR31GC3D');
        // The tracking suffix (/ref=...) is stripped from the product URL.
        expect(normalized.product_url).toBe('https://www.amazon.com/dp/B0DR31GC3D');
        expect(normalized.category_title).toBe('Home & Kitchen');
        expect(normalized.visible_category_links).toEqual([
            { title: 'Storage', url: 'https://www.amazon.com/gp/new-releases/storage', node_id: null },
        ]);
    });

    it('deduplicates category links and parses rank fallback', () => {
        // The first two entries point at the same page (relative vs. absolute URL).
        const rawLinks = [
            { title: 'Kitchen', url: '/gp/new-releases/home-garden' },
            { title: 'Kitchen', url: 'https://www.amazon.com/gp/new-releases/home-garden' },
            { title: 'Storage', url: '/gp/new-releases/storage', node_id: '1064954' },
        ];

        const uniqueLinks = normalizeVisibleCategoryLinks(rawLinks);

        expect(uniqueLinks.length).toBe(2);
        expect(parseRank('N/A', 8)).toBe(8);
    });
});
|
|
@@ -3,11 +3,14 @@ export declare const SITE = "amazon";
|
|
|
3
3
|
export declare const DOMAIN = "amazon.com";
|
|
4
4
|
export declare const HOME_URL = "https://www.amazon.com/";
|
|
5
5
|
export declare const BESTSELLERS_URL = "https://www.amazon.com/Best-Sellers/zgbs";
|
|
6
|
+
export declare const NEW_RELEASES_URL = "https://www.amazon.com/gp/new-releases";
|
|
7
|
+
export declare const MOVERS_SHAKERS_URL = "https://www.amazon.com/gp/movers-and-shakers";
|
|
6
8
|
export declare const SEARCH_URL_PREFIX = "https://www.amazon.com/s?k=";
|
|
7
9
|
export declare const PRODUCT_URL_PREFIX = "https://www.amazon.com/dp/";
|
|
8
10
|
export declare const DISCUSSION_URL_PREFIX = "https://www.amazon.com/product-reviews/";
|
|
9
11
|
export declare const STRATEGY = "cookie";
|
|
10
12
|
export declare const PRIMARY_PRICE_SELECTORS: string[];
|
|
13
|
+
export type AmazonRankingListType = 'bestsellers' | 'new_releases' | 'movers_shakers';
|
|
11
14
|
export interface ProvenanceFields {
|
|
12
15
|
source_url: string;
|
|
13
16
|
fetched_at: string;
|
|
@@ -31,6 +34,10 @@ export declare function buildSearchUrl(query: string): string;
|
|
|
31
34
|
export declare function extractAsin(input: string): string | null;
|
|
32
35
|
export declare function buildProductUrl(input: string): string;
|
|
33
36
|
export declare function buildDiscussionUrl(input: string): string;
|
|
37
|
+
export declare function isSupportedRankingPath(listType: AmazonRankingListType, inputUrl: string): boolean;
|
|
38
|
+
export declare function resolveRankingUrl(listType: AmazonRankingListType, input?: string): string;
|
|
39
|
+
export declare function isRankingPaginationUrl(listType: AmazonRankingListType, inputUrl: string): boolean;
|
|
40
|
+
export declare function extractCategoryNodeId(inputUrl: string | null | undefined): string | null;
|
|
34
41
|
export declare function resolveBestsellersUrl(input?: string): string;
|
|
35
42
|
export declare function canonicalizeAmazonUrl(input: string): string;
|
|
36
43
|
export declare function toAbsoluteAmazonUrl(value: string | null | undefined): string | null;
|
|
@@ -53,6 +60,10 @@ export declare const __test__: {
|
|
|
53
60
|
buildProductUrl: typeof buildProductUrl;
|
|
54
61
|
buildDiscussionUrl: typeof buildDiscussionUrl;
|
|
55
62
|
resolveBestsellersUrl: typeof resolveBestsellersUrl;
|
|
63
|
+
resolveRankingUrl: typeof resolveRankingUrl;
|
|
64
|
+
isSupportedRankingPath: typeof isSupportedRankingPath;
|
|
65
|
+
isRankingPaginationUrl: typeof isRankingPaginationUrl;
|
|
66
|
+
extractCategoryNodeId: typeof extractCategoryNodeId;
|
|
56
67
|
parsePriceText: typeof parsePriceText;
|
|
57
68
|
parseRatingValue: typeof parseRatingValue;
|
|
58
69
|
parseReviewCount: typeof parseReviewCount;
|
|
@@ -3,6 +3,8 @@ export const SITE = 'amazon';
|
|
|
3
3
|
export const DOMAIN = 'amazon.com';
|
|
4
4
|
export const HOME_URL = 'https://www.amazon.com/';
|
|
5
5
|
export const BESTSELLERS_URL = 'https://www.amazon.com/Best-Sellers/zgbs';
|
|
6
|
+
export const NEW_RELEASES_URL = 'https://www.amazon.com/gp/new-releases';
|
|
7
|
+
export const MOVERS_SHAKERS_URL = 'https://www.amazon.com/gp/movers-and-shakers';
|
|
6
8
|
export const SEARCH_URL_PREFIX = 'https://www.amazon.com/s?k=';
|
|
7
9
|
export const PRODUCT_URL_PREFIX = 'https://www.amazon.com/dp/';
|
|
8
10
|
export const DISCUSSION_URL_PREFIX = 'https://www.amazon.com/product-reviews/';
|
|
@@ -24,6 +26,29 @@ const ROBOT_TEXT_PATTERNS = [
|
|
|
24
26
|
'Type the characters you see in this image',
|
|
25
27
|
'To discuss automated access to Amazon data please contact',
|
|
26
28
|
];
|
|
29
|
+
/**
 * Configuration for each supported Amazon ranking list, keyed by list type.
 *
 * Fields of every entry:
 *  - commandName:         command name string for the list
 *  - rootUrl:             URL of the list's top-level page
 *  - pathPattern:         regular expression tested against a URL pathname
 *  - invalidInputMessage: error message for input that is not a URL/path of this list
 *  - invalidInputHint:    usage example accompanying that error
 */
const AMAZON_RANKING_SPECS = {
    bestsellers: {
        commandName: 'bestsellers',
        rootUrl: BESTSELLERS_URL,
        // Matches a "zgbs" path segment at any depth of the pathname.
        pathPattern: /(?:^|\/)zgbs(?:\/|$)/i,
        invalidInputMessage: 'amazon bestsellers expects a best sellers URL or /zgbs path',
        invalidInputHint: 'Example: opencli amazon bestsellers https://www.amazon.com/Best-Sellers/zgbs',
    },
    new_releases: {
        commandName: 'new-releases',
        rootUrl: NEW_RELEASES_URL,
        // Matches "/gp/new-releases" followed by "/" or the end of the pathname.
        pathPattern: /\/gp\/new-releases(?:\/|$)/i,
        invalidInputMessage: 'amazon new-releases expects a new releases URL or /gp/new-releases path',
        invalidInputHint: 'Example: opencli amazon new-releases https://www.amazon.com/gp/new-releases',
    },
    movers_shakers: {
        commandName: 'movers-shakers',
        rootUrl: MOVERS_SHAKERS_URL,
        // Matches "/gp/movers-and-shakers" followed by "/" or the end of the pathname.
        pathPattern: /\/gp\/movers-and-shakers(?:\/|$)/i,
        invalidInputMessage: 'amazon movers-shakers expects a movers-and-shakers URL or /gp/movers-and-shakers path',
        invalidInputHint: 'Example: opencli amazon movers-shakers https://www.amazon.com/gp/movers-and-shakers',
    },
};
|
|
27
52
|
export function cleanText(value) {
|
|
28
53
|
return typeof value === 'string'
|
|
29
54
|
? value.replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim()
|
|
@@ -80,22 +105,103 @@ export function buildDiscussionUrl(input) {
|
|
|
80
105
|
}
|
|
81
106
|
return `${DISCUSSION_URL_PREFIX}${asin}`;
|
|
82
107
|
}
|
|
83
|
-
|
|
108
|
+
/**
 * Look up the ranking configuration for a list type.
 *
 * @param {string} listType - A key of AMAZON_RANKING_SPECS
 *   ('bestsellers', 'new_releases' or 'movers_shakers').
 * @returns {object} The matching entry of AMAZON_RANKING_SPECS.
 * @throws {TypeError} When listType is not an own key of AMAZON_RANKING_SPECS.
 */
function getRankingSpec(listType) {
    // Own-property check: a bare bracket lookup would hand back inherited members
    // (e.g. 'constructor', 'toString') as if they were specs, and undefined for unknown
    // keys, which then fails later with an unhelpful "cannot read properties" error.
    if (!Object.prototype.hasOwnProperty.call(AMAZON_RANKING_SPECS, listType)) {
        throw new TypeError(`Unknown Amazon ranking list type: ${String(listType)}`);
    }
    return AMAZON_RANKING_SPECS[listType];
}
|
|
111
|
+
/**
 * Report whether the pathname of a URL matches the path pattern of a ranking list.
 *
 * @param {string} listType - Ranking list key passed to getRankingSpec.
 * @param {string} inputUrl - Absolute URL string to inspect.
 * @returns {boolean} true when the pathname matches; false when it does not or when
 *   anything in the check throws (for example, inputUrl is not a parseable absolute URL).
 */
export function isSupportedRankingPath(listType, inputUrl) {
    try {
        const { pathname } = new URL(inputUrl);
        const { pathPattern } = getRankingSpec(listType);
        return pathPattern.test(pathname);
    }
    catch {
        return false;
    }
}
|
|
120
|
+
/**
 * Resolve user input to the URL of an Amazon ranking list page.
 *
 * Accepted input forms:
 *  - empty / missing / the literal 'root'  -> the list's root URL
 *  - a path starting with '/'              -> resolved against HOME_URL
 *  - an absolute http(s) URL               -> passed through canonicalizeAmazonUrl
 *  - a scheme-less "amazon.<tld>/..." text -> prefixed with https:// and canonicalized
 *
 * @param {string} listType - Ranking list key ('bestsellers', 'new_releases', 'movers_shakers').
 * @param {string} [input] - User-supplied URL, path, or nothing.
 * @returns {string} The ranking URL with tracking `ref` parts and the fragment removed.
 * @throws {ArgumentError} When the input has none of the accepted forms or its path does
 *   not belong to the requested list. Errors raised by canonicalizeAmazonUrl propagate.
 */
export function resolveRankingUrl(listType, input) {
    const spec = getRankingSpec(listType);
    const normalized = cleanText(input);
    if (!normalized || normalized === 'root')
        return spec.rootUrl;
    let candidateUrl;
    if (normalized.startsWith('/')) {
        const resolved = new URL(normalized, HOME_URL);
        // Scheme-relative ("//host/path") and backslash ("/\host/path") inputs also start
        // with '/', but the URL parser resolves them to a different host. Those must get the
        // same host handling as an explicit absolute URL instead of being trusted as a path.
        // NOTE(review): relies on canonicalizeAmazonUrl rejecting non-Amazon hosts, as the
        // absolute-URL branch below already does.
        candidateUrl = resolved.origin === new URL(HOME_URL).origin
            ? resolved.toString()
            : canonicalizeAmazonUrl(resolved.toString());
    }
    else if (/^https?:\/\//i.test(normalized)) {
        candidateUrl = canonicalizeAmazonUrl(normalized);
    }
    else if (normalized.includes('amazon.') && normalized.includes('/')) {
        // No leading '/' can be present here (handled by the first branch), so the text is
        // used as-is after the scheme.
        candidateUrl = canonicalizeAmazonUrl(`https://${normalized}`);
    }
    else {
        throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
    }
    if (!isSupportedRankingPath(listType, candidateUrl)) {
        throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
    }
    return normalizeRankingInputUrl(candidateUrl);
}
|
|
143
|
+
/**
 * Strip tracking noise from a ranking URL.
 *
 * Removes empty path segments and segments beginning with "ref=", clears the fragment,
 * and deletes the `ref` query parameter. Other query parameters (for example pg=2) are kept.
 *
 * @param {string} inputUrl - Absolute URL string.
 * @returns {string} The cleaned URL, or inputUrl unchanged when it cannot be parsed.
 */
function normalizeRankingInputUrl(inputUrl) {
    let parsed;
    try {
        parsed = new URL(inputUrl);
    }
    catch {
        return inputUrl;
    }
    const keptSegments = [];
    for (const segment of parsed.pathname.split('/')) {
        if (segment && !/^ref=/i.test(segment)) {
            keptSegments.push(segment);
        }
    }
    parsed.pathname = `/${keptSegments.join('/')}`;
    parsed.hash = '';
    // Shared ranking links often carry tracking refs that can land on unstable page variants;
    // dropping them keeps the canonical ranking path.
    parsed.searchParams.delete('ref');
    return parsed.toString();
}
|
|
161
|
+
/**
 * Report whether a link points at a pagination page of the given ranking list.
 *
 * A link qualifies when it resolves to a URL on the list's path and either has a `pg`
 * query parameter or carries a pagination marker in its `ref` query parameter.
 *
 * @param {string} listType - Ranking list key.
 * @param {string} inputUrl - Link to test; made absolute via toAbsoluteAmazonUrl.
 * @returns {boolean} true for pagination links, false otherwise (including on any error).
 */
export function isRankingPaginationUrl(listType, inputUrl) {
    const absoluteUrl = toAbsoluteAmazonUrl(inputUrl);
    if (!absoluteUrl) {
        return false;
    }
    if (!isSupportedRankingPath(listType, absoluteUrl)) {
        return false;
    }
    try {
        const { searchParams } = new URL(absoluteUrl);
        const refTag = cleanText(searchParams.get('ref')).toLowerCase();
        // The pg= query parameter is the most reliable pagination indicator across all lists.
        if (searchParams.has('pg')) {
            return true;
        }
        if (/(?:^|_)pg(?:_|$)/.test(refTag)) {
            return true;
        }
        // Ranking pagination refs: zg_bs_pg_ (bestsellers), zg_bsnr_pg_ (new releases),
        // zg_bsms_pg_ (movers & shakers).
        return /zg_bs(?:nr|ms)?_pg_/.test(refTag);
    }
    catch {
        return false;
    }
}
|
|
178
|
+
/**
 * Best-effort extraction of an Amazon category (browse) node id from a URL.
 *
 * Sources are tried in this order:
 *  1. query parameters node / nodeid / nodeId / browseNode holding 4+ digits
 *  2. an "n:<digits>" term inside the `rh` query parameter
 *  3. the last path segment that consists of 4+ digits
 *
 * @param {string | null | undefined} inputUrl - URL or path; made absolute via toAbsoluteAmazonUrl.
 * @returns {string | null} The node id as a digit string, or null when none is found.
 */
export function extractCategoryNodeId(inputUrl) {
    const absolute = toAbsoluteAmazonUrl(inputUrl);
    if (!absolute)
        return null;
    try {
        const url = new URL(absolute);
        for (const key of ['node', 'nodeid', 'nodeId', 'browseNode']) {
            const value = cleanText(url.searchParams.get(key));
            if (/^\d{4,}$/.test(value))
                return value;
        }
        const rhValue = cleanText(url.searchParams.get('rh'));
        // searchParams.get() has already percent-decoded the value once. A second decode is
        // still attempted so double-encoded links keep working, but it must not be fatal:
        // a literal '%' in the value makes decodeURIComponent throw, which previously
        // aborted the whole lookup and skipped the path-based fallback below.
        let decodedRh = rhValue;
        try {
            decodedRh = decodeURIComponent(rhValue);
        }
        catch {
            decodedRh = rhValue;
        }
        const rhMatch = decodedRh.match(/(?:^|,)\s*n:(\d{4,})(?:,|$)/i);
        if (rhMatch)
            return rhMatch[1];
        const pathMatches = [...url.pathname.matchAll(/\/(\d{4,})(?=\/|$)/g)];
        if (pathMatches.length > 0) {
            // The deepest numeric segment is taken as the category node.
            return pathMatches[pathMatches.length - 1][1];
        }
    }
    catch {
        return null;
    }
    return null;
}
|
|
203
|
+
/**
 * Resolve user input to a Best Sellers URL.
 * Delegates to resolveRankingUrl with the 'bestsellers' list type.
 *
 * @param {string} [input] - URL, path, or nothing.
 * @returns {string} The resolved Best Sellers URL.
 */
export function resolveBestsellersUrl(input) {
    const listType = 'bestsellers';
    return resolveRankingUrl(listType, input);
}
|
|
100
206
|
export function canonicalizeAmazonUrl(input) {
|
|
101
207
|
try {
|
|
@@ -244,6 +350,10 @@ export const __test__ = {
|
|
|
244
350
|
buildProductUrl,
|
|
245
351
|
buildDiscussionUrl,
|
|
246
352
|
resolveBestsellersUrl,
|
|
353
|
+
resolveRankingUrl,
|
|
354
|
+
isSupportedRankingPath,
|
|
355
|
+
isRankingPaginationUrl,
|
|
356
|
+
extractCategoryNodeId,
|
|
247
357
|
parsePriceText,
|
|
248
358
|
parseRatingValue,
|
|
249
359
|
parseReviewCount,
|
|
@@ -30,4 +30,15 @@ describe('amazon shared helpers', () => {
|
|
|
30
30
|
expect(__test__.resolveBestsellersUrl('/Best-Sellers/zgbs')).toBe('https://www.amazon.com/Best-Sellers/zgbs');
|
|
31
31
|
expect(() => __test__.resolveBestsellersUrl('desk shelf organizer')).toThrow('amazon bestsellers expects a best sellers URL or /zgbs path');
|
|
32
32
|
});
|
|
33
|
+
// Covers default root URLs, path input, ref-segment stripping and rejection of foreign hosts.
it('resolves and validates all ranking list URLs', () => {
    const { resolveRankingUrl } = __test__;

    // No input resolves to each list's root URL.
    expect(resolveRankingUrl('new_releases')).toBe('https://www.amazon.com/gp/new-releases');
    expect(resolveRankingUrl('movers_shakers')).toBe('https://www.amazon.com/gp/movers-and-shakers');

    // A path is resolved against the Amazon home URL.
    expect(resolveRankingUrl('new_releases', '/gp/new-releases/kitchen')).toBe('https://www.amazon.com/gp/new-releases/kitchen');

    // A trailing ref= path segment is dropped.
    expect(resolveRankingUrl('bestsellers', 'https://www.amazon.com/Best-Sellers/zgbs/ref=zg_bsnr_tab_bs')).toBe('https://www.amazon.com/Best-Sellers/zgbs');

    // A non-Amazon host is rejected even when the path matches.
    const resolveForeignHost = () => resolveRankingUrl('movers_shakers', 'https://example.com/gp/movers-and-shakers');
    expect(resolveForeignHost).toThrow('Invalid Amazon URL');
});
|
|
40
|
+
// The node id can come from the last numeric path segment or from the rh query parameter.
it('extracts category node id from URL best effort', () => {
    const { extractCategoryNodeId } = __test__;

    const fromPath = extractCategoryNodeId('https://www.amazon.com/Best-Sellers-Home-Kitchen/zgbs/home-garden/3744371');
    expect(fromPath).toBe('3744371');

    const fromRhParam = extractCategoryNodeId('https://www.amazon.com/s?k=desk+organizer&rh=n%3A1064954');
    expect(fromRhParam).toBe('1064954');
});
|
|
33
44
|
});
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Uses the /x/v2/reply/main endpoint which is stable and doesn't depend on DOM structure.
|
|
4
4
|
*/
|
|
5
5
|
import { cli, Strategy } from '../../registry.js';
|
|
6
|
-
import { apiGet } from './utils.js';
|
|
6
|
+
import { apiGet, resolveBvid } from './utils.js';
|
|
7
7
|
cli({
|
|
8
8
|
site: 'bilibili',
|
|
9
9
|
name: 'comments',
|
|
@@ -16,7 +16,7 @@ cli({
|
|
|
16
16
|
],
|
|
17
17
|
columns: ['rank', 'author', 'text', 'likes', 'replies', 'time'],
|
|
18
18
|
func: async (page, kwargs) => {
|
|
19
|
-
const bvid =
|
|
19
|
+
const bvid = await resolveBvid(kwargs.bvid);
|
|
20
20
|
const limit = Math.min(Number(kwargs.limit) || 20, 50);
|
|
21
21
|
// Resolve bvid → aid (required by reply API)
|
|
22
22
|
const view = await apiGet(page, '/x/web-interface/view', { params: { bvid } });
|
|
@@ -2,7 +2,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
|
2
2
|
const { mockApiGet } = vi.hoisted(() => ({
|
|
3
3
|
mockApiGet: vi.fn(),
|
|
4
4
|
}));
|
|
5
|
-
vi.mock('./utils.js', () => ({
|
|
5
|
+
vi.mock('./utils.js', async (importOriginal) => ({
|
|
6
|
+
...(await importOriginal()),
|
|
6
7
|
apiGet: mockApiGet,
|
|
7
8
|
}));
|
|
8
9
|
import { getRegistry } from '../../registry.js';
|
|
@@ -47,7 +48,7 @@ describe('bilibili comments', () => {
|
|
|
47
48
|
});
|
|
48
49
|
it('throws when aid cannot be resolved', async () => {
|
|
49
50
|
mockApiGet.mockResolvedValueOnce({ data: {} }); // no aid
|
|
50
|
-
await expect(command.func({}, { bvid: '
|
|
51
|
+
await expect(command.func({}, { bvid: 'BVinvalid123', limit: 5 })).rejects.toThrow('Cannot resolve aid for bvid: BVinvalid123');
|
|
51
52
|
});
|
|
52
53
|
it('returns empty array when replies is missing', async () => {
|
|
53
54
|
mockApiGet
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import { cli, Strategy } from '../../registry.js';
|
|
11
11
|
import { checkYtdlp, sanitizeFilename } from '../../download/index.js';
|
|
12
12
|
import { downloadMedia } from '../../download/media-download.js';
|
|
13
|
+
import { resolveBvid } from './utils.js';
|
|
13
14
|
cli({
|
|
14
15
|
site: 'bilibili',
|
|
15
16
|
name: 'download',
|
|
@@ -23,7 +24,7 @@ cli({
|
|
|
23
24
|
],
|
|
24
25
|
columns: ['bvid', 'title', 'status', 'size'],
|
|
25
26
|
func: async (page, kwargs) => {
|
|
26
|
-
const bvid = kwargs.bvid;
|
|
27
|
+
const bvid = await resolveBvid(kwargs.bvid);
|
|
27
28
|
const output = kwargs.output;
|
|
28
29
|
const quality = kwargs.quality;
|
|
29
30
|
// Check yt-dlp availability
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cli, Strategy } from '../../registry.js';
|
|
2
2
|
import { AuthRequiredError, CommandExecutionError, EmptyResultError, SelectorError } from '../../errors.js';
|
|
3
|
-
import { apiGet } from './utils.js';
|
|
3
|
+
import { apiGet, resolveBvid } from './utils.js';
|
|
4
4
|
cli({
|
|
5
5
|
site: 'bilibili',
|
|
6
6
|
name: 'subtitle',
|
|
@@ -14,8 +14,9 @@ cli({
|
|
|
14
14
|
func: async (page, kwargs) => {
|
|
15
15
|
if (!page)
|
|
16
16
|
throw new CommandExecutionError('Browser session required for bilibili subtitle');
|
|
17
|
+
const bvid = await resolveBvid(kwargs.bvid);
|
|
17
18
|
// 1. 先前往视频详情页 (建立有鉴权的 Session,且这里不需要加载完整个视频)
|
|
18
|
-
await page.goto(`https://www.bilibili.com/video/${
|
|
19
|
+
await page.goto(`https://www.bilibili.com/video/${bvid}/`);
|
|
19
20
|
// 2. 利用 __INITIAL_STATE__ 获取基础信息,拿 CID
|
|
20
21
|
const cid = await page.evaluate(`(async () => {
|
|
21
22
|
const state = window.__INITIAL_STATE__ || {};
|
|
@@ -27,7 +28,7 @@ cli({
|
|
|
27
28
|
// 3. 在 Node 端使用 apiGet 获取带 Wbi 签名的字幕列表
|
|
28
29
|
// 之前纯靠 evaluate 里的 fetch 会失败,因为 B 站 /wbi/ 开头的接口强校验 w_rid,未签名直接被风控返回 403 HTML
|
|
29
30
|
const payload = await apiGet(page, '/x/player/wbi/v2', {
|
|
30
|
-
params: { bvid
|
|
31
|
+
params: { bvid, cid },
|
|
31
32
|
signed: true, // 开启 wbi_sign 自动签名
|
|
32
33
|
});
|
|
33
34
|
if (payload.code !== 0) {
|
|
@@ -3,7 +3,8 @@ import { AuthRequiredError, EmptyResultError } from '../../errors.js';
|
|
|
3
3
|
const { mockApiGet } = vi.hoisted(() => ({
|
|
4
4
|
mockApiGet: vi.fn(),
|
|
5
5
|
}));
|
|
6
|
-
vi.mock('./utils.js', () => ({
|
|
6
|
+
vi.mock('./utils.js', async (importOriginal) => ({
|
|
7
|
+
...(await importOriginal()),
|
|
7
8
|
apiGet: mockApiGet,
|
|
8
9
|
}));
|
|
9
10
|
import { getRegistry } from '../../registry.js';
|
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
* Bilibili shared helpers: WBI signing, authenticated fetch, nav data, UID resolution.
|
|
3
3
|
*/
|
|
4
4
|
import type { IPage } from '../../types.js';
|
|
5
|
+
/**
|
|
6
|
+
* Resolve Bilibili short URL / short code to BV ID.
|
|
7
|
+
* Supports: BV1MV9NBtENN, XYzsqGa, b23.tv/XYzsqGa, https://b23.tv/XYzsqGa
|
|
8
|
+
*/
|
|
9
|
+
export declare function resolveBvid(input: unknown): Promise<string>;
|
|
5
10
|
export declare function stripHtml(s: string): string;
|
|
6
11
|
export declare function payloadData(payload: any): any;
|
|
7
12
|
export declare function wbiSign(page: IPage, params: Record<string, any>): Promise<Record<string, string>>;
|
|
@@ -1,7 +1,37 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Bilibili shared helpers: WBI signing, authenticated fetch, nav data, UID resolution.
|
|
3
3
|
*/
|
|
4
|
+
import https from 'node:https';
|
|
4
5
|
import { AuthRequiredError, EmptyResultError } from '../../errors.js';
|
|
6
|
+
/**
|
|
7
|
+
* Resolve Bilibili short URL / short code to BV ID.
|
|
8
|
+
* Supports: BV1MV9NBtENN, XYzsqGa, b23.tv/XYzsqGa, https://b23.tv/XYzsqGa
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Resolve a Bilibili video reference to its BV ID.
 *
 * Accepted input:
 *  - a bare BV ID (BV1MV9NBtENN), returned as-is after trimming
 *  - any text containing "/video/BV…" (for example a full bilibili.com video URL)
 *  - a b23.tv short code or short link (XYzsqGa, b23.tv/XYzsqGa, https://b23.tv/XYzsqGa)
 *
 * Short links are resolved with one HTTPS GET to b23.tv; the BV ID is read from the
 * response's Location header and the redirect itself is not followed.
 *
 * @param {unknown} input - Value to resolve; coerced with String().
 * @returns {Promise<string>} The BV ID.
 * @throws {Error} (as a rejection) when the input is empty, the response carries no
 *   "/video/BV…" Location header, the request fails, or it times out after 5 seconds.
 */
export async function resolveBvid(input) {
    // null/undefined would otherwise be coerced to the strings "null"/"undefined" and sent
    // to b23.tv as if they were short codes.
    const trimmed = input == null ? '' : String(input).trim();
    if (!trimmed) {
        throw new Error('Cannot resolve BV ID from empty input');
    }
    if (/^BV[A-Za-z0-9]+$/i.test(trimmed)) {
        return trimmed;
    }
    // A full video URL already contains the BV ID; no network round trip is needed.
    const direct = trimmed.match(/\/video\/(BV[A-Za-z0-9]+)/);
    if (direct) {
        return direct[1];
    }
    // Scheme and host are case-insensitive, so strip them case-insensitively.
    const shortCode = trimmed.replace(/^https?:\/\//i, '').replace(/^(www\.)?b23\.tv\//i, '');
    const url = `https://b23.tv/${shortCode}`;
    return new Promise((resolve, reject) => {
        const req = https.get(url, (res) => {
            // Drain the body so the socket is released; only the headers matter here.
            res.resume();
            const match = res.headers.location?.match(/\/video\/(BV[A-Za-z0-9]+)/);
            if (match) {
                resolve(match[1]);
                return;
            }
            reject(new Error(`Cannot resolve BV ID from short URL: ${trimmed}`));
        });
        req.on('error', reject);
        req.setTimeout(5000, () => {
            req.destroy();
            reject(new Error(`Timeout resolving short URL: ${trimmed}`));
        });
    });
}
|
|
5
35
|
const MIXIN_KEY_ENC_TAB = [
|
|
6
36
|
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
|
|
7
37
|
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { resolveBvid } from './utils.js';
|
|
3
|
+
// Unit tests for resolveBvid. The pass-through cases never touch the network.
describe('resolveBvid', () => {
    it('passes through a valid BV ID', async () => {
        expect(await resolveBvid('BV1MV9NBtENN')).toBe('BV1MV9NBtENN');
    });
    it('passes through BV ID with surrounding whitespace', async () => {
        expect(await resolveBvid('  BV1MV9NBtENN  ')).toBe('BV1MV9NBtENN');
    });
    it('handles non-string input via String() coercion', async () => {
        // Pass an actual non-string value; a string literal here would not exercise the
        // String() coercion this case is named after.
        const nonString = { toString: () => 'BV123abc' };
        expect(await resolveBvid(nonString)).toBe('BV123abc');
    });
    it('rejects invalid input that cannot be resolved', async () => {
        // A random string that b23.tv won't resolve — should timeout or fail.
        // NOTE(review): this input is not a BV ID, so resolveBvid issues a real HTTPS
        // request to b23.tv; the case depends on network access.
        await expect(resolveBvid('not-a-valid-code-99999')).rejects.toThrow();
    });
});
|