@jackwener/opencli 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/build-extension.yml +2 -6
- package/.github/workflows/ci.yml +21 -1
- package/README.md +35 -6
- package/README.zh-CN.md +12 -5
- package/SKILL.md +2 -0
- package/dist/browser/cdp.d.ts +2 -1
- package/dist/browser/discover.d.ts +4 -1
- package/dist/browser/discover.js +6 -2
- package/dist/browser/errors.d.ts +2 -2
- package/dist/browser/errors.js +4 -12
- package/dist/browser/mcp.d.ts +2 -1
- package/dist/build-manifest.d.ts +2 -0
- package/dist/build-manifest.js +39 -14
- package/dist/build-manifest.test.js +21 -0
- package/dist/capabilityRouting.d.ts +2 -0
- package/dist/capabilityRouting.js +2 -1
- package/dist/cli-manifest.json +1111 -112
- package/dist/cli.js +34 -3
- package/dist/clis/36kr/article.d.ts +1 -0
- package/dist/clis/36kr/article.js +62 -0
- package/dist/clis/36kr/hot.d.ts +3 -0
- package/dist/clis/36kr/hot.js +80 -0
- package/dist/clis/36kr/hot.test.d.ts +1 -0
- package/dist/clis/36kr/hot.test.js +15 -0
- package/dist/clis/36kr/news.d.ts +1 -0
- package/dist/clis/36kr/news.js +51 -0
- package/dist/clis/36kr/news.test.d.ts +1 -0
- package/dist/clis/36kr/news.test.js +85 -0
- package/dist/clis/36kr/search.d.ts +1 -0
- package/dist/clis/36kr/search.js +72 -0
- package/dist/clis/bilibili/comments.d.ts +5 -0
- package/dist/clis/bilibili/comments.js +40 -0
- package/dist/clis/bilibili/comments.test.d.ts +1 -0
- package/dist/clis/bilibili/comments.test.js +82 -0
- package/dist/clis/chatgpt/ask.js +29 -14
- package/dist/clis/chatgpt/ax.d.ts +6 -0
- package/dist/clis/chatgpt/ax.js +172 -1
- package/dist/clis/chatgpt/model.d.ts +1 -0
- package/dist/clis/chatgpt/model.js +24 -0
- package/dist/clis/chatgpt/send.js +12 -3
- package/dist/clis/douban/download.d.ts +1 -0
- package/dist/clis/douban/download.js +67 -0
- package/dist/clis/douban/download.test.d.ts +1 -0
- package/dist/clis/douban/download.test.js +170 -0
- package/dist/clis/douban/photos.d.ts +1 -0
- package/dist/clis/douban/photos.js +34 -0
- package/dist/clis/douban/utils.d.ts +25 -0
- package/dist/clis/douban/utils.js +190 -1
- package/dist/clis/douban/utils.test.d.ts +1 -0
- package/dist/clis/douban/utils.test.js +64 -0
- package/dist/clis/imdb/person.d.ts +1 -0
- package/dist/clis/imdb/person.js +203 -0
- package/dist/clis/imdb/reviews.d.ts +1 -0
- package/dist/clis/imdb/reviews.js +88 -0
- package/dist/clis/imdb/search.d.ts +1 -0
- package/dist/clis/imdb/search.js +161 -0
- package/dist/clis/imdb/title.d.ts +1 -0
- package/dist/clis/imdb/title.js +93 -0
- package/dist/clis/imdb/top.d.ts +1 -0
- package/dist/clis/imdb/top.js +53 -0
- package/dist/clis/imdb/trending.d.ts +1 -0
- package/dist/clis/imdb/trending.js +52 -0
- package/dist/clis/imdb/utils.d.ts +46 -0
- package/dist/clis/imdb/utils.js +285 -0
- package/dist/clis/imdb/utils.test.d.ts +1 -0
- package/dist/clis/imdb/utils.test.js +88 -0
- package/dist/clis/jd/item.d.ts +4 -0
- package/dist/clis/jd/item.js +16 -15
- package/dist/clis/jd/item.test.js +16 -1
- package/dist/clis/linux-do/categories.yaml +38 -9
- package/dist/clis/linux-do/category.d.ts +1 -0
- package/dist/clis/linux-do/category.js +36 -0
- package/dist/clis/linux-do/feed.d.ts +45 -0
- package/dist/clis/linux-do/feed.js +397 -0
- package/dist/clis/linux-do/feed.test.d.ts +1 -0
- package/dist/clis/linux-do/feed.test.js +118 -0
- package/dist/clis/linux-do/hot.d.ts +1 -0
- package/dist/clis/linux-do/hot.js +25 -0
- package/dist/clis/linux-do/latest.d.ts +1 -0
- package/dist/clis/linux-do/latest.js +18 -0
- package/dist/clis/linux-do/tags.yaml +41 -0
- package/dist/clis/linux-do/topic.yaml +41 -3
- package/dist/clis/linux-do/user-posts.yaml +67 -0
- package/dist/clis/linux-do/user-topics.yaml +54 -0
- package/dist/clis/paperreview/commands.test.d.ts +3 -0
- package/dist/clis/paperreview/commands.test.js +243 -0
- package/dist/clis/paperreview/feedback.d.ts +1 -0
- package/dist/clis/paperreview/feedback.js +52 -0
- package/dist/clis/paperreview/review.d.ts +1 -0
- package/dist/clis/paperreview/review.js +37 -0
- package/dist/clis/paperreview/submit.d.ts +1 -0
- package/dist/clis/paperreview/submit.js +85 -0
- package/dist/clis/paperreview/utils.d.ts +46 -0
- package/dist/clis/paperreview/utils.js +197 -0
- package/dist/clis/paperreview/utils.test.d.ts +1 -0
- package/dist/clis/paperreview/utils.test.js +49 -0
- package/dist/clis/producthunt/browse.d.ts +1 -0
- package/dist/clis/producthunt/browse.js +99 -0
- package/dist/clis/producthunt/hot.d.ts +1 -0
- package/dist/clis/producthunt/hot.js +110 -0
- package/dist/clis/producthunt/posts.d.ts +1 -0
- package/dist/clis/producthunt/posts.js +28 -0
- package/dist/clis/producthunt/today.d.ts +1 -0
- package/dist/clis/producthunt/today.js +35 -0
- package/dist/clis/producthunt/utils.d.ts +29 -0
- package/dist/clis/producthunt/utils.js +99 -0
- package/dist/clis/producthunt/utils.test.d.ts +1 -0
- package/dist/clis/producthunt/utils.test.js +64 -0
- package/dist/clis/twitter/article.js +4 -28
- package/dist/clis/twitter/likes.d.ts +24 -0
- package/dist/clis/twitter/likes.js +217 -0
- package/dist/clis/twitter/likes.test.d.ts +1 -0
- package/dist/clis/twitter/likes.test.js +85 -0
- package/dist/clis/twitter/profile.js +4 -28
- package/dist/clis/twitter/search.js +2 -1
- package/dist/clis/twitter/search.test.js +2 -0
- package/dist/clis/twitter/shared.d.ts +6 -0
- package/dist/clis/twitter/shared.js +35 -0
- package/dist/clis/twitter/timeline.js +2 -13
- package/dist/clis/weixin/download.d.ts +17 -0
- package/dist/clis/weixin/download.js +88 -20
- package/dist/clis/weread/book.js +2 -2
- package/dist/clis/weread/commands.test.d.ts +3 -0
- package/dist/clis/weread/commands.test.js +43 -0
- package/dist/clis/weread/highlights.js +2 -2
- package/dist/clis/weread/notebooks.js +2 -2
- package/dist/clis/weread/notes.js +3 -3
- package/dist/clis/weread/shelf.js +2 -2
- package/dist/clis/weread/utils.d.ts +4 -4
- package/dist/clis/weread/utils.js +32 -14
- package/dist/clis/weread/utils.test.js +1 -28
- package/dist/clis/xiaohongshu/comments.d.ts +5 -0
- package/dist/clis/xiaohongshu/comments.js +74 -0
- package/dist/clis/xiaohongshu/comments.test.d.ts +1 -0
- package/dist/clis/xiaohongshu/comments.test.js +79 -0
- package/dist/clis/xiaohongshu/publish.js +114 -18
- package/dist/clis/xiaohongshu/publish.test.d.ts +1 -0
- package/dist/clis/xiaohongshu/publish.test.js +119 -0
- package/dist/commanderAdapter.d.ts +1 -0
- package/dist/commanderAdapter.js +176 -29
- package/dist/commanderAdapter.test.d.ts +1 -0
- package/dist/commanderAdapter.test.js +62 -0
- package/dist/daemon.js +17 -1
- package/dist/discovery.js +8 -14
- package/dist/doctor.d.ts +1 -0
- package/dist/doctor.js +9 -2
- package/dist/download/index.js +63 -51
- package/dist/download/index.test.js +17 -4
- package/dist/errors.d.ts +3 -1
- package/dist/errors.js +15 -32
- package/dist/execution.d.ts +1 -3
- package/dist/execution.js +21 -1
- package/dist/hooks.js +2 -0
- package/dist/main.js +5 -0
- package/dist/output.js +5 -1
- package/dist/pipeline/executor.js +3 -4
- package/dist/plugin-manifest.d.ts +70 -0
- package/dist/plugin-manifest.js +160 -0
- package/dist/plugin-manifest.test.d.ts +4 -0
- package/dist/plugin-manifest.test.js +179 -0
- package/dist/plugin.d.ts +38 -5
- package/dist/plugin.js +267 -33
- package/dist/plugin.test.js +220 -3
- package/dist/registry.d.ts +4 -0
- package/dist/registry.js +2 -0
- package/dist/runtime-detect.d.ts +21 -0
- package/dist/runtime-detect.js +32 -0
- package/dist/runtime-detect.test.d.ts +1 -0
- package/dist/runtime-detect.test.js +27 -0
- package/dist/runtime.js +1 -1
- package/dist/serialization.d.ts +2 -0
- package/dist/serialization.js +6 -0
- package/dist/types.d.ts +1 -0
- package/dist/update-check.d.ts +22 -0
- package/dist/update-check.js +112 -0
- package/dist/weixin-download.test.d.ts +1 -0
- package/dist/weixin-download.test.js +30 -0
- package/dist/weread-private-api-regression.test.d.ts +1 -0
- package/dist/weread-private-api-regression.test.js +122 -0
- package/dist/yaml-schema.d.ts +3 -0
- package/dist/yaml-schema.js +18 -1
- package/docs/.vitepress/config.mts +4 -0
- package/docs/adapters/browser/36kr.md +47 -0
- package/docs/adapters/browser/douban.md +14 -0
- package/docs/adapters/browser/imdb.md +47 -0
- package/docs/adapters/browser/jd.md +2 -2
- package/docs/adapters/browser/linux-do.md +181 -20
- package/docs/adapters/browser/paperreview.md +43 -0
- package/docs/adapters/browser/producthunt.md +49 -0
- package/docs/adapters/desktop/chatgpt.md +5 -0
- package/docs/adapters/index.md +6 -2
- package/docs/advanced/download.md +4 -0
- package/docs/advanced/rate-limiter-plugin.md +99 -0
- package/docs/guide/electron-app-cli.md +200 -0
- package/docs/guide/getting-started.md +1 -0
- package/docs/guide/plugins.md +87 -0
- package/docs/zh/guide/electron-app-cli.md +188 -0
- package/docs/zh/guide/getting-started.md +1 -0
- package/docs/zh/guide/plugins.md +65 -0
- package/extension/package.json +1 -0
- package/extension/scripts/package-release.mjs +179 -0
- package/extension/src/background.ts +2 -0
- package/package.json +4 -1
- package/scripts/postinstall.js +10 -0
- package/src/browser/cdp.ts +2 -1
- package/src/browser/discover.ts +8 -3
- package/src/browser/errors.ts +13 -14
- package/src/browser/mcp.ts +2 -1
- package/src/build-manifest.test.ts +23 -0
- package/src/build-manifest.ts +40 -15
- package/src/capabilityRouting.ts +2 -1
- package/src/cli.ts +35 -3
- package/src/clis/36kr/article.ts +69 -0
- package/src/clis/36kr/hot.test.ts +19 -0
- package/src/clis/36kr/hot.ts +100 -0
- package/src/clis/36kr/news.test.ts +90 -0
- package/src/clis/36kr/news.ts +54 -0
- package/src/clis/36kr/search.ts +78 -0
- package/src/clis/bilibili/comments.test.ts +102 -0
- package/src/clis/bilibili/comments.ts +44 -0
- package/src/clis/chatgpt/ask.ts +28 -14
- package/src/clis/chatgpt/ax.ts +180 -1
- package/src/clis/chatgpt/model.ts +27 -0
- package/src/clis/chatgpt/send.ts +16 -6
- package/src/clis/douban/download.test.ts +196 -0
- package/src/clis/douban/download.ts +78 -0
- package/src/clis/douban/photos.ts +36 -0
- package/src/clis/douban/utils.test.ts +97 -0
- package/src/clis/douban/utils.ts +232 -1
- package/src/clis/imdb/person.ts +232 -0
- package/src/clis/imdb/reviews.ts +111 -0
- package/src/clis/imdb/search.ts +179 -0
- package/src/clis/imdb/title.ts +121 -0
- package/src/clis/imdb/top.ts +67 -0
- package/src/clis/imdb/trending.ts +66 -0
- package/src/clis/imdb/utils.test.ts +117 -0
- package/src/clis/imdb/utils.ts +305 -0
- package/src/clis/jd/item.test.ts +18 -1
- package/src/clis/jd/item.ts +18 -15
- package/src/clis/linux-do/categories.yaml +38 -9
- package/src/clis/linux-do/category.ts +37 -0
- package/src/clis/linux-do/feed.test.ts +132 -0
- package/src/clis/linux-do/feed.ts +501 -0
- package/src/clis/linux-do/hot.ts +26 -0
- package/src/clis/linux-do/latest.ts +19 -0
- package/src/clis/linux-do/tags.yaml +41 -0
- package/src/clis/linux-do/topic.yaml +41 -3
- package/src/clis/linux-do/user-posts.yaml +67 -0
- package/src/clis/linux-do/user-topics.yaml +54 -0
- package/src/clis/paperreview/commands.test.ts +283 -0
- package/src/clis/paperreview/feedback.ts +64 -0
- package/src/clis/paperreview/review.ts +47 -0
- package/src/clis/paperreview/submit.ts +119 -0
- package/src/clis/paperreview/utils.test.ts +68 -0
- package/src/clis/paperreview/utils.ts +276 -0
- package/src/clis/producthunt/browse.ts +109 -0
- package/src/clis/producthunt/hot.ts +127 -0
- package/src/clis/producthunt/posts.ts +29 -0
- package/src/clis/producthunt/today.ts +37 -0
- package/src/clis/producthunt/utils.test.ts +72 -0
- package/src/clis/producthunt/utils.ts +122 -0
- package/src/clis/twitter/article.ts +5 -28
- package/src/clis/twitter/likes.test.ts +91 -0
- package/src/clis/twitter/likes.ts +256 -0
- package/src/clis/twitter/profile.ts +5 -28
- package/src/clis/twitter/search.test.ts +2 -0
- package/src/clis/twitter/search.ts +3 -1
- package/src/clis/twitter/shared.ts +45 -0
- package/src/clis/twitter/timeline.ts +2 -13
- package/src/clis/weixin/download.ts +114 -20
- package/src/clis/weread/book.ts +2 -2
- package/src/clis/weread/commands.test.ts +57 -0
- package/src/clis/weread/highlights.ts +2 -2
- package/src/clis/weread/notebooks.ts +2 -2
- package/src/clis/weread/notes.ts +3 -3
- package/src/clis/weread/shelf.ts +2 -2
- package/src/clis/weread/utils.test.ts +1 -32
- package/src/clis/weread/utils.ts +41 -16
- package/src/clis/xiaohongshu/comments.test.ts +96 -0
- package/src/clis/xiaohongshu/comments.ts +81 -0
- package/src/clis/xiaohongshu/publish.test.ts +137 -0
- package/src/clis/xiaohongshu/publish.ts +129 -18
- package/src/commanderAdapter.test.ts +78 -0
- package/src/commanderAdapter.ts +188 -24
- package/src/daemon.ts +19 -1
- package/src/discovery.ts +8 -15
- package/src/doctor.ts +13 -2
- package/src/download/index.test.ts +14 -4
- package/src/download/index.ts +67 -55
- package/src/errors.ts +25 -66
- package/src/execution.ts +28 -3
- package/src/hooks.ts +1 -0
- package/src/main.ts +6 -0
- package/src/output.ts +3 -1
- package/src/pipeline/executor.ts +4 -6
- package/src/plugin-manifest.test.ts +223 -0
- package/src/plugin-manifest.ts +206 -0
- package/src/plugin.test.ts +246 -2
- package/src/plugin.ts +338 -36
- package/src/registry.ts +6 -1
- package/src/runtime-detect.test.ts +30 -0
- package/src/runtime-detect.ts +36 -0
- package/src/runtime.ts +1 -1
- package/src/serialization.ts +4 -0
- package/src/types.ts +1 -0
- package/src/update-check.ts +114 -0
- package/src/weixin-download.test.ts +64 -0
- package/src/weread-private-api-regression.test.ts +150 -0
- package/src/yaml-schema.ts +20 -0
- package/tests/e2e/browser-auth.test.ts +13 -9
- package/tests/e2e/browser-public-extended.test.ts +1 -1
- package/tests/e2e/browser-public.test.ts +62 -4
- package/tests/e2e/helpers.ts +2 -1
- package/tests/e2e/public-commands.test.ts +37 -3
- package/tests/smoke/api-health.test.ts +1 -1
- package/vitest.config.ts +10 -0
- package/dist/clis/linux-do/category.yaml +0 -51
- package/dist/clis/linux-do/hot.yaml +0 -50
- package/dist/clis/linux-do/latest.yaml +0 -40
- package/src/clis/linux-do/category.yaml +0 -51
- package/src/clis/linux-do/hot.yaml +0 -50
- package/src/clis/linux-do/latest.yaml +0 -40
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { CommandExecutionError } from '../../errors.js';
|
|
2
|
+
import { cli, Strategy } from '../../registry.js';
|
|
3
|
+
import { extractJsonLd, forceEnglishUrl, isChallengePage } from './utils.js';
|
|
4
|
+
|
|
5
|
+
/**
 * `imdb top` command: lists entries of the IMDb Top 250 Movies chart.
 *
 * Opens the chart page with the English language parameter applied, fails fast when
 * a challenge page is detected, then reads the `ItemList` JSON-LD block and turns
 * its `itemListElement` entries into table rows.
 */
cli({
  site: 'imdb',
  name: 'top',
  description: 'IMDb Top 250 Movies',
  domain: 'www.imdb.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [{ name: 'limit', type: 'int', default: 20, help: 'Number of results' }],
  columns: ['rank', 'title', 'rating', 'votes', 'genre', 'url'],
  func: async (page, args) => {
    await page.goto(forceEnglishUrl('https://www.imdb.com/chart/top/'));
    await page.wait(2);

    const blocked = await isChallengePage(page);
    if (blocked) {
      throw new CommandExecutionError(
        'IMDb blocked this request',
        'Try again with a normal browser session or extension mode',
      );
    }

    // The chart entries live in the page's ItemList JSON-LD block.
    const chart = await extractJsonLd(page, 'ItemList');
    const entries = chart ? chart.itemListElement : undefined;
    if (!Array.isArray(entries)) {
      throw new CommandExecutionError(
        'Could not find chart data on page',
        'IMDb may have changed their page structure',
      );
    }

    // Unusable limits fall back to 20; the result is clamped to 1..250.
    const requested = Number(args.limit) || 20;
    const count = Math.max(1, Math.min(requested, 250));

    // Present values are stringified; null/undefined become an empty cell.
    const asText = (value: unknown): string => (value != null ? String(value) : '');

    return (entries as any[]).slice(0, count).map((entry: any, index: number) => {
      const movie = entry.item || {};
      const stats = movie.aggregateRating || {};
      const genre = Array.isArray(movie.genre) ? movie.genre.join(', ') : String(movie.genre || '');

      // Anything that is not already http(s) is treated as a path on www.imdb.com.
      let link: string = movie.url || '';
      if (link && !/^https?:\/\//.test(link)) {
        link = 'https://www.imdb.com' + link;
      }

      return {
        rank: entry.position || index + 1,
        title: String(movie.name || ''),
        rating: asText(stats.ratingValue),
        votes: asText(stats.ratingCount),
        genre,
        url: link,
      };
    });
  },
});
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { CommandExecutionError } from '../../errors.js';
|
|
2
|
+
import { cli, Strategy } from '../../registry.js';
|
|
3
|
+
import { extractJsonLd, forceEnglishUrl, isChallengePage } from './utils.js';
|
|
4
|
+
|
|
5
|
+
/**
 * `imdb trending` command: lists entries of the IMDb Most Popular Movies
 * (MovieMeter) chart.
 *
 * Opens the chart page with the English language parameter applied, fails fast when
 * a challenge page is detected, then reads the `ItemList` JSON-LD block and turns
 * its `itemListElement` entries into table rows.
 */
cli({
  site: 'imdb',
  name: 'trending',
  description: 'IMDb Most Popular Movies',
  domain: 'www.imdb.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [{ name: 'limit', type: 'int', default: 20, help: 'Number of results' }],
  columns: ['rank', 'title', 'rating', 'genre', 'url'],
  func: async (page, args) => {
    await page.goto(forceEnglishUrl('https://www.imdb.com/chart/moviemeter/'));
    await page.wait(2);

    const blocked = await isChallengePage(page);
    if (blocked) {
      throw new CommandExecutionError(
        'IMDb blocked this request',
        'Try again with a normal browser session or extension mode',
      );
    }

    // The chart entries live in the page's ItemList JSON-LD block.
    const chart = await extractJsonLd(page, 'ItemList');
    const entries = chart ? chart.itemListElement : undefined;
    if (!Array.isArray(entries)) {
      throw new CommandExecutionError(
        'Could not find chart data on page',
        'IMDb may have changed their page structure',
      );
    }

    // Unusable limits fall back to 20; the result is clamped to 1..100.
    const requested = Number(args.limit) || 20;
    const count = Math.max(1, Math.min(requested, 100));

    return (entries as any[]).slice(0, count).map((entry: any, index: number) => {
      const movie = entry.item || {};
      const stats = movie.aggregateRating || {};
      const genre = Array.isArray(movie.genre) ? movie.genre.join(', ') : String(movie.genre || '');

      // Anything that is not already http(s) is treated as a path on www.imdb.com.
      let link: string = movie.url || '';
      if (link && !/^https?:\/\//.test(link)) {
        link = 'https://www.imdb.com' + link;
      }

      return {
        rank: entry.position || index + 1,
        title: String(movie.name || ''),
        rating: stats.ratingValue != null ? String(stats.ratingValue) : '',
        genre,
        url: link,
      };
    });
  },
});
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import type { IPage } from '../../types.js';
|
|
3
|
+
import {
|
|
4
|
+
extractJsonLd,
|
|
5
|
+
forceEnglishUrl,
|
|
6
|
+
formatDuration,
|
|
7
|
+
getCurrentImdbId,
|
|
8
|
+
isChallengePage,
|
|
9
|
+
normalizeImdbTitleType,
|
|
10
|
+
normalizeImdbId,
|
|
11
|
+
waitForImdbPath,
|
|
12
|
+
waitForImdbReviewsReady,
|
|
13
|
+
waitForImdbSearchReady,
|
|
14
|
+
} from './utils.js';
|
|
15
|
+
|
|
16
|
+
/** Builds a stand-in IPage whose `evaluate` is a mock that always resolves to `value`. */
function pageResolving(value: unknown): IPage {
  return { evaluate: vi.fn().mockResolvedValue(value) } as unknown as IPage;
}

describe('normalizeImdbId', () => {
  it('passes through bare ids', () => {
    const bare: Array<['tt' | 'nm', string]> = [
      ['tt', 'tt1375666'],
      ['nm', 'nm0634240'],
    ];
    for (const [prefix, id] of bare) {
      expect(normalizeImdbId(id, prefix)).toBe(id);
    }
  });

  it('extracts ids from supported urls', () => {
    const cases: Array<[string, 'tt' | 'nm', string]> = [
      ['https://www.imdb.com/title/tt1375666/', 'tt', 'tt1375666'],
      ['https://m.imdb.com/title/tt1375666/', 'tt', 'tt1375666'],
      ['https://www.imdb.com/de/title/tt1375666/?ref_=nv_sr_srsg_0', 'tt', 'tt1375666'],
      ['https://www.imdb.com/name/nm0634240/', 'nm', 'nm0634240'],
    ];
    for (const [input, prefix, expected] of cases) {
      expect(normalizeImdbId(input, prefix)).toBe(expected);
    }
  });

  it('throws on invalid or mismatched ids', () => {
    for (const rejected of ['invalid', 'tt1', 'nm0634240']) {
      expect(() => normalizeImdbId(rejected, 'tt')).toThrow('Invalid IMDb ID');
    }
  });
});

describe('formatDuration', () => {
  it('converts ISO-8601 durations to a short human format', () => {
    const cases: Array<[string, string]> = [
      ['PT2H28M', '2h 28m'],
      ['PT1H', '1h'],
      ['PT45M', '45m'],
      ['PT2H', '2h'],
    ];
    for (const [iso, expected] of cases) {
      expect(formatDuration(iso)).toBe(expected);
    }
  });

  it('returns an empty string for invalid input', () => {
    for (const rejected of ['', 'invalid']) {
      expect(formatDuration(rejected)).toBe('');
    }
  });
});

describe('forceEnglishUrl', () => {
  it('adds the English language parameter', () => {
    const localized = forceEnglishUrl('https://www.imdb.com/title/tt1375666/');
    expect(localized).toContain('language=en-US');
  });

  it('preserves existing query parameters', () => {
    const result = forceEnglishUrl('https://www.imdb.com/title/tt1375666/?ref_=nv');
    for (const fragment of ['language=en-US', 'ref_=nv']) {
      expect(result).toContain(fragment);
    }
  });
});

describe('normalizeImdbTitleType', () => {
  it('maps internal imdb ids to readable labels', () => {
    const cases: Array<[unknown, string]> = [
      [{ id: 'movie', text: '' }, 'Movie'],
      [{ id: 'tvSeries', text: '' }, 'TV Series'],
      ['short', 'Short'],
    ];
    for (const [input, label] of cases) {
      expect(normalizeImdbTitleType(input)).toBe(label);
    }
  });

  it('preserves explicit text labels', () => {
    const labelled = { id: 'movie', text: 'Feature Film' };
    expect(normalizeImdbTitleType(labelled)).toBe('Feature Film');
  });
});

describe('extractJsonLd', () => {
  it('returns the evaluated JSON-LD payload', async () => {
    const page = pageResolving({ '@type': 'Movie', name: 'Inception' });

    await expect(extractJsonLd(page, 'Movie')).resolves.toEqual({ '@type': 'Movie', name: 'Inception' });
    expect(page.evaluate).toHaveBeenCalledTimes(1);
    // The requested type must be embedded (JSON-quoted) in the script sent to the page.
    expect(page.evaluate).toHaveBeenCalledWith(expect.stringContaining('"Movie"'));
  });
});

describe('isChallengePage', () => {
  it('returns true when the page evaluation matches a challenge', async () => {
    const page = pageResolving(true);

    await expect(isChallengePage(page)).resolves.toBe(true);
    expect(page.evaluate).toHaveBeenCalledTimes(1);
  });
});

describe('imdb browser helpers', () => {
  it('reads the current imdb id from page metadata', async () => {
    const page = pageResolving('nm0634240');

    await expect(getCurrentImdbId(page, 'nm')).resolves.toBe('nm0634240');
    expect(page.evaluate).toHaveBeenCalledTimes(1);
  });

  it('wait helpers resolve mocked readiness booleans', async () => {
    const page = pageResolving(true);

    await expect(waitForImdbPath(page, '^/find/?$')).resolves.toBe(true);
    await expect(waitForImdbSearchReady(page)).resolves.toBe(true);
    await expect(waitForImdbReviewsReady(page)).resolves.toBe(true);
    // One evaluate round-trip per helper.
    expect(page.evaluate).toHaveBeenCalledTimes(3);
  });
});
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import { ArgumentError } from '../../errors.js';
|
|
2
|
+
import type { IPage } from '../../types.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Normalize an IMDb title or person reference to its bare, lowercase ID.
 *
 * Accepts either a bare ID (`tt1375666`, `nm0634240`) or any string containing an
 * IMDb `/title/<id>` or `/name/<id>` path, optionally preceded by a two-letter
 * language segment (`/de/title/...`) and followed by `/`, `?`, `#` or end of input.
 * Both forms are matched case-insensitively and the result is always lowercased,
 * so `TT1375666` and `https://www.imdb.com/title/TT1375666/` both yield `tt1375666`.
 *
 * @param input - Bare ID or IMDb URL; surrounding whitespace is ignored.
 * @param prefix - Expected ID kind: `'tt'` for titles, `'nm'` for names.
 * @returns The ID, i.e. `prefix` followed by 7 or 8 digits.
 * @throws ArgumentError when no ID with the requested prefix can be found.
 */
export function normalizeImdbId(input: string, prefix: 'tt' | 'nm'): string {
  // Types are erased at runtime; coerce so a non-string value ends up in the
  // ArgumentError below instead of failing with a TypeError on `.trim()`.
  const trimmed = String(input ?? '').trim();

  const barePattern = new RegExp(`^${prefix}\\d{7,8}$`, 'i');
  if (barePattern.test(trimmed)) {
    return trimmed.toLowerCase();
  }

  const pathPattern = new RegExp(`/(?:[a-z]{2}/)?(?:title|name)/(${prefix}\\d{7,8})(?:[/?#]|$)`, 'i');
  const pathMatch = trimmed.match(pathPattern);
  if (pathMatch) {
    // The pattern is case-insensitive, so canonicalize whatever case the URL used.
    return pathMatch[1].toLowerCase();
  }

  throw new ArgumentError(
    `Invalid IMDb ID: "${input}"`,
    `Expected ${prefix === 'tt' ? 'title' : 'name'} ID like ${prefix === 'tt' ? 'tt1375666' : 'nm0634240'} or an IMDb URL`,
  );
}
|
|
26
|
+
|
|
27
|
+
/**
 * Convert an ISO 8601 time-only duration to a short human-readable form for table display.
 *
 * Supports the `PT<n>H<n>M<n>S` shape with every component optional; components
 * that are absent are omitted from the output.
 * Examples: `PT2H28M` -> `2h 28m`, `PT45M` -> `45m`, `PT1H30M15S` -> `1h 30m 15s`.
 *
 * @param iso - Duration string such as `PT2H28M`.
 * @returns The formatted duration, or `''` when the input is empty, not a string,
 *   or not in the supported shape.
 */
export function formatDuration(iso: string): string {
  // The value may come from untyped page data; reject non-strings rather than
  // throwing on `.match`.
  if (typeof iso !== 'string' || !iso) {
    return '';
  }

  const match = iso.match(/^PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$/);
  if (!match) {
    return '';
  }

  const [, hours, minutes, seconds] = match;
  const parts: string[] = [];
  if (hours) {
    parts.push(`${hours}h`);
  }
  if (minutes) {
    parts.push(`${minutes}m`);
  }
  if (seconds) {
    parts.push(`${seconds}s`);
  }
  return parts.join(' ');
}
|
|
50
|
+
|
|
51
|
+
/**
 * Return `url` with its `language` query parameter set to `en-US`.
 *
 * Any existing `language` value is replaced; other query parameters are kept.
 * Used to reduce structural differences across localized IMDb pages.
 *
 * @param url - Absolute URL; `new URL` throws if it cannot be parsed.
 * @returns The serialized URL including `language=en-US`.
 */
export function forceEnglishUrl(url: string): string {
  const target = new URL(url);
  target.searchParams.set('language', 'en-US');
  return target.href;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Readable labels for IMDb's internal title-type ids.
 * Kept in a Map so that inherited object keys such as `constructor` or `toString`
 * can never be mistaken for a known id.
 */
const IMDB_TITLE_TYPE_LABELS: ReadonlyMap<string, string> = new Map([
  ['movie', 'Movie'],
  ['short', 'Short'],
  ['video', 'Video'],
  ['tvEpisode', 'TV Episode'],
  ['tvMiniSeries', 'TV Mini Series'],
  ['tvMovie', 'TV Movie'],
  ['tvSeries', 'TV Series'],
  ['tvShort', 'TV Short'],
  ['tvSpecial', 'TV Special'],
  ['videoGame', 'Video Game'],
]);

/**
 * Normalize an IMDb title-type payload to a display label.
 *
 * The payload may be a raw string, or an object carrying `text` and/or `id`.
 * For objects, a non-blank string `text` wins; otherwise a string `id` is used.
 * The chosen value is trimmed, mapped to a readable label when it is a known
 * internal id (e.g. `tvSeries` -> `TV Series`), and returned unchanged otherwise.
 *
 * @param input - String, `{ id, text }`-like object, or anything else.
 * @returns The label, or `''` when no usable string can be derived from `input`.
 */
export function normalizeImdbTitleType(input: unknown): string {
  let raw = '';
  if (typeof input === 'string') {
    raw = input;
  } else if (input && typeof input === 'object') {
    const { text, id } = input as Record<string, unknown>;
    if (typeof text === 'string' && text.trim()) {
      raw = text;
    } else if (typeof id === 'string') {
      raw = id;
    }
  }

  raw = raw.trim();
  if (!raw) {
    return '';
  }

  return IMDB_TITLE_TYPE_LABELS.get(raw) ?? raw;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Read a JSON-LD object out of the current page.
 *
 * Sends a script to the page that walks every `script[type="application/ld+json"]`
 * element in document order, parses it (unparsable blocks are ignored), and
 * returns the first object whose `@type` equals, or is an array containing, one of
 * the wanted types. Top-level arrays and `@graph` arrays are searched as well.
 * With no `type` given, the first object found is returned.
 *
 * @param page - Page handle used to run the script.
 * @param type - A single `@type` value or a list of acceptable values.
 * @returns Whatever the page script evaluates to: the matching object, or `null`.
 */
export async function extractJsonLd(page: IPage, type?: string | string[]): Promise<Record<string, unknown> | null> {
  let wanted: string[] = [];
  if (type) {
    wanted = Array.isArray(type) ? type : [type];
  }

  // The wanted types are embedded as a JSON literal inside the in-page script.
  const lookupScript = `
    (function() {
      var scripts = document.querySelectorAll('script[type="application/ld+json"]');
      var wantedTypes = ${JSON.stringify(wanted)};

      function matchesType(data) {
        if (wantedTypes.length === 0) {
          return true;
        }
        if (!data || typeof data !== 'object') {
          return false;
        }
        if (wantedTypes.indexOf(data['@type']) !== -1) {
          return true;
        }
        if (Array.isArray(data['@type'])) {
          for (var t = 0; t < data['@type'].length; t++) {
            if (wantedTypes.indexOf(data['@type'][t]) !== -1) return true;
          }
        }
        return false;
      }

      function findMatch(data) {
        if (Array.isArray(data)) {
          for (var i = 0; i < data.length; i++) {
            var itemMatch = findMatch(data[i]);
            if (itemMatch) {
              return itemMatch;
            }
          }
          return null;
        }

        if (!data || typeof data !== 'object') {
          return null;
        }

        if (matchesType(data)) {
          return data;
        }

        if (Array.isArray(data['@graph'])) {
          return findMatch(data['@graph']);
        }

        return null;
      }

      for (var i = 0; i < scripts.length; i++) {
        try {
          var parsed = JSON.parse(scripts[i].textContent || 'null');
          var match = findMatch(parsed);
          if (match) {
            return match;
          }
        } catch (error) {
          void error;
        }
      }

      return null;
    })()
  `;

  return page.evaluate(lookupScript);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Wait, inside the page, for `window.location.pathname` to match a pattern.
 *
 * The in-page script tests the pathname against `pathPattern` (compiled as a
 * case-insensitive RegExp) every 250 ms until it matches or `timeoutMs` has
 * elapsed, then performs one final check.
 *
 * @param page - Page handle used to run the script.
 * @param pathPattern - Regular-expression source matched against the pathname.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns `true` when the pathname matched, `false` otherwise.
 */
export async function waitForImdbPath(page: IPage, pathPattern: string, timeoutMs: number = 15000): Promise<boolean> {
  const pollScript = `
    (async function() {
      var deadline = Date.now() + ${timeoutMs};
      var pattern = new RegExp(${JSON.stringify(pathPattern)}, 'i');
      while (Date.now() < deadline) {
        if (pattern.test(window.location.pathname)) {
          return true;
        }
        await new Promise(function(resolve) { setTimeout(resolve, 250); });
      }
      return pattern.test(window.location.pathname);
    })()
  `;

  const matched = await page.evaluate(pollScript);
  return Boolean(matched);
}
|
|
186
|
+
|
|
187
|
+
/**
 * Wait, inside the page, until the IMDb search page looks rendered.
 *
 * The in-page script polls every 250 ms until one of the following holds or
 * `timeoutMs` has elapsed (a final check is made after the deadline):
 *  - `__NEXT_DATA__` parses and `props.pageProps` has a non-empty
 *    `titleResults.results` or `nameResults.results`;
 *  - the document contains a link whose href includes `/title/` or `/name/`;
 *  - the body text contains "No results found for" or "No exact matches".
 *
 * @param page - Page handle used to run the script.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns `true` when one of the conditions was met, `false` otherwise.
 */
export async function waitForImdbSearchReady(page: IPage, timeoutMs: number = 15000): Promise<boolean> {
  const pollScript = `
    (async function() {
      var deadline = Date.now() + ${timeoutMs};

      function hasSearchResults() {
        var nextDataEl = document.getElementById('__NEXT_DATA__');
        if (nextDataEl) {
          try {
            var nextData = JSON.parse(nextDataEl.textContent || 'null');
            var pageProps = nextData && nextData.props && nextData.props.pageProps;
            var titleResults = (pageProps && pageProps.titleResults && pageProps.titleResults.results) || [];
            var nameResults = (pageProps && pageProps.nameResults && pageProps.nameResults.results) || [];
            if (titleResults.length > 0 || nameResults.length > 0) {
              return true;
            }
          } catch (error) {
            void error;
          }
        }

        if (document.querySelector('a[href*="/title/"], a[href*="/name/"]')) {
          return true;
        }

        var body = document.body ? (document.body.textContent || '') : '';
        return body.includes('No results found for') || body.includes('No exact matches');
      }

      while (Date.now() < deadline) {
        if (hasSearchResults()) {
          return true;
        }
        await new Promise(function(resolve) { setTimeout(resolve, 250); });
      }

      return hasSearchResults();
    })()
  `;

  const ready = await page.evaluate(pollScript);
  return Boolean(ready);
}
|
|
231
|
+
|
|
232
|
+
/**
 * Waits until an IMDb reviews page has rendered review cards, the review
 * summary, or its "no reviews" state.
 *
 * The in-page script polls every 250 ms. The page counts as ready when a
 * review element matches the selector below, or when the body text contains
 * `No user reviews` or `Review this title`.
 *
 * @param page - Page on which the polling script is evaluated.
 * @param timeoutMs - Polling budget in milliseconds; one final check runs
 *   after it expires.
 * @returns `true` if review content (or the empty state) was seen, `false` otherwise.
 */
export async function waitForImdbReviewsReady(page: IPage, timeoutMs: number = 15000): Promise<boolean> {
  const pollScript = `
    (async function() {
      var stopAt = Date.now() + ${timeoutMs};
      var reviewSelector = 'article.user-review-item, [data-testid="review-card-parent"], [data-testid="tturv-total-reviews"]';
      var emptyStateMarkers = ['No user reviews', 'Review this title'];

      function reviewsAreReady() {
        if (document.querySelector(reviewSelector)) {
          return true;
        }
        var pageText = document.body ? (document.body.textContent || '') : '';
        return emptyStateMarkers.some(function(marker) { return pageText.includes(marker); });
      }

      function pause() {
        return new Promise(function(wake) { setTimeout(wake, 250); });
      }

      while (Date.now() < stopAt) {
        if (reviewsAreReady()) {
          return true;
        }
        await pause();
      }

      return reviewsAreReady();
    })()
  `;

  const outcome = await page.evaluate(pollScript);
  return Boolean(outcome);
}
|
|
260
|
+
|
|
261
|
+
/**
 * Extracts the IMDb entity id of the page currently loaded.
 *
 * The in-page script looks for `<prefix>` followed by 7 or 8 digits
 * (case-insensitive) in, in order: the location pathname, the
 * `link[rel="canonical"]` href, and the `meta[property="og:url"]` content.
 * The first match wins and is returned exactly as it appears in that source.
 *
 * @param page - Page on which the lookup script is evaluated.
 * @param prefix - Id prefix to look for: `tt` or `nm`.
 * @returns The matched id, or an empty string when none of the sources has one.
 */
export async function getCurrentImdbId(page: IPage, prefix: 'tt' | 'nm'): Promise<string> {
  const lookupScript = `
    (function() {
      var idPattern = new RegExp('(${prefix}\\\\d{7,8})', 'i');
      var canonicalLink = document.querySelector('link[rel="canonical"]');
      var ogUrlMeta = document.querySelector('meta[property="og:url"]');

      var sources = [
        window.location.pathname || '',
        (canonicalLink && canonicalLink.getAttribute('href')) || '',
        (ogUrlMeta && ogUrlMeta.getAttribute('content')) || ''
      ];

      var hit = null;
      sources.some(function(text) {
        hit = idPattern.exec(text);
        return hit !== null;
      });

      return hit ? hit[1] : '';
    })()
  `;

  const found = await page.evaluate(lookupScript);
  if (typeof found !== 'string') {
    return '';
  }
  return found;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Reports whether the current page looks like a bot-challenge or verification
 * page rather than real IMDb content.
 *
 * The check is a case-sensitive substring heuristic evaluated in the page:
 * the document title is searched for the robot-check / disabled-JavaScript
 * phrases, and the body text for the captcha / human-verification phrases.
 *
 * @param page - Page on which the detection script is evaluated.
 * @returns `true` if any marker phrase is present, `false` otherwise.
 */
export async function isChallengePage(page: IPage): Promise<boolean> {
  const detectScript = `
    (function() {
      var pageTitle = document.title || '';
      var pageText = document.body ? (document.body.textContent || '') : '';

      var titleMarkers = ['Robot Check', 'Are you a robot', 'JavaScript is disabled'];
      var bodyMarkers = ['captcha', 'verify that you are human', 'not a robot'];

      function containsAny(haystack, needles) {
        return needles.some(function(needle) { return haystack.includes(needle); });
      }

      return containsAny(pageTitle, titleMarkers) || containsAny(pageText, bodyMarkers);
    })()
  `;

  const flagged = await page.evaluate(detectScript);
  return Boolean(flagged);
}
|
package/src/clis/jd/item.test.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, expect, it } from 'vitest';
|
|
2
2
|
import { getRegistry } from '../../registry.js';
|
|
3
|
+
import { __test__ } from './item.js';
|
|
3
4
|
import './item.js';
|
|
4
5
|
|
|
5
6
|
describe('jd item adapter', () => {
|
|
@@ -29,7 +30,23 @@ describe('jd item adapter', () => {
|
|
|
29
30
|
|
|
30
31
|
it('includes expected columns', () => {
|
|
31
32
|
expect(command!.columns).toEqual(
|
|
32
|
-
expect.arrayContaining(['title', 'price', 'shop', 'specs', '
|
|
33
|
+
expect.arrayContaining(['title', 'price', 'shop', 'specs', 'avifImages']),
|
|
33
34
|
);
|
|
34
35
|
});
|
|
36
|
+
|
|
37
|
+
it('extracts only pcpubliccms avif images and respects the limit', () => {
|
|
38
|
+
const result = __test__.extractAvifImages([
|
|
39
|
+
'https://img14.360buyimg.com/n1/jfs/t1/normal.jpg',
|
|
40
|
+
'https://img10.360buyimg.com/imgzone/jfs/t1/detail.avif',
|
|
41
|
+
'https://pcpubliccms.jd.com/image1.avif',
|
|
42
|
+
'https://pcpubliccms.jd.com/image1.avif',
|
|
43
|
+
'https://pcpubliccms.jd.com/image2.avif?x=1',
|
|
44
|
+
'https://example.com/not-jd.avif',
|
|
45
|
+
], 2);
|
|
46
|
+
|
|
47
|
+
expect(result).toEqual([
|
|
48
|
+
'https://pcpubliccms.jd.com/image1.avif',
|
|
49
|
+
'https://pcpubliccms.jd.com/image2.avif?x=1',
|
|
50
|
+
]);
|
|
51
|
+
});
|
|
35
52
|
});
|
package/src/clis/jd/item.ts
CHANGED
|
@@ -6,10 +6,17 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { cli, Strategy } from '../../registry.js';
|
|
8
8
|
|
|
9
|
+
/**
 * Picks the distinct AVIF image URLs hosted on the `pcpubliccms` CDN, keeping
 * first-seen order and returning at most `maxImages` of them.
 *
 * Falsy entries are dropped and duplicates are removed before filtering. A URL
 * qualifies when it contains both `.avif` and `pcpubliccms`.
 *
 * This function must remain self-contained (no references to module scope):
 * its source text is embedded into the in-page script through `toString()`.
 *
 * @param imageUrls - Candidate image URLs; may contain duplicates or empty strings.
 * @param maxImages - Upper bound on the number of URLs returned. Values below
 *   zero are treated as zero.
 * @returns The matching URLs, de-duplicated and truncated to the limit.
 */
function extractAvifImages(imageUrls: string[], maxImages: number): string[] {
  // A negative end index makes slice() count from the END of the array, which
  // would return "all but the last |n|" images instead of none. Clamp it.
  const limit = Math.max(0, maxImages);
  const unique = [...new Set(imageUrls.filter(Boolean))];
  return unique
    .filter((url) => url.includes('.avif') && url.includes('pcpubliccms'))
    .slice(0, limit);
}
|
|
15
|
+
|
|
9
16
|
cli({
|
|
10
17
|
site: 'jd',
|
|
11
18
|
name: 'item',
|
|
12
|
-
description: '
|
|
19
|
+
description: '京东商品详情(价格、店铺、规格参数、AVIF 图片)',
|
|
13
20
|
domain: 'item.jd.com',
|
|
14
21
|
strategy: Strategy.COOKIE,
|
|
15
22
|
args: [
|
|
@@ -23,10 +30,10 @@ cli({
|
|
|
23
30
|
name: 'images',
|
|
24
31
|
type: 'int',
|
|
25
32
|
default: 10,
|
|
26
|
-
help: '
|
|
33
|
+
help: 'AVIF 图片数量上限(默认10)',
|
|
27
34
|
},
|
|
28
35
|
],
|
|
29
|
-
columns: ['title', 'price', 'shop', 'specs', '
|
|
36
|
+
columns: ['title', 'price', 'shop', 'specs', 'avifImages'],
|
|
30
37
|
func: async (page, kwargs) => {
|
|
31
38
|
const sku = kwargs.sku;
|
|
32
39
|
const maxImages = kwargs.images as number;
|
|
@@ -35,7 +42,7 @@ cli({
|
|
|
35
42
|
await page.goto(url, { waitUntil: 'load' });
|
|
36
43
|
await page.wait(2);
|
|
37
44
|
|
|
38
|
-
//
|
|
45
|
+
// 滚动加载商品详情区域中的延迟图片
|
|
39
46
|
for (let i = 0; i < 6; i++) {
|
|
40
47
|
await page.evaluate(`window.scrollTo(0, ${i * 2500})`);
|
|
41
48
|
await page.wait(1);
|
|
@@ -65,17 +72,9 @@ cli({
|
|
|
65
72
|
// 所有图片
|
|
66
73
|
const allImgs = Array.from(document.querySelectorAll('img[src*="360buyimg.com"]'));
|
|
67
74
|
const srcs = allImgs.map(img => img.src).filter(Boolean);
|
|
68
|
-
const unique = [...new Set(srcs)];
|
|
69
|
-
|
|
70
|
-
// 主图
|
|
71
|
-
const mainImgs = unique
|
|
72
|
-
.filter(u => u.includes('/n1/') || u.includes('/n3/') || u.includes('/n4/') || u.includes('/img/'))
|
|
73
|
-
.slice(0, maxImg);
|
|
74
75
|
|
|
75
|
-
//
|
|
76
|
-
const
|
|
77
|
-
.filter(u => u.includes('/babel/') || u.includes('/popshop/'))
|
|
78
|
-
.slice(0, maxImg);
|
|
76
|
+
// 所有 avif 图片(去重,只保留 pcpubliccms CDN)
|
|
77
|
+
const avifImages = ${extractAvifImages.toString()}(srcs, maxImg);
|
|
79
78
|
|
|
80
79
|
// 规格参数:从页面文本提取
|
|
81
80
|
const text = document.body.innerText;
|
|
@@ -92,10 +91,14 @@ cli({
|
|
|
92
91
|
}
|
|
93
92
|
}
|
|
94
93
|
|
|
95
|
-
return { title, price, shop, specs,
|
|
94
|
+
return { title, price, shop, specs, avifImages, totalImages: new Set(srcs).size };
|
|
96
95
|
})()
|
|
97
96
|
`);
|
|
98
97
|
|
|
99
98
|
return [data];
|
|
100
99
|
},
|
|
101
100
|
});
|
|
101
|
+
|
|
102
|
+
/** Exposes module-private helpers so the unit tests can call them directly. */
export const __test__ = {
  extractAvifImages: extractAvifImages,
};
|