@jackwener/opencli 1.6.6 → 1.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/README.zh-CN.md +6 -2
- package/dist/clis/1688/assets.d.ts +42 -0
- package/dist/clis/1688/assets.js +204 -0
- package/dist/clis/1688/assets.test.d.ts +1 -0
- package/dist/clis/1688/assets.test.js +39 -0
- package/dist/clis/1688/download.d.ts +9 -0
- package/dist/clis/1688/download.js +76 -0
- package/dist/clis/1688/download.test.d.ts +1 -0
- package/dist/clis/1688/download.test.js +31 -0
- package/dist/clis/1688/shared.d.ts +10 -0
- package/dist/clis/1688/shared.js +43 -0
- package/dist/clis/linux-do/topic-content.d.ts +35 -0
- package/dist/clis/linux-do/topic-content.js +154 -0
- package/dist/clis/linux-do/topic-content.test.d.ts +1 -0
- package/dist/clis/linux-do/topic-content.test.js +59 -0
- package/dist/clis/linux-do/topic.yaml +1 -16
- package/dist/clis/xueqiu/groups.yaml +23 -0
- package/dist/clis/xueqiu/kline.yaml +65 -0
- package/dist/clis/xueqiu/watchlist.yaml +9 -9
- package/dist/src/analysis.d.ts +2 -0
- package/dist/src/analysis.js +6 -0
- package/dist/src/browser/cdp.js +96 -0
- package/dist/src/build-manifest.d.ts +3 -1
- package/dist/src/build-manifest.js +10 -7
- package/dist/src/build-manifest.test.js +8 -4
- package/dist/src/cli.d.ts +2 -1
- package/dist/src/cli.js +48 -46
- package/dist/src/commands/daemon.js +2 -10
- package/dist/src/diagnostic.d.ts +63 -0
- package/dist/src/diagnostic.js +247 -0
- package/dist/src/diagnostic.test.d.ts +1 -0
- package/dist/src/diagnostic.test.js +213 -0
- package/dist/src/discovery.js +7 -17
- package/dist/src/download/progress.js +7 -2
- package/dist/src/execution.js +25 -4
- package/dist/src/explore.d.ts +0 -2
- package/dist/src/explore.js +61 -38
- package/dist/src/extension-manifest-regression.test.js +0 -1
- package/dist/src/generate.d.ts +1 -1
- package/dist/src/generate.js +2 -3
- package/dist/src/package-paths.d.ts +8 -0
- package/dist/src/package-paths.js +41 -0
- package/dist/src/plugin-scaffold.js +1 -3
- package/dist/src/record.d.ts +1 -2
- package/dist/src/record.js +14 -52
- package/dist/src/synthesize.d.ts +0 -2
- package/dist/src/synthesize.js +8 -4
- package/package.json +1 -1
- package/scripts/postinstall.js +18 -71
- package/dist/cli-manifest.json +0 -17250
package/README.md
CHANGED
|
@@ -133,7 +133,7 @@ git clone git@github.com:jackwener/opencli.git && cd opencli && npm install && n
|
|
|
133
133
|
| **twitter** | `trending` `search` `timeline` `bookmarks` `post` `download` `profile` `article` `like` `likes` `notifications` `reply` `reply-dm` `thread` `follow` `unfollow` `followers` `following` `block` `unblock` `bookmark` `unbookmark` `delete` `hide-reply` `accept` |
|
|
134
134
|
| **reddit** | `hot` `frontpage` `popular` `search` `subreddit` `read` `user` `user-posts` `user-comments` `upvote` `upvoted` `save` `saved` `comment` `subscribe` |
|
|
135
135
|
| **amazon** | `bestsellers` `search` `product` `offer` `discussion` `movers-shakers` `new-releases` |
|
|
136
|
-
| **1688** | `search` `item` `store` |
|
|
136
|
+
| **1688** | `search` `item` `assets` `download` `store` |
|
|
137
137
|
| **gemini** | `new` `ask` `image` `deep-research` `deep-research-result` |
|
|
138
138
|
| **yuanbao** | `new` `ask` |
|
|
139
139
|
| **notebooklm** | `status` `list` `open` `current` `get` `history` `summary` `note-list` `notes-get` `source-list` `source-get` `source-fulltext` `source-guide` |
|
|
@@ -191,6 +191,7 @@ OpenCLI supports downloading images, videos, and articles from supported platfor
|
|
|
191
191
|
| **twitter** | Images, Videos | From user media tab or single tweet |
|
|
192
192
|
| **douban** | Images | Poster / still image lists |
|
|
193
193
|
| **pixiv** | Images | Original-quality illustrations, multi-page |
|
|
194
|
+
| **1688** | Images, Videos | Downloads page-visible product media from item pages |
|
|
194
195
|
| **zhihu** | Articles (Markdown) | Exports with optional image download |
|
|
195
196
|
| **weixin** | Articles (Markdown) | WeChat Official Account articles |
|
|
196
197
|
|
|
@@ -200,6 +201,7 @@ For video downloads, install `yt-dlp` first: `brew install yt-dlp`
|
|
|
200
201
|
opencli xiaohongshu download abc123 --output ./xhs
|
|
201
202
|
opencli bilibili download BV1xxx --output ./bilibili
|
|
202
203
|
opencli twitter download elonmusk --limit 20 --output ./twitter
|
|
204
|
+
opencli 1688 download 841141931191 --output ./1688-downloads
|
|
203
205
|
```
|
|
204
206
|
|
|
205
207
|
## Output Formats
|
package/README.zh-CN.md
CHANGED
|
@@ -178,7 +178,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
|
|
|
178
178
|
| **jike** | `feed` `search` `create` `like` `comment` `repost` `notifications` `post` `topic` `user` | 浏览器 |
|
|
179
179
|
| **jimeng** | `generate` `history` | 浏览器 |
|
|
180
180
|
| **yollomi** | `generate` `video` `edit` `upload` `models` `remove-bg` `upscale` `face-swap` `restore` `try-on` `background` `object-remover` | 浏览器 |
|
|
181
|
-
| **linux-do** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `user-posts` `user-topics` | 浏览器 |
|
|
181
|
+
| **linux-do** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `topic-content` `user-posts` `user-topics` | 浏览器 |
|
|
182
182
|
| **stackoverflow** | `hot` `search` `bounties` `unanswered` | 公开 |
|
|
183
183
|
| **steam** | `top-sellers` | 公开 |
|
|
184
184
|
| **weread** | `shelf` `search` `book` `highlights` `notes` `notebooks` `ranking` | 浏览器 |
|
|
@@ -186,7 +186,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
|
|
|
186
186
|
| **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 浏览器 |
|
|
187
187
|
| **google** | `news` `search` `suggest` `trends` | 公开 |
|
|
188
188
|
| **amazon** | `bestsellers` `search` `product` `offer` `discussion` `movers-shakers` `new-releases` | 浏览器 |
|
|
189
|
-
| **1688** | `search` `item` `store` | 浏览器 |
|
|
189
|
+
| **1688** | `search` `item` `assets` `download` `store` | 浏览器 |
|
|
190
190
|
| **gemini** | `new` `ask` `image` `deep-research` `deep-research-result` | 浏览器 |
|
|
191
191
|
| **spotify** | `auth` `status` `play` `pause` `next` `prev` `volume` `search` `queue` `shuffle` `repeat` | OAuth API |
|
|
192
192
|
| **notebooklm** | `status` `list` `open` `current` `get` `history` `summary` `note-list` `notes-get` `source-list` `source-get` `source-fulltext` `source-guide` | 浏览器 |
|
|
@@ -258,6 +258,7 @@ OpenCLI 支持从各平台下载图片、视频和文章。
|
|
|
258
258
|
| **B站** | 视频 | 需要安装 `yt-dlp` |
|
|
259
259
|
| **Twitter/X** | 图片、视频 | 从用户媒体页或单条推文下载 |
|
|
260
260
|
| **Pixiv** | 图片 | 下载原始画质插画,支持多页作品 |
|
|
261
|
+
| **1688** | 图片、视频 | 下载商品页中可见的商品素材 |
|
|
261
262
|
| **知乎** | 文章(Markdown) | 导出文章,可选下载图片到本地 |
|
|
262
263
|
| **微信公众号** | 文章(Markdown) | 导出微信公众号文章为 Markdown |
|
|
263
264
|
| **豆瓣** | 图片 | 下载电影条目的海报 / 剧照图片 |
|
|
@@ -292,6 +293,9 @@ opencli twitter download --tweet-url "https://x.com/user/status/123" --output ./
|
|
|
292
293
|
# 下载豆瓣电影海报 / 剧照
|
|
293
294
|
opencli douban download 30382501 --output ./douban
|
|
294
295
|
|
|
296
|
+
# 下载 1688 商品页中的图片 / 视频素材
|
|
297
|
+
opencli 1688 download 841141931191 --output ./1688-downloads
|
|
298
|
+
|
|
295
299
|
# 导出知乎文章为 Markdown
|
|
296
300
|
opencli zhihu download "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
|
|
297
301
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { IPage } from '@jackwener/opencli/types';
|
|
2
|
+
import { type MediaSource } from './shared.js';
|
|
3
|
+
interface AssetBrowserPayload {
|
|
4
|
+
href?: string;
|
|
5
|
+
title?: string;
|
|
6
|
+
offerTitle?: string;
|
|
7
|
+
offerId?: string | number;
|
|
8
|
+
gallery?: {
|
|
9
|
+
mainImage?: string[];
|
|
10
|
+
offerImgList?: string[];
|
|
11
|
+
wlImageInfos?: Array<{
|
|
12
|
+
fullPathImageURI?: string;
|
|
13
|
+
}>;
|
|
14
|
+
[key: string]: unknown;
|
|
15
|
+
};
|
|
16
|
+
scannedAssets?: MediaSource[];
|
|
17
|
+
}
|
|
18
|
+
export interface Normalized1688Assets {
|
|
19
|
+
offer_id: string | null;
|
|
20
|
+
title: string | null;
|
|
21
|
+
item_url: string;
|
|
22
|
+
main_images: string[];
|
|
23
|
+
sku_images: string[];
|
|
24
|
+
detail_images: string[];
|
|
25
|
+
videos: string[];
|
|
26
|
+
other_images: string[];
|
|
27
|
+
raw_assets: MediaSource[];
|
|
28
|
+
source: string[];
|
|
29
|
+
main_count: number;
|
|
30
|
+
sku_count: number;
|
|
31
|
+
detail_count: number;
|
|
32
|
+
video_count: number;
|
|
33
|
+
source_url: string;
|
|
34
|
+
fetched_at: string;
|
|
35
|
+
strategy: string;
|
|
36
|
+
}
|
|
37
|
+
declare function normalizeAssets(payload: AssetBrowserPayload): Normalized1688Assets;
|
|
38
|
+
export declare function extractAssetsForInput(page: IPage, input: string): Promise<Normalized1688Assets>;
|
|
39
|
+
export declare const __test__: {
|
|
40
|
+
normalizeAssets: typeof normalizeAssets;
|
|
41
|
+
};
|
|
42
|
+
export {};
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { assertAuthenticatedState, buildDetailUrl, buildProvenance, cleanText, extractOfferId, gotoAndReadState, uniqueMediaSources, } from './shared.js';
|
|
3
|
+
function scriptToReadAssets() {
|
|
4
|
+
return `
|
|
5
|
+
(() => {
|
|
6
|
+
const root = window.context ?? {};
|
|
7
|
+
const model = root.result?.global?.globalData?.model ?? null;
|
|
8
|
+
const gallery = root.result?.data?.gallery?.fields ?? null;
|
|
9
|
+
const defaultSrcProps = ['data-lazyload-src', 'data-src', 'data-ks-lazyload', 'currentSrc', 'src'];
|
|
10
|
+
const groups = [
|
|
11
|
+
{ key: 'main', type: 'image', selectors: ['#dt-tab img', '.detail-gallery-turn img.detail-gallery-img', '.img-list-wrapper img.od-gallery-img', '.od-scroller-item span'] },
|
|
12
|
+
{ key: 'video', type: 'video', selectors: ['.lib-video video', 'video[src]', 'video source[src]'] },
|
|
13
|
+
{ key: 'sku', type: 'image', selectors: ['.pc-sku-wrapper .prop-item-inner-wrapper', '.sku-item-wrapper', '.specification-cell', '.sku-filter-button', '.expand-view-item', '.feature-item img'], srcProps: ['backgroundImage'] },
|
|
14
|
+
{ key: 'detail', type: 'image', selectors: ['.de-description-detail img', '#detailContentContainer img', '.html-description img', '.html-description source', '.desc-lazyload-container img'] },
|
|
15
|
+
];
|
|
16
|
+
const assets = [];
|
|
17
|
+
const seen = new Set();
|
|
18
|
+
|
|
19
|
+
const normalizeUrl = (value) => {
|
|
20
|
+
if (typeof value !== 'string') return '';
|
|
21
|
+
let next = value
|
|
22
|
+
.replace(/^url\\((.*)\\)$/i, '$1')
|
|
23
|
+
.replace(/^['"]|['"]$/g, '')
|
|
24
|
+
.replace(/\\\\u002F/g, '/')
|
|
25
|
+
.replace(/&amp;/g, '&')
|
|
26
|
+
.trim();
|
|
27
|
+
if (!next || next.startsWith('blob:') || next.startsWith('data:')) return '';
|
|
28
|
+
if (next.startsWith('//')) next = 'https:' + next;
|
|
29
|
+
try {
|
|
30
|
+
return new URL(next, location.href).toString();
|
|
31
|
+
} catch {
|
|
32
|
+
return '';
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
const push = (type, group, url, source) => {
|
|
37
|
+
const normalized = normalizeUrl(url);
|
|
38
|
+
if (!normalized) return;
|
|
39
|
+
const key = type + ':' + normalized;
|
|
40
|
+
if (seen.has(key)) return;
|
|
41
|
+
seen.add(key);
|
|
42
|
+
assets.push({ type, group, url: normalized, source });
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
const queryAllDeep = (selector) => {
|
|
46
|
+
const results = [];
|
|
47
|
+
const visitedRoots = new Set();
|
|
48
|
+
const walkRoots = (root, fn) => {
|
|
49
|
+
if (!root || visitedRoots.has(root)) return;
|
|
50
|
+
visitedRoots.add(root);
|
|
51
|
+
fn(root);
|
|
52
|
+
const childElements = root.querySelectorAll ? Array.from(root.querySelectorAll('*')) : [];
|
|
53
|
+
for (const child of childElements) {
|
|
54
|
+
if (child && child.shadowRoot) {
|
|
55
|
+
walkRoots(child.shadowRoot, fn);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
walkRoots(document, (root) => {
|
|
60
|
+
if (root.querySelectorAll) {
|
|
61
|
+
results.push(...Array.from(root.querySelectorAll(selector)));
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
return results;
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
const valuesFromElement = (element, srcProps) => {
|
|
68
|
+
const values = [];
|
|
69
|
+
const props = srcProps && srcProps.length ? srcProps : defaultSrcProps;
|
|
70
|
+
for (const prop of props) {
|
|
71
|
+
try {
|
|
72
|
+
if (prop === 'backgroundImage') {
|
|
73
|
+
const bg = getComputedStyle(element).backgroundImage || '';
|
|
74
|
+
const matches = bg.match(/url\\(([^)]+)\\)/g) || [];
|
|
75
|
+
for (const match of matches) {
|
|
76
|
+
const clean = match.replace(/^url\\(/, '').replace(/\\)$/, '');
|
|
77
|
+
values.push(clean);
|
|
78
|
+
}
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
const direct = element[prop];
|
|
83
|
+
if (typeof direct === 'string' && direct) values.push(direct);
|
|
84
|
+
const attr = element.getAttribute ? element.getAttribute(prop) : '';
|
|
85
|
+
if (attr) values.push(attr);
|
|
86
|
+
} catch {}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (element.tagName === 'SOURCE' && element.parentElement?.tagName === 'VIDEO') {
|
|
90
|
+
values.push(element.src || element.getAttribute('src') || '');
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if (element.tagName === 'VIDEO') {
|
|
94
|
+
values.push(element.currentSrc || '');
|
|
95
|
+
values.push(element.src || '');
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
return values;
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
for (const group of groups) {
|
|
102
|
+
for (const selector of group.selectors) {
|
|
103
|
+
for (const element of queryAllDeep(selector)) {
|
|
104
|
+
for (const value of valuesFromElement(element, group.srcProps)) {
|
|
105
|
+
push(group.type, group.key, value, 'dom:' + selector);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
const scriptTexts = Array.from(document.scripts).map((script) => script.textContent || '');
|
|
112
|
+
const videoRegex = /https?:\\/\\/[^"'\\s]+\\.(?:mp4|m3u8)(?:\\?[^"'\\s]*)?/gi;
|
|
113
|
+
for (const scriptText of scriptTexts) {
|
|
114
|
+
const matches = scriptText.match(videoRegex) || [];
|
|
115
|
+
for (const match of matches) {
|
|
116
|
+
push('video', 'video', match, 'script');
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
const toJson = (value) => JSON.parse(JSON.stringify(value ?? null));
|
|
121
|
+
return {
|
|
122
|
+
href: window.location.href,
|
|
123
|
+
title: document.title || '',
|
|
124
|
+
offerTitle: model?.offerTitleModel?.subject ?? '',
|
|
125
|
+
offerId: model?.tradeModel?.offerId ?? '',
|
|
126
|
+
gallery: toJson(gallery),
|
|
127
|
+
scannedAssets: assets,
|
|
128
|
+
};
|
|
129
|
+
})()
|
|
130
|
+
`;
|
|
131
|
+
}
|
|
132
|
+
function normalizeAssets(payload) {
|
|
133
|
+
const offerId = cleanText(String(payload.offerId ?? '')) || extractOfferId(cleanText(payload.href)) || null;
|
|
134
|
+
const itemUrl = offerId ? buildDetailUrl(offerId) : cleanText(payload.href);
|
|
135
|
+
const seededAssets = [
|
|
136
|
+
...((payload.gallery?.mainImage ?? []).map((url) => ({ type: 'image', group: 'main', url, source: 'page_state:mainImage' }))),
|
|
137
|
+
...((payload.gallery?.offerImgList ?? []).map((url) => ({ type: 'image', group: 'main', url, source: 'page_state:offerImgList' }))),
|
|
138
|
+
...((payload.gallery?.wlImageInfos ?? []).map((item) => ({
|
|
139
|
+
type: 'image',
|
|
140
|
+
group: 'main',
|
|
141
|
+
url: item?.fullPathImageURI ?? '',
|
|
142
|
+
source: 'page_state:wlImageInfos',
|
|
143
|
+
}))),
|
|
144
|
+
];
|
|
145
|
+
const assets = uniqueMediaSources([...seededAssets, ...(payload.scannedAssets ?? [])]);
|
|
146
|
+
const mainImages = assets.filter((item) => item.type === 'image' && item.group === 'main').map((item) => item.url);
|
|
147
|
+
const skuImages = assets.filter((item) => item.type === 'image' && item.group === 'sku').map((item) => item.url);
|
|
148
|
+
const detailImages = assets.filter((item) => item.type === 'image' && item.group === 'detail').map((item) => item.url);
|
|
149
|
+
const videos = assets.filter((item) => item.type === 'video').map((item) => item.url);
|
|
150
|
+
const otherImages = assets
|
|
151
|
+
.filter((item) => item.type === 'image' && !['main', 'sku', 'detail'].includes(item.group))
|
|
152
|
+
.map((item) => item.url);
|
|
153
|
+
return {
|
|
154
|
+
offer_id: offerId,
|
|
155
|
+
title: cleanText(payload.offerTitle) || cleanText(payload.title) || null,
|
|
156
|
+
item_url: itemUrl,
|
|
157
|
+
main_images: mainImages,
|
|
158
|
+
sku_images: skuImages,
|
|
159
|
+
detail_images: detailImages,
|
|
160
|
+
videos,
|
|
161
|
+
other_images: otherImages,
|
|
162
|
+
raw_assets: assets,
|
|
163
|
+
source: [...new Set(assets.map((item) => cleanText(item.source)).filter(Boolean))],
|
|
164
|
+
main_count: mainImages.length,
|
|
165
|
+
sku_count: skuImages.length,
|
|
166
|
+
detail_count: detailImages.length,
|
|
167
|
+
video_count: videos.length,
|
|
168
|
+
...buildProvenance(cleanText(payload.href) || itemUrl),
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
async function readAssetsPayload(page, itemUrl) {
|
|
172
|
+
const state = await gotoAndReadState(page, itemUrl, 2500, 'assets');
|
|
173
|
+
assertAuthenticatedState(state, 'assets');
|
|
174
|
+
await page.autoScroll({ times: 3, delayMs: 400 });
|
|
175
|
+
await page.wait(1);
|
|
176
|
+
return await page.evaluate(scriptToReadAssets());
|
|
177
|
+
}
|
|
178
|
+
export async function extractAssetsForInput(page, input) {
|
|
179
|
+
const itemUrl = buildDetailUrl(String(input ?? ''));
|
|
180
|
+
const payload = await readAssetsPayload(page, itemUrl);
|
|
181
|
+
return normalizeAssets(payload);
|
|
182
|
+
}
|
|
183
|
+
cli({
|
|
184
|
+
site: '1688',
|
|
185
|
+
name: 'assets',
|
|
186
|
+
description: '列出 1688 商品页可提取的图片/视频素材',
|
|
187
|
+
domain: 'www.1688.com',
|
|
188
|
+
strategy: Strategy.COOKIE,
|
|
189
|
+
args: [
|
|
190
|
+
{
|
|
191
|
+
name: 'input',
|
|
192
|
+
required: true,
|
|
193
|
+
positional: true,
|
|
194
|
+
help: '1688 商品 URL 或 offer ID(如 887904326744)',
|
|
195
|
+
},
|
|
196
|
+
],
|
|
197
|
+
columns: ['offer_id', 'title', 'main_count', 'sku_count', 'detail_count', 'video_count'],
|
|
198
|
+
func: async (page, kwargs) => {
|
|
199
|
+
return [await extractAssetsForInput(page, String(kwargs.input ?? ''))];
|
|
200
|
+
},
|
|
201
|
+
});
|
|
202
|
+
export const __test__ = {
|
|
203
|
+
normalizeAssets,
|
|
204
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { __test__ } from './assets.js';
|
|
3
|
+
import { __test__ as sharedTest } from './shared.js';
|
|
4
|
+
describe('1688 assets normalization', () => {
|
|
5
|
+
it('normalizes gallery and scanned assets into grouped media lists', () => {
|
|
6
|
+
const result = __test__.normalizeAssets({
|
|
7
|
+
href: 'https://detail.1688.com/offer/887904326744.html',
|
|
8
|
+
title: '测试商品 - 阿里巴巴',
|
|
9
|
+
offerTitle: '测试商品',
|
|
10
|
+
offerId: 887904326744,
|
|
11
|
+
gallery: {
|
|
12
|
+
mainImage: ['//img.example.com/main-1.jpg'],
|
|
13
|
+
offerImgList: ['https://img.example.com/main-2.jpg'],
|
|
14
|
+
wlImageInfos: [{ fullPathImageURI: 'https://img.example.com/main-3.jpg' }],
|
|
15
|
+
},
|
|
16
|
+
scannedAssets: [
|
|
17
|
+
{ type: 'image', group: 'sku', url: 'https://img.example.com/sku-1.png', source: 'dom:.sku' },
|
|
18
|
+
{ type: 'image', group: 'detail', url: 'https://img.example.com/detail-1.jpg', source: 'dom:.detail' },
|
|
19
|
+
{ type: 'video', group: 'video', url: 'https://video.example.com/demo.mp4', source: 'script' },
|
|
20
|
+
{ type: 'image', group: 'detail', url: 'blob:https://detail.1688.com/1', source: 'ignore' },
|
|
21
|
+
],
|
|
22
|
+
});
|
|
23
|
+
expect(result.offer_id).toBe('887904326744');
|
|
24
|
+
expect(result.main_images).toEqual([
|
|
25
|
+
'https://img.example.com/main-1.jpg',
|
|
26
|
+
'https://img.example.com/main-2.jpg',
|
|
27
|
+
'https://img.example.com/main-3.jpg',
|
|
28
|
+
]);
|
|
29
|
+
expect(result.sku_images).toEqual(['https://img.example.com/sku-1.png']);
|
|
30
|
+
expect(result.detail_images).toEqual(['https://img.example.com/detail-1.jpg']);
|
|
31
|
+
expect(result.videos).toEqual(['https://video.example.com/demo.mp4']);
|
|
32
|
+
expect(result.main_count).toBe(3);
|
|
33
|
+
expect(result.video_count).toBe(1);
|
|
34
|
+
});
|
|
35
|
+
it('normalizes media urls from style syntax and protocol-relative URLs', () => {
|
|
36
|
+
expect(sharedTest.normalizeMediaUrl('url("//img.example.com/1.jpg")')).toBe('https://img.example.com/1.jpg');
|
|
37
|
+
expect(sharedTest.normalizeMediaUrl('blob:https://detail.1688.com/1')).toBe('');
|
|
38
|
+
});
|
|
39
|
+
});
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { type MediaItem } from '@jackwener/opencli/download/media-download';
|
|
2
|
+
import { extractAssetsForInput } from './assets.js';
|
|
3
|
+
declare function extFromUrl(url: string, fallback: string): string;
|
|
4
|
+
declare function toDownloadItems(offerId: string, assets: Awaited<ReturnType<typeof extractAssetsForInput>>): MediaItem[];
|
|
5
|
+
export declare const __test__: {
|
|
6
|
+
extFromUrl: typeof extFromUrl;
|
|
7
|
+
toDownloadItems: typeof toDownloadItems;
|
|
8
|
+
};
|
|
9
|
+
export {};
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import { formatCookieHeader } from '@jackwener/opencli/download';
|
|
3
|
+
import { downloadMedia } from '@jackwener/opencli/download/media-download';
|
|
4
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
5
|
+
import { cleanText } from './shared.js';
|
|
6
|
+
import { extractAssetsForInput } from './assets.js';
|
|
7
|
+
function extFromUrl(url, fallback) {
|
|
8
|
+
try {
|
|
9
|
+
const ext = path.extname(new URL(url).pathname).toLowerCase();
|
|
10
|
+
if (ext && ext.length <= 8)
|
|
11
|
+
return ext;
|
|
12
|
+
}
|
|
13
|
+
catch {
|
|
14
|
+
// ignore
|
|
15
|
+
}
|
|
16
|
+
return fallback;
|
|
17
|
+
}
|
|
18
|
+
function toDownloadItems(offerId, assets) {
|
|
19
|
+
const items = [];
|
|
20
|
+
const pushImages = (urls, prefix) => {
|
|
21
|
+
urls.forEach((url, index) => {
|
|
22
|
+
items.push({
|
|
23
|
+
type: 'image',
|
|
24
|
+
url,
|
|
25
|
+
filename: `${offerId}_${prefix}_${String(index + 1).padStart(2, '0')}${extFromUrl(url, '.jpg')}`,
|
|
26
|
+
});
|
|
27
|
+
});
|
|
28
|
+
};
|
|
29
|
+
pushImages(assets.main_images, 'main');
|
|
30
|
+
pushImages(assets.sku_images, 'sku');
|
|
31
|
+
pushImages(assets.detail_images, 'detail');
|
|
32
|
+
pushImages(assets.other_images, 'other');
|
|
33
|
+
assets.videos.forEach((url, index) => {
|
|
34
|
+
items.push({
|
|
35
|
+
type: 'video',
|
|
36
|
+
url,
|
|
37
|
+
filename: `${offerId}_video_${String(index + 1).padStart(2, '0')}${extFromUrl(url, '.mp4')}`,
|
|
38
|
+
});
|
|
39
|
+
});
|
|
40
|
+
return items;
|
|
41
|
+
}
|
|
42
|
+
cli({
|
|
43
|
+
site: '1688',
|
|
44
|
+
name: 'download',
|
|
45
|
+
description: '批量下载 1688 商品页可提取的图片和视频素材',
|
|
46
|
+
domain: 'www.1688.com',
|
|
47
|
+
strategy: Strategy.COOKIE,
|
|
48
|
+
args: [
|
|
49
|
+
{
|
|
50
|
+
name: 'input',
|
|
51
|
+
required: true,
|
|
52
|
+
positional: true,
|
|
53
|
+
help: '1688 商品 URL 或 offer ID(如 887904326744)',
|
|
54
|
+
},
|
|
55
|
+
{ name: 'output', default: './1688-downloads', help: '输出目录' },
|
|
56
|
+
],
|
|
57
|
+
columns: ['index', 'type', 'status', 'size'],
|
|
58
|
+
func: async (page, kwargs) => {
|
|
59
|
+
const assets = await extractAssetsForInput(page, String(kwargs.input ?? ''));
|
|
60
|
+
const offerId = cleanText(assets.offer_id) || '1688';
|
|
61
|
+
const items = toDownloadItems(offerId, assets);
|
|
62
|
+
const browserCookies = await page.getCookies({ domain: '1688.com' });
|
|
63
|
+
return downloadMedia(items, {
|
|
64
|
+
output: String(kwargs.output || './1688-downloads'),
|
|
65
|
+
subdir: offerId,
|
|
66
|
+
cookies: formatCookieHeader(browserCookies),
|
|
67
|
+
browserCookies,
|
|
68
|
+
filenamePrefix: offerId,
|
|
69
|
+
timeout: 60000,
|
|
70
|
+
});
|
|
71
|
+
},
|
|
72
|
+
});
|
|
73
|
+
export const __test__ = {
|
|
74
|
+
extFromUrl,
|
|
75
|
+
toDownloadItems,
|
|
76
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { __test__ } from './download.js';
|
|
3
|
+
describe('1688 download helpers', () => {
|
|
4
|
+
it('builds stable filenames for grouped assets', () => {
|
|
5
|
+
const items = __test__.toDownloadItems('887904326744', {
|
|
6
|
+
offer_id: '887904326744',
|
|
7
|
+
title: '测试商品',
|
|
8
|
+
item_url: 'https://detail.1688.com/offer/887904326744.html',
|
|
9
|
+
main_images: ['https://img.example.com/a.jpg'],
|
|
10
|
+
sku_images: ['https://img.example.com/b.png'],
|
|
11
|
+
detail_images: ['https://img.example.com/c.webp'],
|
|
12
|
+
videos: ['https://video.example.com/d.mp4'],
|
|
13
|
+
other_images: [],
|
|
14
|
+
raw_assets: [],
|
|
15
|
+
source: [],
|
|
16
|
+
main_count: 1,
|
|
17
|
+
sku_count: 1,
|
|
18
|
+
detail_count: 1,
|
|
19
|
+
video_count: 1,
|
|
20
|
+
source_url: 'https://detail.1688.com/offer/887904326744.html',
|
|
21
|
+
fetched_at: new Date().toISOString(),
|
|
22
|
+
strategy: 'cookie',
|
|
23
|
+
});
|
|
24
|
+
expect(items.map((item) => item.filename)).toEqual([
|
|
25
|
+
'887904326744_main_01.jpg',
|
|
26
|
+
'887904326744_sku_01.png',
|
|
27
|
+
'887904326744_detail_01.webp',
|
|
28
|
+
'887904326744_video_01.mp4',
|
|
29
|
+
]);
|
|
30
|
+
});
|
|
31
|
+
});
|
|
@@ -43,6 +43,12 @@ export interface SearchCandidate {
|
|
|
43
43
|
seller_name: string | null;
|
|
44
44
|
seller_url: string | null;
|
|
45
45
|
}
|
|
46
|
+
export interface MediaSource {
|
|
47
|
+
type: 'image' | 'video';
|
|
48
|
+
group: 'main' | 'sku' | 'detail' | 'video' | 'unknown';
|
|
49
|
+
url: string;
|
|
50
|
+
source?: string;
|
|
51
|
+
}
|
|
46
52
|
export declare function cleanText(value: unknown): string;
|
|
47
53
|
export declare function cleanMultilineText(value: unknown): string;
|
|
48
54
|
export declare function uniqueNonEmpty(values: Array<string | null | undefined>): string[];
|
|
@@ -80,6 +86,8 @@ export declare function assertAuthenticatedState(state: PageState, action: strin
|
|
|
80
86
|
export declare function assertNotCaptcha(state: PageState, action: string): void;
|
|
81
87
|
export declare function toNumber(value: unknown): number | null;
|
|
82
88
|
export declare function limitCandidates<T>(values: T[], limit: number): T[];
|
|
89
|
+
export declare function normalizeMediaUrl(input: unknown): string;
|
|
90
|
+
export declare function uniqueMediaSources(values: MediaSource[]): MediaSource[];
|
|
83
91
|
export declare const __test__: {
|
|
84
92
|
SEARCH_LIMIT_DEFAULT: number;
|
|
85
93
|
SEARCH_LIMIT_MAX: number;
|
|
@@ -108,5 +116,7 @@ export declare const __test__: {
|
|
|
108
116
|
cleanText: typeof cleanText;
|
|
109
117
|
cleanMultilineText: typeof cleanMultilineText;
|
|
110
118
|
uniqueNonEmpty: typeof uniqueNonEmpty;
|
|
119
|
+
normalizeMediaUrl: typeof normalizeMediaUrl;
|
|
120
|
+
uniqueMediaSources: typeof uniqueMediaSources;
|
|
111
121
|
limitCandidates: typeof limitCandidates;
|
|
112
122
|
};
|
package/dist/clis/1688/shared.js
CHANGED
|
@@ -432,6 +432,47 @@ export function limitCandidates(values, limit) {
|
|
|
432
432
|
const normalizedLimit = Math.max(1, Math.trunc(limit) || 1);
|
|
433
433
|
return values.slice(0, normalizedLimit);
|
|
434
434
|
}
|
|
435
|
+
export function normalizeMediaUrl(input) {
|
|
436
|
+
const raw = cleanText(input);
|
|
437
|
+
if (!raw)
|
|
438
|
+
return '';
|
|
439
|
+
let value = raw
|
|
440
|
+
.replace(/^url\((.*)\)$/i, '$1')
|
|
441
|
+
.replace(/^['"]|['"]$/g, '')
|
|
442
|
+
.replace(/\\u002F/g, '/')
|
|
443
|
+
.replace(/&amp;/g, '&')
|
|
444
|
+
.trim();
|
|
445
|
+
if (!value || value.startsWith('data:') || value.startsWith('blob:'))
|
|
446
|
+
return '';
|
|
447
|
+
if (value.startsWith('//'))
|
|
448
|
+
value = `https:${value}`;
|
|
449
|
+
try {
|
|
450
|
+
const url = new URL(value);
|
|
451
|
+
return url.toString();
|
|
452
|
+
}
|
|
453
|
+
catch {
|
|
454
|
+
return '';
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
export function uniqueMediaSources(values) {
|
|
458
|
+
const seen = new Set();
|
|
459
|
+
const result = [];
|
|
460
|
+
for (const value of values) {
|
|
461
|
+
const url = normalizeMediaUrl(value.url);
|
|
462
|
+
if (!url)
|
|
463
|
+
continue;
|
|
464
|
+
const key = `${value.type}:${url}`;
|
|
465
|
+
if (seen.has(key))
|
|
466
|
+
continue;
|
|
467
|
+
seen.add(key);
|
|
468
|
+
result.push({
|
|
469
|
+
...value,
|
|
470
|
+
url,
|
|
471
|
+
source: cleanText(value.source) || undefined,
|
|
472
|
+
});
|
|
473
|
+
}
|
|
474
|
+
return result;
|
|
475
|
+
}
|
|
435
476
|
function normalizeNumericText(value) {
|
|
436
477
|
return value
|
|
437
478
|
.replace(/([¥$€])\s+(?=\d)/g, '$1')
|
|
@@ -510,5 +551,7 @@ export const __test__ = {
|
|
|
510
551
|
cleanText,
|
|
511
552
|
cleanMultilineText,
|
|
512
553
|
uniqueNonEmpty,
|
|
554
|
+
normalizeMediaUrl,
|
|
555
|
+
uniqueMediaSources,
|
|
513
556
|
limitCandidates,
|
|
514
557
|
};
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
interface LinuxDoTopicPost {
|
|
2
|
+
post_number?: number;
|
|
3
|
+
username?: string;
|
|
4
|
+
raw?: string;
|
|
5
|
+
cooked?: string;
|
|
6
|
+
like_count?: number;
|
|
7
|
+
created_at?: string;
|
|
8
|
+
}
|
|
9
|
+
interface LinuxDoTopicPayload {
|
|
10
|
+
title?: string;
|
|
11
|
+
post_stream?: {
|
|
12
|
+
posts?: LinuxDoTopicPost[];
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
interface TopicContentRow {
|
|
16
|
+
content: string;
|
|
17
|
+
}
|
|
18
|
+
declare function toLocalTime(utcStr: string): string;
|
|
19
|
+
declare function normalizeTopicPayload(payload: unknown): LinuxDoTopicPayload | null;
|
|
20
|
+
declare function buildTopicMarkdownDocument(params: {
|
|
21
|
+
title: string;
|
|
22
|
+
author: string;
|
|
23
|
+
likes?: number;
|
|
24
|
+
createdAt: string;
|
|
25
|
+
url: string;
|
|
26
|
+
body: string;
|
|
27
|
+
}): string;
|
|
28
|
+
declare function extractTopicContent(payload: unknown, id: number): TopicContentRow;
|
|
29
|
+
export declare const __test__: {
|
|
30
|
+
buildTopicMarkdownDocument: typeof buildTopicMarkdownDocument;
|
|
31
|
+
extractTopicContent: typeof extractTopicContent;
|
|
32
|
+
normalizeTopicPayload: typeof normalizeTopicPayload;
|
|
33
|
+
toLocalTime: typeof toLocalTime;
|
|
34
|
+
};
|
|
35
|
+
export {};
|