@jackwener/opencli 1.6.7 → 1.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/README.zh-CN.md +8 -3
- package/dist/clis/1688/assets.d.ts +42 -0
- package/dist/clis/1688/assets.js +204 -0
- package/dist/clis/1688/assets.test.d.ts +1 -0
- package/dist/clis/1688/assets.test.js +39 -0
- package/dist/clis/1688/download.d.ts +9 -0
- package/dist/clis/1688/download.js +76 -0
- package/dist/clis/1688/download.test.d.ts +1 -0
- package/dist/clis/1688/download.test.js +31 -0
- package/dist/clis/1688/shared.d.ts +10 -0
- package/dist/clis/1688/shared.js +43 -0
- package/dist/clis/jianyu/search.d.ts +14 -0
- package/dist/clis/jianyu/search.js +135 -0
- package/dist/clis/jianyu/search.test.d.ts +1 -0
- package/dist/clis/jianyu/search.test.js +23 -0
- package/dist/clis/linux-do/topic-content.d.ts +35 -0
- package/dist/clis/linux-do/topic-content.js +154 -0
- package/dist/clis/linux-do/topic-content.test.d.ts +1 -0
- package/dist/clis/linux-do/topic-content.test.js +59 -0
- package/dist/clis/linux-do/topic.yaml +1 -16
- package/dist/clis/quark/ls.d.ts +1 -0
- package/dist/clis/quark/ls.js +63 -0
- package/dist/clis/quark/mkdir.d.ts +1 -0
- package/dist/clis/quark/mkdir.js +36 -0
- package/dist/clis/quark/mv.d.ts +1 -0
- package/dist/clis/quark/mv.js +53 -0
- package/dist/clis/quark/rename.d.ts +1 -0
- package/dist/clis/quark/rename.js +26 -0
- package/dist/clis/quark/rm.d.ts +1 -0
- package/dist/clis/quark/rm.js +24 -0
- package/dist/clis/quark/save.d.ts +1 -0
- package/dist/clis/quark/save.js +80 -0
- package/dist/clis/quark/share-tree.d.ts +1 -0
- package/dist/clis/quark/share-tree.js +45 -0
- package/dist/clis/quark/utils.d.ts +50 -0
- package/dist/clis/quark/utils.js +146 -0
- package/dist/clis/quark/utils.test.d.ts +1 -0
- package/dist/clis/quark/utils.test.js +58 -0
- package/dist/clis/twitter/reply.js +3 -8
- package/dist/clis/twitter/reply.test.js +5 -5
- package/dist/clis/xiaohongshu/note.js +8 -3
- package/dist/clis/xiaohongshu/note.test.js +11 -0
- package/dist/clis/xueqiu/groups.yaml +23 -0
- package/dist/clis/xueqiu/kline.yaml +65 -0
- package/dist/clis/xueqiu/watchlist.yaml +9 -9
- package/dist/clis/zhihu/answer.d.ts +1 -0
- package/dist/clis/zhihu/answer.js +194 -0
- package/dist/clis/zhihu/answer.test.d.ts +1 -0
- package/dist/clis/zhihu/answer.test.js +81 -0
- package/dist/clis/zhihu/comment.d.ts +1 -0
- package/dist/clis/zhihu/comment.js +335 -0
- package/dist/clis/zhihu/comment.test.d.ts +1 -0
- package/dist/clis/zhihu/comment.test.js +54 -0
- package/dist/clis/zhihu/favorite.d.ts +1 -0
- package/dist/clis/zhihu/favorite.js +224 -0
- package/dist/clis/zhihu/favorite.test.d.ts +1 -0
- package/dist/clis/zhihu/favorite.test.js +196 -0
- package/dist/clis/zhihu/follow.d.ts +1 -0
- package/dist/clis/zhihu/follow.js +80 -0
- package/dist/clis/zhihu/follow.test.d.ts +1 -0
- package/dist/clis/zhihu/follow.test.js +45 -0
- package/dist/clis/zhihu/like.d.ts +1 -0
- package/dist/clis/zhihu/like.js +91 -0
- package/dist/clis/zhihu/like.test.d.ts +1 -0
- package/dist/clis/zhihu/like.test.js +64 -0
- package/dist/clis/zhihu/target.d.ts +24 -0
- package/dist/clis/zhihu/target.js +91 -0
- package/dist/clis/zhihu/target.test.d.ts +1 -0
- package/dist/clis/zhihu/target.test.js +77 -0
- package/dist/clis/zhihu/write-shared.d.ts +32 -0
- package/dist/clis/zhihu/write-shared.js +221 -0
- package/dist/clis/zhihu/write-shared.test.d.ts +1 -0
- package/dist/clis/zhihu/write-shared.test.js +175 -0
- package/dist/src/analysis.d.ts +2 -0
- package/dist/src/analysis.js +6 -0
- package/dist/src/browser/bridge.d.ts +2 -0
- package/dist/src/browser/bridge.js +30 -24
- package/dist/src/browser/cdp.js +96 -0
- package/dist/src/browser/daemon-client.d.ts +17 -8
- package/dist/src/browser/daemon-client.js +12 -13
- package/dist/src/browser/daemon-client.test.js +32 -25
- package/dist/src/browser/index.d.ts +2 -1
- package/dist/src/browser/index.js +1 -1
- package/dist/src/browser.test.js +2 -3
- package/dist/src/build-manifest.d.ts +3 -1
- package/dist/src/build-manifest.js +10 -7
- package/dist/src/build-manifest.test.js +8 -4
- package/dist/src/cli.d.ts +2 -1
- package/dist/src/cli.js +48 -46
- package/dist/src/clis/binance/commands.test.d.ts +1 -0
- package/dist/src/clis/binance/commands.test.js +54 -0
- package/dist/src/commanderAdapter.js +19 -6
- package/dist/src/commands/daemon.js +2 -10
- package/dist/src/diagnostic.d.ts +28 -2
- package/dist/src/diagnostic.js +263 -25
- package/dist/src/diagnostic.test.js +220 -1
- package/dist/src/discovery.js +7 -17
- package/dist/src/doctor.d.ts +2 -0
- package/dist/src/doctor.js +59 -31
- package/dist/src/doctor.test.js +89 -16
- package/dist/src/download/progress.js +7 -2
- package/dist/src/execution.js +1 -13
- package/dist/src/explore.d.ts +0 -2
- package/dist/src/explore.js +61 -38
- package/dist/src/extension-manifest-regression.test.js +0 -1
- package/dist/src/generate.d.ts +3 -6
- package/dist/src/generate.js +4 -8
- package/dist/src/package-paths.d.ts +8 -0
- package/dist/src/package-paths.js +41 -0
- package/dist/src/plugin-scaffold.js +1 -3
- package/dist/src/plugin.d.ts +2 -1
- package/dist/src/plugin.js +25 -8
- package/dist/src/plugin.test.js +16 -1
- package/dist/src/record.d.ts +1 -2
- package/dist/src/record.js +14 -52
- package/dist/src/synthesize.d.ts +0 -2
- package/dist/src/synthesize.js +8 -4
- package/package.json +3 -3
- package/dist/cli-manifest.json +0 -17250
- package/dist/src/browser/discover.d.ts +0 -15
- package/dist/src/browser/discover.js +0 -19
package/README.md
CHANGED
|
@@ -132,14 +132,16 @@ git clone git@github.com:jackwener/opencli.git && cd opencli && npm install && n
|
|
|
132
132
|
| **hupu** | `hot` `search` `detail` `mentions` `reply` `like` `unlike` |
|
|
133
133
|
| **twitter** | `trending` `search` `timeline` `bookmarks` `post` `download` `profile` `article` `like` `likes` `notifications` `reply` `reply-dm` `thread` `follow` `unfollow` `followers` `following` `block` `unblock` `bookmark` `unbookmark` `delete` `hide-reply` `accept` |
|
|
134
134
|
| **reddit** | `hot` `frontpage` `popular` `search` `subreddit` `read` `user` `user-posts` `user-comments` `upvote` `upvoted` `save` `saved` `comment` `subscribe` |
|
|
135
|
+
| **zhihu** | `hot` `search` `question` `download` `follow` `like` `favorite` `comment` `answer` |
|
|
135
136
|
| **amazon** | `bestsellers` `search` `product` `offer` `discussion` `movers-shakers` `new-releases` |
|
|
136
|
-
| **1688** | `search` `item` `store` |
|
|
137
|
+
| **1688** | `search` `item` `assets` `download` `store` |
|
|
137
138
|
| **gemini** | `new` `ask` `image` `deep-research` `deep-research-result` |
|
|
138
139
|
| **yuanbao** | `new` `ask` |
|
|
139
140
|
| **notebooklm** | `status` `list` `open` `current` `get` `history` `summary` `note-list` `notes-get` `source-list` `source-get` `source-fulltext` `source-guide` |
|
|
140
141
|
| **spotify** | `auth` `status` `play` `pause` `next` `prev` `volume` `search` `queue` `shuffle` `repeat` |
|
|
141
142
|
| **xianyu** | `search` `item` `chat` |
|
|
142
143
|
| **xiaoe** | `courses` `detail` `catalog` `play-url` `content` |
|
|
144
|
+
| **quark** | `ls` `mkdir` `mv` `rename` `rm` `save` `share-tree` |
|
|
143
145
|
|
|
144
146
|
79+ adapters in total — **[→ see all supported sites & commands](./docs/adapters/index.md)**
|
|
145
147
|
|
|
@@ -191,6 +193,7 @@ OpenCLI supports downloading images, videos, and articles from supported platfor
|
|
|
191
193
|
| **twitter** | Images, Videos | From user media tab or single tweet |
|
|
192
194
|
| **douban** | Images | Poster / still image lists |
|
|
193
195
|
| **pixiv** | Images | Original-quality illustrations, multi-page |
|
|
196
|
+
| **1688** | Images, Videos | Downloads page-visible product media from item pages |
|
|
194
197
|
| **zhihu** | Articles (Markdown) | Exports with optional image download |
|
|
195
198
|
| **weixin** | Articles (Markdown) | WeChat Official Account articles |
|
|
196
199
|
|
|
@@ -200,6 +203,7 @@ For video downloads, install `yt-dlp` first: `brew install yt-dlp`
|
|
|
200
203
|
opencli xiaohongshu download abc123 --output ./xhs
|
|
201
204
|
opencli bilibili download BV1xxx --output ./bilibili
|
|
202
205
|
opencli twitter download elonmusk --limit 20 --output ./twitter
|
|
206
|
+
opencli 1688 download 841141931191 --output ./1688-downloads
|
|
203
207
|
```
|
|
204
208
|
|
|
205
209
|
## Output Formats
|
package/README.zh-CN.md
CHANGED
|
@@ -147,9 +147,10 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
|
|
|
147
147
|
| **chatgpt** | `status` `new` `send` `read` `ask` `model` | 桌面端 |
|
|
148
148
|
| **xiaohongshu** | `search` `notifications` `feed` `user` `download` `publish` `creator-notes` `creator-note-detail` `creator-notes-summary` `creator-profile` `creator-stats` | 浏览器 |
|
|
149
149
|
| **xiaoe** | `courses` `detail` `catalog` `play-url` `content` | 浏览器 |
|
|
150
|
+
| **quark** | `ls` `mkdir` `mv` `rename` `rm` `save` `share-tree` | 浏览器 |
|
|
150
151
|
| **apple-podcasts** | `search` `episodes` `top` | 公开 |
|
|
151
152
|
| **xiaoyuzhou** | `podcast` `podcast-episodes` `episode` | 公开 |
|
|
152
|
-
| **zhihu** | `hot` `search` `question` `download` | 浏览器 |
|
|
153
|
+
| **zhihu** | `hot` `search` `question` `download` `follow` `like` `favorite` `comment` `answer` | 浏览器 |
|
|
153
154
|
| **weixin** | `download` | 浏览器 |
|
|
154
155
|
| **youtube** | `search` `video` `transcript` | 浏览器 |
|
|
155
156
|
| **boss** | `search` `detail` `recommend` `joblist` `greet` `batchgreet` `send` `chatlist` `chatmsg` `invite` `mark` `exchange` `resume` `stats` | 浏览器 |
|
|
@@ -178,7 +179,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
|
|
|
178
179
|
| **jike** | `feed` `search` `create` `like` `comment` `repost` `notifications` `post` `topic` `user` | 浏览器 |
|
|
179
180
|
| **jimeng** | `generate` `history` | 浏览器 |
|
|
180
181
|
| **yollomi** | `generate` `video` `edit` `upload` `models` `remove-bg` `upscale` `face-swap` `restore` `try-on` `background` `object-remover` | 浏览器 |
|
|
181
|
-
| **linux-do** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `user-posts` `user-topics` | 浏览器 |
|
|
182
|
+
| **linux-do** | `hot` `latest` `feed` `search` `categories` `category` `tags` `topic` `topic-content` `user-posts` `user-topics` | 浏览器 |
|
|
182
183
|
| **stackoverflow** | `hot` `search` `bounties` `unanswered` | 公开 |
|
|
183
184
|
| **steam** | `top-sellers` | 公开 |
|
|
184
185
|
| **weread** | `shelf` `search` `book` `highlights` `notes` `notebooks` `ranking` | 浏览器 |
|
|
@@ -186,7 +187,7 @@ npx skills add jackwener/opencli --skill opencli-oneshot # 快速命令参
|
|
|
186
187
|
| **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 浏览器 |
|
|
187
188
|
| **google** | `news` `search` `suggest` `trends` | 公开 |
|
|
188
189
|
| **amazon** | `bestsellers` `search` `product` `offer` `discussion` `movers-shakers` `new-releases` | 浏览器 |
|
|
189
|
-
| **1688** | `search` `item` `store` | 浏览器 |
|
|
190
|
+
| **1688** | `search` `item` `assets` `download` `store` | 浏览器 |
|
|
190
191
|
| **gemini** | `new` `ask` `image` `deep-research` `deep-research-result` | 浏览器 |
|
|
191
192
|
| **spotify** | `auth` `status` `play` `pause` `next` `prev` `volume` `search` `queue` `shuffle` `repeat` | OAuth API |
|
|
192
193
|
| **notebooklm** | `status` `list` `open` `current` `get` `history` `summary` `note-list` `notes-get` `source-list` `source-get` `source-fulltext` `source-guide` | 浏览器 |
|
|
@@ -258,6 +259,7 @@ OpenCLI 支持从各平台下载图片、视频和文章。
|
|
|
258
259
|
| **B站** | 视频 | 需要安装 `yt-dlp` |
|
|
259
260
|
| **Twitter/X** | 图片、视频 | 从用户媒体页或单条推文下载 |
|
|
260
261
|
| **Pixiv** | 图片 | 下载原始画质插画,支持多页作品 |
|
|
262
|
+
| **1688** | 图片、视频 | 下载商品页中可见的商品素材 |
|
|
261
263
|
| **知乎** | 文章(Markdown) | 导出文章,可选下载图片到本地 |
|
|
262
264
|
| **微信公众号** | 文章(Markdown) | 导出微信公众号文章为 Markdown |
|
|
263
265
|
| **豆瓣** | 图片 | 下载电影条目的海报 / 剧照图片 |
|
|
@@ -292,6 +294,9 @@ opencli twitter download --tweet-url "https://x.com/user/status/123" --output ./
|
|
|
292
294
|
# 下载豆瓣电影海报 / 剧照
|
|
293
295
|
opencli douban download 30382501 --output ./douban
|
|
294
296
|
|
|
297
|
+
# 下载 1688 商品页中的图片 / 视频素材
|
|
298
|
+
opencli 1688 download 841141931191 --output ./1688-downloads
|
|
299
|
+
|
|
295
300
|
# 导出知乎文章为 Markdown
|
|
296
301
|
opencli zhihu download "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
|
|
297
302
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { IPage } from '@jackwener/opencli/types';
|
|
2
|
+
import { type MediaSource } from './shared.js';
|
|
3
|
+
/**
 * Raw payload returned by the in-page asset reader before normalization.
 * Every member is optional; the normalizer falls back to empty values.
 */
interface AssetBrowserPayload {
    /** Location of the page the payload was read from. */
    href?: string;
    /** Document title of that page. */
    title?: string;
    /** Offer subject read from the page state model, preferred over `title`. */
    offerTitle?: string;
    /** Offer identifier read from the page state model. */
    offerId?: number | string;
    /** JSON copy of the page state's gallery fields. */
    gallery?: {
        /** Image URLs seeded into the `main` group. */
        mainImage?: string[];
        /** Image URLs seeded into the `main` group. */
        offerImgList?: string[];
        /** Entries whose `fullPathImageURI` is seeded into the `main` group. */
        wlImageInfos?: { fullPathImageURI?: string }[];
        /** Any other gallery field is passed through untyped. */
        [key: string]: unknown;
    };
    /** Media discovered by scanning the DOM and inline scripts. */
    scannedAssets?: MediaSource[];
}
|
|
18
|
+
/**
 * Grouped, de-duplicated media extracted from one 1688 item page.
 */
export interface Normalized1688Assets {
    /** Offer identifier, or `null` when neither the payload nor the URL yields one. */
    offer_id: string | null;
    /** Offer title, falling back to the document title; `null` when both are empty. */
    title: string | null;
    /** Detail URL rebuilt from the offer id, or the page URL when no id is known. */
    item_url: string;

    /** URLs of image assets in the `main` group. */
    main_images: string[];
    /** URLs of image assets in the `sku` group. */
    sku_images: string[];
    /** URLs of image assets in the `detail` group. */
    detail_images: string[];
    /** URLs of every asset typed `video`, regardless of group. */
    videos: string[];
    /** URLs of image assets whose group is not `main`, `sku` or `detail`. */
    other_images: string[];

    /** The full de-duplicated asset list the grouped arrays were derived from. */
    raw_assets: MediaSource[];
    /** Distinct non-empty `source` labels found on `raw_assets`. */
    source: string[];

    /** Length of `main_images`. */
    main_count: number;
    /** Length of `sku_images`. */
    sku_count: number;
    /** Length of `detail_images`. */
    detail_count: number;
    /** Length of `videos`. */
    video_count: number;

    // The three members below are supplied by the shared provenance helper.
    source_url: string;
    fetched_at: string;
    strategy: string;
}
|
|
37
|
+
declare function normalizeAssets(payload: AssetBrowserPayload): Normalized1688Assets;
|
|
38
|
+
export declare function extractAssetsForInput(page: IPage, input: string): Promise<Normalized1688Assets>;
|
|
39
|
+
export declare const __test__: {
|
|
40
|
+
normalizeAssets: typeof normalizeAssets;
|
|
41
|
+
};
|
|
42
|
+
export {};
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { assertAuthenticatedState, buildDetailUrl, buildProvenance, cleanText, extractOfferId, gotoAndReadState, uniqueMediaSources, } from './shared.js';
|
|
3
|
+
/**
 * Returns the source text of an IIFE that is evaluated inside the item page.
 *
 * When run in the page, the script:
 *  - reads `window.context` for the offer model and gallery fields;
 *  - walks the document and every reachable open shadow root, collecting media
 *    URLs from the selector groups `main`, `video`, `sku` and `detail`, taking
 *    them from element properties/attributes or from computed background images;
 *  - scans inline `<script>` text for absolute `.mp4` / `.m3u8` URLs;
 *  - returns `{ href, title, offerTitle, offerId, gallery, scannedAssets }`.
 *
 * URLs are normalized in-page: a `url(...)` wrapper and surrounding quotes are
 * stripped, the literal text `\u002F` becomes `/`, `&amp;` is decoded to `&`,
 * `blob:` / `data:` values are dropped, protocol-relative URLs get `https:`,
 * and the result is resolved against `location.href`.
 *
 * Backslashes inside the regular expressions are doubled because the script
 * lives in a template literal.
 *
 * @returns {string} JavaScript source to pass to `page.evaluate`.
 */
function scriptToReadAssets() {
    return `
    (() => {
      const root = window.context ?? {};
      const model = root.result?.global?.globalData?.model ?? null;
      const gallery = root.result?.data?.gallery?.fields ?? null;
      const defaultSrcProps = ['data-lazyload-src', 'data-src', 'data-ks-lazyload', 'currentSrc', 'src'];
      const groups = [
        { key: 'main', type: 'image', selectors: ['#dt-tab img', '.detail-gallery-turn img.detail-gallery-img', '.img-list-wrapper img.od-gallery-img', '.od-scroller-item span'] },
        { key: 'video', type: 'video', selectors: ['.lib-video video', 'video[src]', 'video source[src]'] },
        { key: 'sku', type: 'image', selectors: ['.pc-sku-wrapper .prop-item-inner-wrapper', '.sku-item-wrapper', '.specification-cell', '.sku-filter-button', '.expand-view-item', '.feature-item img'], srcProps: ['backgroundImage'] },
        { key: 'detail', type: 'image', selectors: ['.de-description-detail img', '#detailContentContainer img', '.html-description img', '.html-description source', '.desc-lazyload-container img'] },
      ];
      const assets = [];
      const seen = new Set();

      const normalizeUrl = (value) => {
        if (typeof value !== 'string') return '';
        let next = value
          .replace(/^url\\((.*)\\)$/i, '$1')
          .replace(/^['"]|['"]$/g, '')
          .replace(/\\\\u002F/g, '/')
          .replace(/&amp;/g, '&')
          .trim();
        if (!next || next.startsWith('blob:') || next.startsWith('data:')) return '';
        if (next.startsWith('//')) next = 'https:' + next;
        try {
          return new URL(next, location.href).toString();
        } catch {
          return '';
        }
      };

      const push = (type, group, url, source) => {
        const normalized = normalizeUrl(url);
        if (!normalized) return;
        const key = type + ':' + normalized;
        if (seen.has(key)) return;
        seen.add(key);
        assets.push({ type, group, url: normalized, source });
      };

      const queryAllDeep = (selector) => {
        const results = [];
        const visitedRoots = new Set();
        const walkRoots = (root, fn) => {
          if (!root || visitedRoots.has(root)) return;
          visitedRoots.add(root);
          fn(root);
          const childElements = root.querySelectorAll ? Array.from(root.querySelectorAll('*')) : [];
          for (const child of childElements) {
            if (child && child.shadowRoot) {
              walkRoots(child.shadowRoot, fn);
            }
          }
        };
        walkRoots(document, (root) => {
          if (root.querySelectorAll) {
            results.push(...Array.from(root.querySelectorAll(selector)));
          }
        });
        return results;
      };

      const valuesFromElement = (element, srcProps) => {
        const values = [];
        const props = srcProps && srcProps.length ? srcProps : defaultSrcProps;
        for (const prop of props) {
          try {
            if (prop === 'backgroundImage') {
              const bg = getComputedStyle(element).backgroundImage || '';
              const matches = bg.match(/url\\(([^)]+)\\)/g) || [];
              for (const match of matches) {
                const clean = match.replace(/^url\\(/, '').replace(/\\)$/, '');
                values.push(clean);
              }
              continue;
            }

            const direct = element[prop];
            if (typeof direct === 'string' && direct) values.push(direct);
            const attr = element.getAttribute ? element.getAttribute(prop) : '';
            if (attr) values.push(attr);
          } catch {}
        }

        if (element.tagName === 'SOURCE' && element.parentElement?.tagName === 'VIDEO') {
          values.push(element.src || element.getAttribute('src') || '');
        }

        if (element.tagName === 'VIDEO') {
          values.push(element.currentSrc || '');
          values.push(element.src || '');
        }

        return values;
      };

      for (const group of groups) {
        for (const selector of group.selectors) {
          for (const element of queryAllDeep(selector)) {
            for (const value of valuesFromElement(element, group.srcProps)) {
              push(group.type, group.key, value, 'dom:' + selector);
            }
          }
        }
      }

      const scriptTexts = Array.from(document.scripts).map((script) => script.textContent || '');
      const videoRegex = /https?:\\/\\/[^"'\\s]+\\.(?:mp4|m3u8)(?:\\?[^"'\\s]*)?/gi;
      for (const scriptText of scriptTexts) {
        const matches = scriptText.match(videoRegex) || [];
        for (const match of matches) {
          push('video', 'video', match, 'script');
        }
      }

      const toJson = (value) => JSON.parse(JSON.stringify(value ?? null));
      return {
        href: window.location.href,
        title: document.title || '',
        offerTitle: model?.offerTitleModel?.subject ?? '',
        offerId: model?.tradeModel?.offerId ?? '',
        gallery: toJson(gallery),
        scannedAssets: assets,
      };
    })()
  `;
}
|
|
132
|
+
/**
 * Converts the raw in-page payload into the grouped asset record.
 *
 * Gallery URLs from the page state are seeded into the `main` image group
 * ahead of the DOM/script-scanned assets, the combined list is de-duplicated,
 * and the survivors are split into main / sku / detail / other images and
 * videos. Counts and the provenance fields are appended last.
 *
 * @param payload Object produced by the in-page asset reader.
 * @returns The normalized asset record.
 */
function normalizeAssets(payload) {
    const pageHref = cleanText(payload.href);
    const offerId = cleanText(String(payload.offerId ?? '')) || extractOfferId(pageHref) || null;
    const itemUrl = offerId ? buildDetailUrl(offerId) : pageHref;

    // Page-state gallery entries all belong to the main image group.
    const gallery = payload.gallery;
    const asMainImages = (urls, source) => urls.map((url) => ({ type: 'image', group: 'main', url, source }));
    const wlUrls = (gallery?.wlImageInfos ?? []).map((info) => info?.fullPathImageURI ?? '');
    const seeded = [
        ...asMainImages(gallery?.mainImage ?? [], 'page_state:mainImage'),
        ...asMainImages(gallery?.offerImgList ?? [], 'page_state:offerImgList'),
        ...asMainImages(wlUrls, 'page_state:wlImageInfos'),
    ];
    const assets = uniqueMediaSources(seeded.concat(payload.scannedAssets ?? []));

    // One pass over the de-duplicated list, keeping encounter order per bucket.
    const mainImages = [];
    const skuImages = [];
    const detailImages = [];
    const otherImages = [];
    const videos = [];
    const sources = new Set();
    for (const asset of assets) {
        const label = cleanText(asset.source);
        if (label) {
            sources.add(label);
        }
        if (asset.type === 'video') {
            videos.push(asset.url);
            continue;
        }
        if (asset.type !== 'image') {
            continue;
        }
        switch (asset.group) {
            case 'main':
                mainImages.push(asset.url);
                break;
            case 'sku':
                skuImages.push(asset.url);
                break;
            case 'detail':
                detailImages.push(asset.url);
                break;
            default:
                otherImages.push(asset.url);
        }
    }

    return {
        offer_id: offerId,
        title: cleanText(payload.offerTitle) || cleanText(payload.title) || null,
        item_url: itemUrl,
        main_images: mainImages,
        sku_images: skuImages,
        detail_images: detailImages,
        videos,
        other_images: otherImages,
        raw_assets: assets,
        source: [...sources],
        main_count: mainImages.length,
        sku_count: skuImages.length,
        detail_count: detailImages.length,
        video_count: videos.length,
        ...buildProvenance(pageHref || itemUrl),
    };
}
|
|
171
|
+
/**
 * Opens the item page and evaluates the asset-reader script in it.
 *
 * The page state is read first and must pass the authentication assertion.
 * The page is then auto-scrolled three times, followed by `page.wait(1)`
 * (the unit is defined by the page API and is not visible here), before the
 * script runs.
 *
 * @param page Browser page handle.
 * @param itemUrl Detail URL to open.
 * @returns The raw payload returned by the in-page script.
 */
async function readAssetsPayload(page, itemUrl) {
    const pageState = await gotoAndReadState(page, itemUrl, 2500, 'assets');
    assertAuthenticatedState(pageState, 'assets');

    const scrollOptions = { times: 3, delayMs: 400 };
    await page.autoScroll(scrollOptions);
    await page.wait(1);

    const readerScript = scriptToReadAssets();
    return page.evaluate(readerScript);
}
|
|
178
|
+
/**
 * Resolves a user-supplied item URL or offer id to its detail URL, reads the
 * page's asset payload and returns it in normalized form.
 *
 * @param page Browser page handle.
 * @param input Item URL or offer id; nullish input is treated as an empty string.
 * @returns The normalized asset record for the item.
 */
export async function extractAssetsForInput(page, input) {
    const detailUrl = buildDetailUrl(String(input ?? ''));
    const rawPayload = await readAssetsPayload(page, detailUrl);
    return normalizeAssets(rawPayload);
}
|
|
183
|
+
// Registers the `1688 assets` command: one row summarizing the media that can
// be extracted from an item page.
cli({
    site: '1688',
    name: 'assets',
    description: '列出 1688 商品页可提取的图片/视频素材',
    domain: 'www.1688.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'input', required: true, positional: true, help: '1688 商品 URL 或 offer ID(如 887904326744)' },
    ],
    columns: ['offer_id', 'title', 'main_count', 'sku_count', 'detail_count', 'video_count'],
    // The command always yields exactly one row.
    func: async (page, kwargs) => [await extractAssetsForInput(page, String(kwargs.input ?? ''))],
});
|
|
202
|
+
export const __test__ = {
|
|
203
|
+
normalizeAssets,
|
|
204
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { __test__ } from './assets.js';
|
|
3
|
+
import { __test__ as sharedTest } from './shared.js';
|
|
4
|
+
describe('1688 assets normalization', () => {
|
|
5
|
+
it('normalizes gallery and scanned assets into grouped media lists', () => {
|
|
6
|
+
const result = __test__.normalizeAssets({
|
|
7
|
+
href: 'https://detail.1688.com/offer/887904326744.html',
|
|
8
|
+
title: '测试商品 - 阿里巴巴',
|
|
9
|
+
offerTitle: '测试商品',
|
|
10
|
+
offerId: 887904326744,
|
|
11
|
+
gallery: {
|
|
12
|
+
mainImage: ['//img.example.com/main-1.jpg'],
|
|
13
|
+
offerImgList: ['https://img.example.com/main-2.jpg'],
|
|
14
|
+
wlImageInfos: [{ fullPathImageURI: 'https://img.example.com/main-3.jpg' }],
|
|
15
|
+
},
|
|
16
|
+
scannedAssets: [
|
|
17
|
+
{ type: 'image', group: 'sku', url: 'https://img.example.com/sku-1.png', source: 'dom:.sku' },
|
|
18
|
+
{ type: 'image', group: 'detail', url: 'https://img.example.com/detail-1.jpg', source: 'dom:.detail' },
|
|
19
|
+
{ type: 'video', group: 'video', url: 'https://video.example.com/demo.mp4', source: 'script' },
|
|
20
|
+
{ type: 'image', group: 'detail', url: 'blob:https://detail.1688.com/1', source: 'ignore' },
|
|
21
|
+
],
|
|
22
|
+
});
|
|
23
|
+
expect(result.offer_id).toBe('887904326744');
|
|
24
|
+
expect(result.main_images).toEqual([
|
|
25
|
+
'https://img.example.com/main-1.jpg',
|
|
26
|
+
'https://img.example.com/main-2.jpg',
|
|
27
|
+
'https://img.example.com/main-3.jpg',
|
|
28
|
+
]);
|
|
29
|
+
expect(result.sku_images).toEqual(['https://img.example.com/sku-1.png']);
|
|
30
|
+
expect(result.detail_images).toEqual(['https://img.example.com/detail-1.jpg']);
|
|
31
|
+
expect(result.videos).toEqual(['https://video.example.com/demo.mp4']);
|
|
32
|
+
expect(result.main_count).toBe(3);
|
|
33
|
+
expect(result.video_count).toBe(1);
|
|
34
|
+
});
|
|
35
|
+
it('normalizes media urls from style syntax and protocol-relative URLs', () => {
|
|
36
|
+
expect(sharedTest.normalizeMediaUrl('url("//img.example.com/1.jpg")')).toBe('https://img.example.com/1.jpg');
|
|
37
|
+
expect(sharedTest.normalizeMediaUrl('blob:https://detail.1688.com/1')).toBe('');
|
|
38
|
+
});
|
|
39
|
+
});
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { type MediaItem } from '@jackwener/opencli/download/media-download';
|
|
2
|
+
import { extractAssetsForInput } from './assets.js';
|
|
3
|
+
declare function extFromUrl(url: string, fallback: string): string;
|
|
4
|
+
declare function toDownloadItems(offerId: string, assets: Awaited<ReturnType<typeof extractAssetsForInput>>): MediaItem[];
|
|
5
|
+
export declare const __test__: {
|
|
6
|
+
extFromUrl: typeof extFromUrl;
|
|
7
|
+
toDownloadItems: typeof toDownloadItems;
|
|
8
|
+
};
|
|
9
|
+
export {};
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import * as path from 'node:path';
|
|
2
|
+
import { formatCookieHeader } from '@jackwener/opencli/download';
|
|
3
|
+
import { downloadMedia } from '@jackwener/opencli/download/media-download';
|
|
4
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
5
|
+
import { cleanText } from './shared.js';
|
|
6
|
+
import { extractAssetsForInput } from './assets.js';
|
|
7
|
+
/**
 * Derives a lower-cased file extension (including the leading dot) from the
 * path component of an absolute URL.
 *
 * The extension ends up in a local filename, so it is accepted only when it is
 * a dot followed by 1–7 ASCII letters or digits. Suffixes such as `.jpg!q90`
 * fall back instead of leaking punctuation into the filename.
 *
 * @param url Absolute media URL.
 * @param fallback Extension returned when the URL cannot be parsed or has no
 *   usable extension.
 * @returns The detected extension, or `fallback`.
 */
function extFromUrl(url, fallback) {
    let ext;
    try {
        ext = path.extname(new URL(url).pathname).toLowerCase();
    }
    catch {
        // Not an absolute URL: nothing to derive an extension from.
        return fallback;
    }
    return /^\.[a-z0-9]{1,7}$/.test(ext) ? ext : fallback;
}
|
|
18
|
+
/**
 * Flattens the grouped asset lists into download items with stable filenames
 * of the form `<offerId>_<group>_<NN><ext>`, where `NN` is the 1-based,
 * zero-padded position inside the group.
 *
 * Items are emitted in the order main, sku, detail, other images, then
 * videos. Images default to `.jpg` and videos to `.mp4` when the URL carries
 * no usable extension.
 *
 * @param offerId Prefix used for every filename.
 * @param assets Normalized asset record.
 * @returns Download items, each with `type`, `url` and `filename`.
 */
function toDownloadItems(offerId, assets) {
    const ordinal = (position) => String(position + 1).padStart(2, '0');
    const itemsFor = (urls, type, label, defaultExt) => urls.map((url, position) => ({
        type,
        url,
        filename: `${offerId}_${label}_${ordinal(position)}${extFromUrl(url, defaultExt)}`,
    }));
    return [
        ...itemsFor(assets.main_images, 'image', 'main', '.jpg'),
        ...itemsFor(assets.sku_images, 'image', 'sku', '.jpg'),
        ...itemsFor(assets.detail_images, 'image', 'detail', '.jpg'),
        ...itemsFor(assets.other_images, 'image', 'other', '.jpg'),
        ...itemsFor(assets.videos, 'video', 'video', '.mp4'),
    ];
}
|
|
42
|
+
// Registers the `1688 download` command: extracts the item's media and hands
// it to the shared downloader together with the browser's 1688.com cookies.
cli({
    site: '1688',
    name: 'download',
    description: '批量下载 1688 商品页可提取的图片和视频素材',
    domain: 'www.1688.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'input', required: true, positional: true, help: '1688 商品 URL 或 offer ID(如 887904326744)' },
        { name: 'output', default: './1688-downloads', help: '输出目录' },
    ],
    columns: ['index', 'type', 'status', 'size'],
    func: async (page, kwargs) => {
        const extracted = await extractAssetsForInput(page, String(kwargs.input ?? ''));
        // Fall back to a fixed label when the page yielded no offer id.
        const offerLabel = cleanText(extracted.offer_id) || '1688';
        const downloadItems = toDownloadItems(offerLabel, extracted);
        const browserCookies = await page.getCookies({ domain: '1688.com' });
        const options = {
            output: String(kwargs.output || './1688-downloads'),
            subdir: offerLabel,
            cookies: formatCookieHeader(browserCookies),
            browserCookies,
            filenamePrefix: offerLabel,
            timeout: 60000,
        };
        return downloadMedia(downloadItems, options);
    },
});
|
|
73
|
+
export const __test__ = {
|
|
74
|
+
extFromUrl,
|
|
75
|
+
toDownloadItems,
|
|
76
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { __test__ } from './download.js';
|
|
3
|
+
describe('1688 download helpers', () => {
|
|
4
|
+
it('builds stable filenames for grouped assets', () => {
|
|
5
|
+
const items = __test__.toDownloadItems('887904326744', {
|
|
6
|
+
offer_id: '887904326744',
|
|
7
|
+
title: '测试商品',
|
|
8
|
+
item_url: 'https://detail.1688.com/offer/887904326744.html',
|
|
9
|
+
main_images: ['https://img.example.com/a.jpg'],
|
|
10
|
+
sku_images: ['https://img.example.com/b.png'],
|
|
11
|
+
detail_images: ['https://img.example.com/c.webp'],
|
|
12
|
+
videos: ['https://video.example.com/d.mp4'],
|
|
13
|
+
other_images: [],
|
|
14
|
+
raw_assets: [],
|
|
15
|
+
source: [],
|
|
16
|
+
main_count: 1,
|
|
17
|
+
sku_count: 1,
|
|
18
|
+
detail_count: 1,
|
|
19
|
+
video_count: 1,
|
|
20
|
+
source_url: 'https://detail.1688.com/offer/887904326744.html',
|
|
21
|
+
fetched_at: new Date().toISOString(),
|
|
22
|
+
strategy: 'cookie',
|
|
23
|
+
});
|
|
24
|
+
expect(items.map((item) => item.filename)).toEqual([
|
|
25
|
+
'887904326744_main_01.jpg',
|
|
26
|
+
'887904326744_sku_01.png',
|
|
27
|
+
'887904326744_detail_01.webp',
|
|
28
|
+
'887904326744_video_01.mp4',
|
|
29
|
+
]);
|
|
30
|
+
});
|
|
31
|
+
});
|
|
@@ -43,6 +43,12 @@ export interface SearchCandidate {
|
|
|
43
43
|
seller_name: string | null;
|
|
44
44
|
seller_url: string | null;
|
|
45
45
|
}
|
|
46
|
+
/** One media URL discovered on an item page, tagged with where it belongs. */
export interface MediaSource {
    /** Media kind. */
    type: 'image' | 'video';
    /** Page section the asset was attributed to. */
    group: 'main' | 'sku' | 'detail' | 'video' | 'unknown';
    /** Location of the media file. */
    url: string;
    /** Optional label describing how the URL was found. */
    source?: string;
}
|
|
46
52
|
export declare function cleanText(value: unknown): string;
|
|
47
53
|
export declare function cleanMultilineText(value: unknown): string;
|
|
48
54
|
export declare function uniqueNonEmpty(values: Array<string | null | undefined>): string[];
|
|
@@ -80,6 +86,8 @@ export declare function assertAuthenticatedState(state: PageState, action: strin
|
|
|
80
86
|
export declare function assertNotCaptcha(state: PageState, action: string): void;
|
|
81
87
|
export declare function toNumber(value: unknown): number | null;
|
|
82
88
|
export declare function limitCandidates<T>(values: T[], limit: number): T[];
|
|
89
|
+
export declare function normalizeMediaUrl(input: unknown): string;
|
|
90
|
+
export declare function uniqueMediaSources(values: MediaSource[]): MediaSource[];
|
|
83
91
|
export declare const __test__: {
|
|
84
92
|
SEARCH_LIMIT_DEFAULT: number;
|
|
85
93
|
SEARCH_LIMIT_MAX: number;
|
|
@@ -108,5 +116,7 @@ export declare const __test__: {
|
|
|
108
116
|
cleanText: typeof cleanText;
|
|
109
117
|
cleanMultilineText: typeof cleanMultilineText;
|
|
110
118
|
uniqueNonEmpty: typeof uniqueNonEmpty;
|
|
119
|
+
normalizeMediaUrl: typeof normalizeMediaUrl;
|
|
120
|
+
uniqueMediaSources: typeof uniqueMediaSources;
|
|
111
121
|
limitCandidates: typeof limitCandidates;
|
|
112
122
|
};
|
package/dist/clis/1688/shared.js
CHANGED
|
@@ -432,6 +432,47 @@ export function limitCandidates(values, limit) {
|
|
|
432
432
|
const normalizedLimit = Math.max(1, Math.trunc(limit) || 1);
|
|
433
433
|
return values.slice(0, normalizedLimit);
|
|
434
434
|
}
|
|
435
|
+
/**
 * Normalizes a raw media reference into an absolute URL string.
 *
 * Steps, in order: clean the input text, unwrap a CSS `url(...)` wrapper,
 * strip a leading/trailing quote, turn the literal text `\u002F` into `/`,
 * decode the HTML entity `&amp;` to `&`, reject `data:` / `blob:` values,
 * prefix protocol-relative URLs with `https:`, and finally parse with `URL`.
 *
 * @param input Raw value; anything that cleans to an empty string yields ''.
 * @returns The serialized absolute URL, or '' when the value is empty,
 *   inline/blob data, or not parseable as an absolute URL.
 */
export function normalizeMediaUrl(input) {
    const raw = cleanText(input);
    if (!raw)
        return '';
    let value = raw
        .replace(/^url\((.*)\)$/i, '$1')
        .replace(/^['"]|['"]$/g, '')
        .replace(/\\u002F/g, '/')
        // URLs lifted from HTML attributes or embedded markup may carry
        // entity-escaped ampersands in their query strings.
        .replace(/&amp;/g, '&')
        .trim();
    if (!value || value.startsWith('data:') || value.startsWith('blob:'))
        return '';
    if (value.startsWith('//'))
        value = `https:${value}`;
    try {
        return new URL(value).toString();
    }
    catch {
        // Relative or otherwise unparseable references are dropped.
        return '';
    }
}
|
|
457
|
+
/**
 * Normalizes every entry's URL and removes duplicates.
 *
 * Entries whose URL normalizes to '' are dropped. Two entries are duplicates
 * when they share the same `type` and normalized URL; the first occurrence
 * wins and input order is preserved. Each kept entry carries the normalized
 * URL and a cleaned `source` (or `undefined` when it cleans to empty).
 *
 * @param values Candidate media entries.
 * @returns De-duplicated entries in first-seen order.
 */
export function uniqueMediaSources(values) {
    // A Map keeps insertion order, so it doubles as the seen-set and the result.
    const firstByKey = new Map();
    for (const entry of values) {
        const url = normalizeMediaUrl(entry.url);
        if (!url) {
            continue;
        }
        const key = `${entry.type}:${url}`;
        if (!firstByKey.has(key)) {
            firstByKey.set(key, {
                ...entry,
                url,
                source: cleanText(entry.source) || undefined,
            });
        }
    }
    return Array.from(firstByKey.values());
}
|
|
435
476
|
function normalizeNumericText(value) {
|
|
436
477
|
return value
|
|
437
478
|
.replace(/([¥$€])\s+(?=\d)/g, '$1')
|
|
@@ -510,5 +551,7 @@ export const __test__ = {
|
|
|
510
551
|
cleanText,
|
|
511
552
|
cleanMultilineText,
|
|
512
553
|
uniqueNonEmpty,
|
|
554
|
+
normalizeMediaUrl,
|
|
555
|
+
uniqueMediaSources,
|
|
513
556
|
limitCandidates,
|
|
514
557
|
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Search-result candidate handled by the jianyu search helpers
 * (the element type accepted and returned by `dedupeCandidates`).
 */
interface JianyuCandidate {
    /** Result title. */
    title: string;
    /** Result link. */
    url: string;
    /** Result date as text; NOTE(review): exact format is not visible here. */
    date: string;
}
|
|
6
|
+
export declare function buildSearchUrl(query: string): string;
|
|
7
|
+
export declare function normalizeDate(raw: string): string;
|
|
8
|
+
declare function dedupeCandidates(items: JianyuCandidate[]): JianyuCandidate[];
|
|
9
|
+
export declare const __test__: {
|
|
10
|
+
buildSearchUrl: typeof buildSearchUrl;
|
|
11
|
+
normalizeDate: typeof normalizeDate;
|
|
12
|
+
dedupeCandidates: typeof dedupeCandidates;
|
|
13
|
+
};
|
|
14
|
+
export {};
|