@jackwener/opencli 1.7.6 → 1.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -8
- package/README.zh-CN.md +14 -8
- package/cli-manifest.json +325 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/bilibili/video.js +11 -4
- package/clis/bilibili/video.test.js +51 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/deepseek/ask.js +19 -13
- package/clis/deepseek/ask.test.js +93 -1
- package/clis/deepseek/utils.js +108 -23
- package/clis/deepseek/utils.test.js +109 -1
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +28 -0
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/cdp.js +11 -2
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +333 -43
- package/dist/src/cli.test.js +257 -1
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/package.json +5 -1
package/clis/twitter/likes.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
2
|
import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
|
|
3
|
-
import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
|
|
3
|
+
import { resolveTwitterQueryId, sanitizeQueryId, extractMedia } from './shared.js';
|
|
4
4
|
const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
|
|
5
5
|
const LIKES_QUERY_ID = 'RozQdCp4CilQzrcuU0NY5w';
|
|
6
6
|
const USER_BY_SCREEN_NAME_QUERY_ID = 'qRednkZG-rn1P6b48NINmQ';
|
|
@@ -99,6 +99,7 @@ function extractLikedTweet(result, seen) {
|
|
|
99
99
|
retweets: legacy.retweet_count || 0,
|
|
100
100
|
created_at: legacy.created_at || '',
|
|
101
101
|
url: `https://x.com/${screenName}/status/${tw.rest_id}`,
|
|
102
|
+
...extractMedia(legacy),
|
|
102
103
|
};
|
|
103
104
|
}
|
|
104
105
|
function parseLikes(data, seen) {
|
|
@@ -144,7 +145,7 @@ cli({
|
|
|
144
145
|
{ name: 'username', type: 'string', positional: true, help: 'Twitter screen name (without @). Defaults to logged-in user.' },
|
|
145
146
|
{ name: 'limit', type: 'int', default: 20 },
|
|
146
147
|
],
|
|
147
|
-
columns: ['author', 'name', 'text', 'likes', 'url'],
|
|
148
|
+
columns: ['author', 'name', 'text', 'likes', 'url', 'has_media', 'media_urls'],
|
|
148
149
|
func: async (page, kwargs) => {
|
|
149
150
|
const limit = kwargs.limit || 20;
|
|
150
151
|
let username = (kwargs.username || '').replace(/^@/, '');
|
package/clis/twitter/search.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { CommandExecutionError } from '@jackwener/opencli/errors';
|
|
2
2
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
3
|
+
import { extractMedia } from './shared.js';
|
|
3
4
|
/**
|
|
4
5
|
* Trigger Twitter search SPA navigation with fallback strategies.
|
|
5
6
|
*
|
|
@@ -102,7 +103,7 @@ cli({
|
|
|
102
103
|
{ name: 'filter', type: 'string', default: 'top', choices: ['top', 'live'] },
|
|
103
104
|
{ name: 'limit', type: 'int', default: 15 },
|
|
104
105
|
],
|
|
105
|
-
columns: ['id', 'author', 'text', 'created_at', 'likes', 'views', 'url'],
|
|
106
|
+
columns: ['id', 'author', 'text', 'created_at', 'likes', 'views', 'url', 'has_media', 'media_urls'],
|
|
106
107
|
func: async (page, kwargs) => {
|
|
107
108
|
const query = kwargs.query;
|
|
108
109
|
const filter = kwargs.filter === 'live' ? 'live' : 'top';
|
|
@@ -156,7 +157,8 @@ cli({
|
|
|
156
157
|
created_at: tweet.legacy?.created_at || '',
|
|
157
158
|
likes: tweet.legacy?.favorite_count || 0,
|
|
158
159
|
views: tweet.views?.count || '0',
|
|
159
|
-
url: `https://x.com/i/status/${tweet.rest_id}`
|
|
160
|
+
url: `https://x.com/i/status/${tweet.rest_id}`,
|
|
161
|
+
...extractMedia(tweet.legacy),
|
|
160
162
|
});
|
|
161
163
|
}
|
|
162
164
|
}
|
|
@@ -75,6 +75,8 @@ describe('twitter search command', () => {
|
|
|
75
75
|
likes: 7,
|
|
76
76
|
views: '12',
|
|
77
77
|
url: 'https://x.com/i/status/1',
|
|
78
|
+
has_media: false,
|
|
79
|
+
media_urls: [],
|
|
78
80
|
},
|
|
79
81
|
]);
|
|
80
82
|
expect(page.installInterceptor).toHaveBeenCalledWith('SearchTimeline');
|
|
@@ -203,6 +205,8 @@ describe('twitter search command', () => {
|
|
|
203
205
|
likes: 3,
|
|
204
206
|
views: '5',
|
|
205
207
|
url: 'https://x.com/i/status/99',
|
|
208
|
+
has_media: false,
|
|
209
|
+
media_urls: [],
|
|
206
210
|
},
|
|
207
211
|
]);
|
|
208
212
|
// 6 evaluate calls: 2x pushState + 2x pathname check + 1x fallback + 1x pathname check
|
package/clis/twitter/shared.js
CHANGED
|
@@ -35,6 +35,34 @@ export async function resolveTwitterQueryId(page, operationName, fallbackId) {
|
|
|
35
35
|
}`);
|
|
36
36
|
return sanitizeQueryId(resolved, fallbackId);
|
|
37
37
|
}
|
|
38
|
+
/**
 * Pick the URL of the best mp4 rendition from a `video_info.variants` list.
 *
 * "Best" means the highest `bitrate`; variants without a bitrate count as 0,
 * and on a tie the earliest variant wins. Entries that are not `video/mp4`
 * or that carry no `url` are ignored.
 *
 * @param {unknown} variants - Value of `video_info.variants` (may be missing or malformed).
 * @returns {string | undefined} The chosen mp4 URL, or `undefined` when none is usable.
 */
function pickBestMp4Url(variants) {
    if (!Array.isArray(variants)) return undefined;
    let best;
    for (const variant of variants) {
        if (variant?.content_type !== 'video/mp4' || !variant.url) continue;
        if (!best || (variant.bitrate ?? 0) > (best.bitrate ?? 0)) {
            best = variant;
        }
    }
    return best?.url;
}

/**
 * Extract media flags and URLs from a tweet's `legacy` object.
 *
 * Prefers `extended_entities.media` and falls back to `entities.media` when
 * the extended form is missing. For videos and animated GIFs, returns the
 * highest-bitrate mp4 variant URL (falling back to `media_url_https` when no
 * usable mp4 variant exists); for every other media type, returns
 * `media_url_https`.
 *
 * @param {object} [legacy] - The tweet's `legacy` payload; may be null/undefined.
 * @returns {{ has_media: boolean, media_urls: string[] }} `has_media` is true
 *   only when at least one URL could be extracted.
 */
export function extractMedia(legacy) {
    const media = legacy?.extended_entities?.media || legacy?.entities?.media;
    if (!Array.isArray(media) || media.length === 0) {
        return { has_media: false, media_urls: [] };
    }
    const urls = [];
    for (const m of media) {
        if (!m) continue;
        let url = m.media_url_https;
        if (m.type === 'video' || m.type === 'animated_gif') {
            // The thumbnail stays as the fallback when there is no playable mp4.
            url = pickBestMp4Url(m.video_info?.variants) || url;
        }
        if (url) urls.push(url);
    }
    return { has_media: urls.length > 0, media_urls: urls };
}
|
|
38
65
|
export const __test__ = {
|
|
39
66
|
sanitizeQueryId,
|
|
67
|
+
extractMedia,
|
|
40
68
|
};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { __test__ } from './shared.js';
|
|
3
|
+
|
|
4
|
+
const { extractMedia } = __test__;
|
|
5
|
+
|
|
6
|
+
// Unit tests for extractMedia: empty input, photos, video/GIF mp4 selection,
// thumbnail fallback, and the entities.media fallback.
describe('twitter extractMedia', () => {
    const NO_MEDIA = { has_media: false, media_urls: [] };
    // Small builders that keep the fixtures below readable.
    const withExtended = (...media) => ({ extended_entities: { media } });
    const photo = (file) => ({ type: 'photo', media_url_https: `https://pbs.twimg.com/media/${file}` });

    it('returns false + empty list when legacy has no media', () => {
        for (const legacy of [{}, undefined, withExtended()]) {
            expect(extractMedia(legacy)).toEqual(NO_MEDIA);
        }
    });

    it('extracts photo urls from extended_entities', () => {
        const { has_media, media_urls } = extractMedia(withExtended(photo('a.jpg'), photo('b.jpg')));
        expect(has_media).toBe(true);
        expect(media_urls).toEqual([
            'https://pbs.twimg.com/media/a.jpg',
            'https://pbs.twimg.com/media/b.jpg',
        ]);
    });

    it('prefers mp4 variant for video and animated_gif', () => {
        const video = {
            type: 'video',
            media_url_https: 'https://pbs.twimg.com/media/thumb.jpg',
            video_info: {
                variants: [
                    { content_type: 'application/x-mpegURL', url: 'https://video.twimg.com/x.m3u8' },
                    { content_type: 'video/mp4', url: 'https://video.twimg.com/x.mp4' },
                ],
            },
        };
        const gif = {
            type: 'animated_gif',
            media_url_https: 'https://pbs.twimg.com/tweet_video_thumb/g.jpg',
            video_info: {
                variants: [
                    { content_type: 'video/mp4', url: 'https://video.twimg.com/g.mp4' },
                ],
            },
        };
        const { has_media, media_urls } = extractMedia(withExtended(video, gif));
        expect(has_media).toBe(true);
        expect(media_urls).toEqual([
            'https://video.twimg.com/x.mp4',
            'https://video.twimg.com/g.mp4',
        ]);
    });

    it('falls back to media_url_https when no mp4 variant is available', () => {
        const videoWithoutVariants = {
            type: 'video',
            media_url_https: 'https://pbs.twimg.com/media/thumb.jpg',
            video_info: { variants: [] },
        };
        expect(extractMedia(withExtended(videoWithoutVariants))).toEqual({
            has_media: true,
            media_urls: ['https://pbs.twimg.com/media/thumb.jpg'],
        });
    });

    it('falls back to entities.media when extended_entities is missing', () => {
        const legacy = { entities: { media: [photo('c.jpg')] } };
        expect(extractMedia(legacy)).toEqual({
            has_media: true,
            media_urls: ['https://pbs.twimg.com/media/c.jpg'],
        });
    });
});
|
package/clis/twitter/thread.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
2
|
import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { extractMedia } from './shared.js';
|
|
3
4
|
// ── Twitter GraphQL constants ──────────────────────────────────────────
|
|
4
5
|
const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
|
|
5
6
|
const TWEET_DETAIL_QUERY_ID = 'nBS-WpgA6ZG0CyNHD517JQ';
|
|
@@ -54,6 +55,7 @@ function extractTweet(r, seen) {
|
|
|
54
55
|
in_reply_to: l.in_reply_to_status_id_str || undefined,
|
|
55
56
|
created_at: l.created_at,
|
|
56
57
|
url: `https://x.com/${screenName}/status/${tw.rest_id}`,
|
|
58
|
+
...extractMedia(l),
|
|
57
59
|
};
|
|
58
60
|
}
|
|
59
61
|
function parseTweetDetail(data, seen) {
|
|
@@ -101,7 +103,7 @@ cli({
|
|
|
101
103
|
{ name: 'tweet-id', positional: true, type: 'string', required: true },
|
|
102
104
|
{ name: 'limit', type: 'int', default: 50 },
|
|
103
105
|
],
|
|
104
|
-
columns: ['id', 'author', 'text', 'likes', 'retweets', 'url'],
|
|
106
|
+
columns: ['id', 'author', 'text', 'likes', 'retweets', 'url', 'has_media', 'media_urls'],
|
|
105
107
|
func: async (page, kwargs) => {
|
|
106
108
|
let tweetId = kwargs['tweet-id'];
|
|
107
109
|
const urlMatch = tweetId.match(/\/status\/(\d+)/);
|
package/clis/twitter/timeline.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
|
|
2
2
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
3
|
-
import { resolveTwitterQueryId } from './shared.js';
|
|
3
|
+
import { resolveTwitterQueryId, extractMedia } from './shared.js';
|
|
4
4
|
// ── Twitter GraphQL constants ──────────────────────────────────────────
|
|
5
5
|
const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
|
|
6
6
|
const HOME_TIMELINE_QUERY_ID = 'c-CzHF1LboFilMpsx4ZCrQ';
|
|
@@ -85,6 +85,7 @@ function extractTweet(result, seen) {
|
|
|
85
85
|
views,
|
|
86
86
|
created_at: l.created_at || '',
|
|
87
87
|
url: `https://x.com/${screenName}/status/${tw.rest_id}`,
|
|
88
|
+
...extractMedia(l),
|
|
88
89
|
};
|
|
89
90
|
}
|
|
90
91
|
function parseHomeTimeline(data, seen) {
|
|
@@ -148,7 +149,7 @@ cli({
|
|
|
148
149
|
},
|
|
149
150
|
{ name: 'limit', type: 'int', default: 20 },
|
|
150
151
|
],
|
|
151
|
-
columns: ['id', 'author', 'text', 'likes', 'retweets', 'replies', 'views', 'created_at', 'url'],
|
|
152
|
+
columns: ['id', 'author', 'text', 'likes', 'retweets', 'replies', 'views', 'created_at', 'url', 'has_media', 'media_urls'],
|
|
152
153
|
func: async (page, kwargs) => {
|
|
153
154
|
const limit = kwargs.limit || 20;
|
|
154
155
|
const timelineType = kwargs.type === 'following' ? 'following' : 'for-you';
|
package/clis/twitter/tweets.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
2
|
import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
-
import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
|
|
3
|
+
import { resolveTwitterQueryId, sanitizeQueryId, extractMedia } from './shared.js';
|
|
4
4
|
|
|
5
5
|
const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
|
|
6
6
|
const USER_TWEETS_QUERY_ID = '6fWQaBPK51aGyC_VC7t9GQ';
|
|
@@ -105,6 +105,7 @@ function extractTweet(result, seen) {
|
|
|
105
105
|
is_retweet: isRetweet,
|
|
106
106
|
created_at: legacy.created_at || '',
|
|
107
107
|
url: `https://x.com/${screenName}/status/${tw.rest_id}`,
|
|
108
|
+
...extractMedia(legacy),
|
|
108
109
|
};
|
|
109
110
|
}
|
|
110
111
|
|
|
@@ -151,7 +152,7 @@ cli({
|
|
|
151
152
|
{ name: 'username', type: 'string', positional: true, required: true, help: 'Twitter screen name (with or without @)' },
|
|
152
153
|
{ name: 'limit', type: 'int', default: 20, help: 'Max tweets to return' },
|
|
153
154
|
],
|
|
154
|
-
columns: ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url'],
|
|
155
|
+
columns: ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls'],
|
|
155
156
|
func: async (page, kwargs) => {
|
|
156
157
|
const limit = Math.max(1, Math.min(200, kwargs.limit || 20));
|
|
157
158
|
const username = String(kwargs.username || '').replace(/^@/, '').trim();
|
|
@@ -5,7 +5,7 @@ import { __test__ } from './tweets.js';
|
|
|
5
5
|
describe('twitter tweets helpers', () => {
|
|
6
6
|
it('registers is_retweet in the default columns', () => {
|
|
7
7
|
const cmd = getRegistry().get('twitter/tweets');
|
|
8
|
-
expect(cmd?.columns).toEqual(['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url']);
|
|
8
|
+
expect(cmd?.columns).toEqual(['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls']);
|
|
9
9
|
});
|
|
10
10
|
|
|
11
11
|
it('falls back when queryId contains unsafe characters', () => {
|
package/clis/web/read.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
*/
|
|
16
16
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
17
17
|
import { downloadArticle } from '@jackwener/opencli/download/article-download';
|
|
18
|
-
cli({
|
|
18
|
+
const command = cli({
|
|
19
19
|
site: 'web',
|
|
20
20
|
name: 'read',
|
|
21
21
|
description: 'Fetch any web page and export as Markdown',
|
|
@@ -26,6 +26,7 @@ cli({
|
|
|
26
26
|
{ name: 'output', default: './web-articles', help: 'Output directory' },
|
|
27
27
|
{ name: 'download-images', type: 'boolean', default: true, help: 'Download images locally' },
|
|
28
28
|
{ name: 'wait', type: 'int', default: 3, help: 'Seconds to wait after page load' },
|
|
29
|
+
{ name: 'stdout', type: 'boolean', default: false, help: 'Print markdown to stdout instead of saving to a file' },
|
|
29
30
|
],
|
|
30
31
|
columns: ['title', 'author', 'publish_time', 'status', 'size', 'saved'],
|
|
31
32
|
func: async (page, kwargs) => {
|
|
@@ -162,14 +163,26 @@ cli({
|
|
|
162
163
|
if (el.children && el.children.length > 2) dedup(el);
|
|
163
164
|
});
|
|
164
165
|
|
|
166
|
+
// --- Lazy-load image src rewrite ---
|
|
167
|
+
// Many sites render <img src="placeholder.gif" data-src="real.jpg">.
|
|
168
|
+
// Promote the real URL onto src so both the markdown body and the
|
|
169
|
+
// image download list reference the same URL.
|
|
170
|
+
clone.querySelectorAll('img').forEach(img => {
|
|
171
|
+
const srcset = img.getAttribute('data-srcset') || '';
|
|
172
|
+
const srcsetFirst = srcset.split(',')[0]?.trim().split(' ')[0] || '';
|
|
173
|
+
const real = img.getAttribute('data-src')
|
|
174
|
+
|| img.getAttribute('data-original')
|
|
175
|
+
|| img.getAttribute('data-lazy-src')
|
|
176
|
+
|| srcsetFirst;
|
|
177
|
+
if (real) img.setAttribute('src', real);
|
|
178
|
+
});
|
|
179
|
+
|
|
165
180
|
result.contentHtml = clone.innerHTML;
|
|
166
181
|
|
|
167
182
|
// --- Image extraction ---
|
|
168
183
|
const seen = new Set();
|
|
169
184
|
clone.querySelectorAll('img').forEach(img => {
|
|
170
|
-
const src = img.getAttribute('data-src')
|
|
171
|
-
|| img.getAttribute('data-original')
|
|
172
|
-
|| img.getAttribute('src');
|
|
185
|
+
const src = img.getAttribute('src') || '';
|
|
173
186
|
if (src && !src.startsWith('data:') && !seen.has(src)) {
|
|
174
187
|
seen.add(src);
|
|
175
188
|
result.imageUrls.push(src);
|
|
@@ -186,7 +199,7 @@ cli({
|
|
|
186
199
|
referer = parsed.origin + '/';
|
|
187
200
|
}
|
|
188
201
|
catch { /* ignore */ }
|
|
189
|
-
|
|
202
|
+
const result = await downloadArticle({
|
|
190
203
|
title: data?.title || 'untitled',
|
|
191
204
|
author: data?.author,
|
|
192
205
|
publishTime: data?.publishTime,
|
|
@@ -197,6 +210,13 @@ cli({
|
|
|
197
210
|
output: kwargs.output,
|
|
198
211
|
downloadImages: kwargs['download-images'],
|
|
199
212
|
imageHeaders: referer ? { Referer: referer } : undefined,
|
|
213
|
+
stdout: kwargs.stdout,
|
|
200
214
|
});
|
|
215
|
+
// `--stdout` is a content-streaming mode. The markdown body already went
|
|
216
|
+
// to process.stdout inside downloadArticle(), so returning rows here
|
|
217
|
+
// would make Commander append table/JSON output to the same stdout
|
|
218
|
+
// stream and break piping.
|
|
219
|
+
return kwargs.stdout ? null : result;
|
|
201
220
|
},
|
|
202
221
|
});
|
|
222
|
+
export const __test__ = { command };
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
|
|
3
|
+
// Create the mock through vi.hoisted so that it can be referenced from the
// vi.mock factory below.
// NOTE(review): this relies on Vitest hoisting vi.mock()/vi.hoisted() above
// the module's other statements — confirm against the Vitest docs.
const { mockDownloadArticle } = vi.hoisted(() => ({
    mockDownloadArticle: vi.fn(),
}));

// Swap the article downloader for the mock, so the tests can observe the
// arguments passed to downloadArticle and control what it resolves to.
vi.mock('@jackwener/opencli/download/article-download', () => ({
    downloadArticle: mockDownloadArticle,
}));

// Load the command under test via a dynamic import, after the mock is set up.
const { __test__ } = await import('./read.js');
|
|
12
|
+
|
|
13
|
+
// Verifies what the web/read command's func returns with and without --stdout.
describe('web/read stdout behavior', () => {
    const { command: read } = __test__;

    // Minimal page stub: navigation and waiting are no-ops, evaluate yields
    // the extracted article payload.
    const extracted = {
        title: 'Example Article',
        author: 'Author',
        publishTime: '2026-04-22',
        contentHtml: '<p>hello</p>',
        imageUrls: ['https://example.com/a.jpg'],
    };
    const page = {
        goto: vi.fn().mockResolvedValue(undefined),
        wait: vi.fn().mockResolvedValue(undefined),
        evaluate: vi.fn().mockResolvedValue(extracted),
    };

    // Arguments shared by both scenarios; each test adds its own `stdout` flag.
    const baseArgs = {
        url: 'https://example.com/article',
        output: '/tmp/out',
        'download-images': false,
    };

    beforeEach(() => {
        mockDownloadArticle.mockReset();
        mockDownloadArticle.mockResolvedValue([{
            title: 'Example Article',
            author: 'Author',
            publish_time: '2026-04-22',
            status: 'success',
            size: '1 KB',
            saved: '-',
        }]);
        for (const stub of [page.goto, page.wait, page.evaluate]) {
            stub.mockClear();
        }
    });

    it('returns null in --stdout mode so the CLI does not append result rows to stdout', async () => {
        const result = await read.func(page, { ...baseArgs, stdout: true });

        expect(result).toBeNull();

        const expectedArticle = expect.objectContaining({
            title: 'Example Article',
            sourceUrl: 'https://example.com/article',
        });
        const expectedOptions = expect.objectContaining({
            output: '/tmp/out',
            stdout: true,
        });
        expect(mockDownloadArticle).toHaveBeenCalledWith(expectedArticle, expectedOptions);
    });

    it('still returns the saved-row payload when writing to disk', async () => {
        const rows = [{ title: 'Example Article', saved: '/tmp/out/Example Article/example.md' }];
        mockDownloadArticle.mockResolvedValue(rows);

        const result = await read.func(page, { ...baseArgs, stdout: false });

        expect(result).toBe(rows);
    });
});
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { WEREAD_UA, WEREAD_WEB_ORIGIN, WEREAD_DOMAIN } from './utils.js';
|
|
4
|
+
|
|
5
|
+
// Base URL that the API paths used in this module are appended to.
const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
|
|
6
|
+
|
|
7
|
+
/**
 * Serialise cookie objects into a single `Cookie` request-header value.
 *
 * @param {Array<{ name: string, value: string }>} cookies - Cookies to send.
 * @returns {string} `name=value` pairs joined by `; ` (empty string for no cookies).
 */
function buildCookieHeader(cookies) {
    const toPair = ({ name, value }) => `${name}=${value}`;
    const pairs = cookies.map(toPair);
    return pairs.join('; ');
}
|
|
10
|
+
|
|
11
|
+
/**
 * POST a JSON payload to a WeRead web API path, sending the browser's cookies.
 *
 * Cookies are read from `page` twice — scoped to the request URL and scoped
 * to WEREAD_DOMAIN — and merged by name; when both sets contain the same
 * name, the URL-scoped cookie's value is the one sent.
 *
 * @param page - Object exposing `getCookies({ url })` and `getCookies({ domain })`.
 * @param {string} path - API path appended to WEB_API.
 * @param body - JSON-serialisable request payload.
 * @returns The parsed JSON response body.
 * @throws {CliError} AUTH_REQUIRED on HTTP 401 or errcode -2010 / -2012,
 *   PARSE_ERROR when the body is not valid JSON, FETCH_ERROR on any other
 *   non-OK status.
 */
async function postWebApiWithCookies(page, path, body) {
    const endpoint = `${WEB_API}${path}`;
    const authErrorArgs = ['AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first'];

    const [urlCookies, siteCookies] = await Promise.all([
        page.getCookies({ url: endpoint }),
        page.getCookies({ domain: WEREAD_DOMAIN }),
    ]);
    // Later entries overwrite earlier ones, so URL-scoped cookies take precedence.
    const cookiesByName = new Map([...siteCookies, ...urlCookies].map((cookie) => [cookie.name, cookie]));
    const cookieHeader = buildCookieHeader([...cookiesByName.values()]);

    const headers = {
        'User-Agent': WEREAD_UA,
        'Content-Type': 'application/json',
        'Origin': WEREAD_WEB_ORIGIN,
        'Referer': `${WEREAD_WEB_ORIGIN}/`,
    };
    if (cookieHeader) {
        headers['Cookie'] = cookieHeader;
    }

    const resp = await fetch(endpoint, { method: 'POST', headers, body: JSON.stringify(body) });

    if (resp.status === 401) {
        throw new CliError(...authErrorArgs);
    }

    // The body is parsed before the status check so that an auth errcode
    // carried by a non-OK response is still reported as AUTH_REQUIRED.
    let data;
    try {
        data = await resp.json();
    } catch {
        throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
    }

    if ([-2010, -2012].includes(data?.errcode)) {
        throw new CliError(...authErrorArgs);
    }
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
    }
    return data;
}
|
|
53
|
+
|
|
54
|
+
/**
 * POST a JSON payload to a WeRead web API path without attaching cookies.
 *
 * @param {string} path - API path appended to WEB_API.
 * @param body - JSON-serialisable request payload.
 * @returns The parsed JSON response body.
 * @throws {CliError} FETCH_ERROR on a non-OK status, PARSE_ERROR when the
 *   body is not valid JSON.
 */
async function postWebApi(path, body) {
    const headers = {
        'User-Agent': WEREAD_UA,
        'Content-Type': 'application/json',
    };
    const resp = await fetch(`${WEB_API}${path}`, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
    });

    if (resp.ok) {
        try {
            return await resp.json();
        } catch {
            throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
        }
    }
    throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
}
|
|
73
|
+
|
|
74
|
+
// `weread ai-outline <book-id>`: fetches the book's chapter list, requests the
// outline for those chapters, and returns it either as flat rows (--raw) or as
// one formatted text block per chapter.
cli({
    site: 'weread',
    name: 'ai-outline',
    description: 'Get AI-generated outline for a book',
    domain: 'weread.qq.com',
    strategy: Strategy.COOKIE,
    defaultFormat: 'plain',
    args: [
        { name: 'book-id', positional: true, required: true, help: 'Book ID (from shelf or search results)' },
        { name: 'limit', type: 'int', default: 200, help: 'Max outline items to return' },
        { name: 'depth', type: 'int', default: 4, help: 'Max outline depth (2=topics, 3=key points, 4=details)' },
        { name: 'raw', type: 'boolean', default: false, help: 'Output structured rows (chapter/idx/level/text) for programmatic use' },
    ],
    columns: undefined,
    func: async (page, args) => {
        const bookId = String(args['book-id'] || '').trim();

        // Step 1: chapter list (cookie-authenticated request).
        const chapterData = await postWebApiWithCookies(page, '/book/chapterInfos', {
            bookIds: [bookId],
            sinces: [0],
        });
        const chapters = chapterData?.data?.[0]?.updated ?? [];
        if (chapters.length === 0) {
            throw new CliError('NOT_FOUND', 'No chapters found for this book', 'Check that the book ID is correct');
        }
        const chapterUids = chapters.map((chapter) => chapter.chapterUid);
        const titleByUid = new Map(chapters.map((chapter) => [chapter.chapterUid, chapter.title ?? '']));

        // Step 2: outline entries for every chapter.
        const outlineData = await postWebApi('/book/outline', { bookId, chapterUids });
        const itemsArray = outlineData?.itemsArray ?? [];
        const maxDepth = Number(args.depth);

        // Step 3: flatten into rows, dropping level <= 1 items and anything
        // deeper than the requested depth.
        const rawRows = [];
        for (const entry of itemsArray) {
            const { items } = entry;
            if (!Array.isArray(items) || items.length === 0) continue;

            const chapter = titleByUid.get(entry.chapterUid) ?? `Chapter ${entry.chapterUid}`;
            // Level-4 items that arrive without their own index are numbered
            // under the most recent indexed level-3 item: "<l3 idx>.<n>".
            let parentIdx = '';
            let childSeq = 0;

            for (const item of items) {
                const level = item.level ?? 1;
                if (level <= 1 || level > maxDepth) continue;

                let idx = item.uiIdx ?? '';
                if (level === 3 && idx) {
                    parentIdx = idx;
                    childSeq = 0;
                } else if (level === 4 && !idx && parentIdx) {
                    childSeq += 1;
                    idx = `${parentIdx}.${childSeq}`;
                }

                rawRows.push({ chapter, idx, level, text: item.text ?? '' });
            }
        }

        if (rawRows.length === 0) {
            throw new CliError('NOT_FOUND', 'No AI outline available for this book', 'AI outlines may not be generated for all books');
        }

        if (Boolean(args.raw)) {
            return rawRows.slice(0, Number(args.limit));
        }

        // Step 4: plain mode — one text block per chapter, in first-seen order.
        const rowsByChapter = new Map();
        for (const row of rawRows) {
            const bucket = rowsByChapter.get(row.chapter);
            if (bucket) {
                bucket.push(row);
            } else {
                rowsByChapter.set(row.chapter, [row]);
            }
        }

        const results = Array.from(rowsByChapter, ([chapter, rows]) => {
            const body = rows.map((row) => {
                const indent = ' '.repeat(row.level - 2);
                const separator = row.level === 2 ? '. ' : ' ';
                return `${indent}${row.idx}${separator}${row.text}`;
            });
            return { outline: [`📖 ${chapter}`, ...body].join('\n') };
        });

        // NOTE(review): here `limit` caps the number of chapter blocks, while in
        // --raw mode it caps individual outline rows — confirm this is intended
        // given the help text says "Max outline items".
        return results.slice(0, Number(args.limit));
    },
});
|