@jackwener/opencli 1.7.18 → 1.7.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/README.md +18 -17
  2. package/README.zh-CN.md +16 -18
  3. package/cli-manifest.json +311 -186
  4. package/clis/ctrip/ctrip.test.js +486 -1
  5. package/clis/ctrip/flight.js +136 -0
  6. package/clis/ctrip/hotel-search.js +132 -0
  7. package/clis/ctrip/utils.js +298 -0
  8. package/clis/google/search.js +16 -6
  9. package/clis/google-scholar/search.js +20 -5
  10. package/clis/google-scholar/search.test.js +35 -2
  11. package/clis/reddit/home.js +117 -0
  12. package/clis/reddit/home.test.js +127 -0
  13. package/clis/reddit/read.js +400 -54
  14. package/clis/reddit/read.test.js +315 -12
  15. package/clis/reddit/subreddit-info.js +117 -0
  16. package/clis/reddit/subreddit-info.test.js +163 -0
  17. package/clis/reddit/whoami.js +84 -0
  18. package/clis/reddit/whoami.test.js +105 -0
  19. package/clis/rednote/search.js +6 -2
  20. package/clis/twitter/bookmark-folder.js +8 -4
  21. package/clis/twitter/bookmark-folder.test.js +59 -1
  22. package/clis/twitter/bookmarks.js +12 -4
  23. package/clis/twitter/bookmarks.test.js +205 -0
  24. package/clis/twitter/followers.js +20 -5
  25. package/clis/twitter/followers.test.js +44 -0
  26. package/clis/twitter/following.js +36 -20
  27. package/clis/twitter/following.test.js +60 -8
  28. package/clis/twitter/likes.js +28 -13
  29. package/clis/twitter/likes.test.js +111 -1
  30. package/clis/twitter/list-add.js +128 -204
  31. package/clis/twitter/list-add.test.js +97 -1
  32. package/clis/twitter/list-tweets.js +13 -4
  33. package/clis/twitter/list-tweets.test.js +48 -0
  34. package/clis/twitter/lists.js +5 -2
  35. package/clis/twitter/post.js +23 -4
  36. package/clis/twitter/post.test.js +30 -0
  37. package/clis/twitter/profile.js +16 -8
  38. package/clis/twitter/profile.test.js +39 -0
  39. package/clis/twitter/reply.js +133 -10
  40. package/clis/twitter/reply.test.js +55 -0
  41. package/clis/twitter/search.js +188 -170
  42. package/clis/twitter/search.test.js +96 -258
  43. package/clis/twitter/shared.js +167 -16
  44. package/clis/twitter/shared.test.js +102 -1
  45. package/clis/twitter/timeline.js +3 -1
  46. package/clis/twitter/tweets.js +147 -51
  47. package/clis/twitter/tweets.test.js +238 -1
  48. package/clis/xiaohongshu/comments.js +23 -2
  49. package/clis/xiaohongshu/comments.test.js +63 -1
  50. package/clis/xiaohongshu/search.js +168 -13
  51. package/clis/xiaohongshu/search.test.js +82 -8
  52. package/clis/xueqiu/earnings-date.js +2 -2
  53. package/clis/xueqiu/kline.js +2 -2
  54. package/clis/xueqiu/utils.js +19 -0
  55. package/clis/xueqiu/utils.test.js +26 -0
  56. package/clis/zhihu/answer-detail.js +233 -0
  57. package/clis/zhihu/answer-detail.test.js +330 -0
  58. package/clis/zhihu/question.js +44 -10
  59. package/clis/zhihu/question.test.js +78 -1
  60. package/clis/zhihu/recommend.js +103 -0
  61. package/clis/zhihu/recommend.test.js +143 -0
  62. package/dist/src/browser/base-page.d.ts +3 -2
  63. package/dist/src/browser/base-page.test.js +2 -2
  64. package/dist/src/browser/cdp.js +3 -3
  65. package/dist/src/browser/daemon-client.d.ts +1 -0
  66. package/dist/src/browser/daemon-client.js +3 -0
  67. package/dist/src/browser/daemon-client.test.js +20 -0
  68. package/dist/src/browser/page.d.ts +3 -2
  69. package/dist/src/browser/page.js +4 -4
  70. package/dist/src/browser/page.test.js +31 -0
  71. package/dist/src/browser/utils.d.ts +10 -0
  72. package/dist/src/browser/utils.js +37 -0
  73. package/dist/src/browser/utils.test.d.ts +1 -0
  74. package/dist/src/browser/utils.test.js +29 -0
  75. package/dist/src/cli-argv-preprocess.d.ts +37 -0
  76. package/dist/src/cli-argv-preprocess.js +131 -0
  77. package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
  78. package/dist/src/cli-argv-preprocess.test.js +130 -0
  79. package/dist/src/cli.js +131 -89
  80. package/dist/src/cli.test.js +34 -28
  81. package/dist/src/commands/daemon.js +6 -7
  82. package/dist/src/daemon-utils.d.ts +18 -0
  83. package/dist/src/daemon-utils.js +37 -0
  84. package/dist/src/daemon.d.ts +1 -1
  85. package/dist/src/daemon.js +44 -13
  86. package/dist/src/daemon.test.js +42 -1
  87. package/dist/src/doctor.js +15 -16
  88. package/dist/src/download/progress.js +15 -11
  89. package/dist/src/download/progress.test.d.ts +1 -0
  90. package/dist/src/download/progress.test.js +25 -0
  91. package/dist/src/electron-apps.js +0 -1
  92. package/dist/src/electron-apps.test.js +1 -0
  93. package/dist/src/execution.js +1 -3
  94. package/dist/src/execution.test.js +4 -16
  95. package/dist/src/external-clis.yaml +12 -3
  96. package/dist/src/external.d.ts +4 -0
  97. package/dist/src/external.js +3 -0
  98. package/dist/src/external.test.js +24 -1
  99. package/dist/src/help.d.ts +16 -1
  100. package/dist/src/help.js +50 -8
  101. package/dist/src/help.test.js +5 -1
  102. package/dist/src/logger.js +8 -9
  103. package/dist/src/main.js +16 -0
  104. package/dist/src/output.js +4 -5
  105. package/dist/src/runtime-detect.d.ts +1 -1
  106. package/dist/src/runtime-detect.js +1 -1
  107. package/dist/src/runtime-detect.test.js +3 -2
  108. package/dist/src/tui.d.ts +0 -1
  109. package/dist/src/tui.js +9 -22
  110. package/dist/src/types.d.ts +3 -1
  111. package/dist/src/update-check.js +4 -5
  112. package/package.json +5 -4
  113. package/clis/notion/export.js +0 -32
  114. package/clis/notion/favorites.js +0 -85
  115. package/clis/notion/new.js +0 -35
  116. package/clis/notion/read.js +0 -31
  117. package/clis/notion/search.js +0 -47
  118. package/clis/notion/sidebar.js +0 -42
  119. package/clis/notion/status.js +0 -17
  120. package/clis/notion/write.js +0 -41
@@ -0,0 +1,84 @@
1
+ import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
2
+ import { cli, Strategy } from '@jackwener/opencli/registry';
3
+
4
+ cli({
5
+ site: 'reddit',
6
+ name: 'whoami',
7
+ access: 'read',
8
+ description: 'Show the currently logged-in Reddit user',
9
+ domain: 'reddit.com',
10
+ strategy: Strategy.COOKIE,
11
+ browser: true,
12
+ siteSession: 'persistent',
13
+ args: [],
14
+ columns: ['field', 'value'],
15
+ func: async (page) => {
16
+ await page.goto('https://www.reddit.com');
17
+ // Probe identity via /api/me.json. Reddit returns 200 with an empty
18
+ // body for stale anonymous sessions, so 401/403 alone is not a
19
+ // sufficient logged-out signal — we also verify `data.name` exists
20
+ // (two-pronged auth detection from PR #1428).
21
+ //
22
+ // Intermediate object keys deliberately avoid `field` / `value` to
23
+ // sidestep the silent-column-drop audit (columns are ['field',
24
+ // 'value']) — see PR #1329 sediment "中间解析对象 key 不能跟 columns
25
+ // 任一项重叠".
26
+ const result = await page.evaluate(`(async () => {
27
+ try {
28
+ const res = await fetch('/api/me.json?raw_json=1', { credentials: 'include' });
29
+ if (res.status === 401 || res.status === 403) {
30
+ return { kind: 'auth', detail: 'Reddit /api/me.json returned HTTP ' + res.status };
31
+ }
32
+ if (!res.ok) {
33
+ return { kind: 'http', httpStatus: res.status, where: '/api/me.json' };
34
+ }
35
+ const d = await res.json();
36
+ const me = d?.data;
37
+ if (!me?.name) {
38
+ return { kind: 'auth', detail: 'Not logged in to reddit.com (no identity in /api/me.json)' };
39
+ }
40
+ return { kind: 'ok', identity: me };
41
+ } catch (e) {
42
+ return { kind: 'exception', detail: String(e && e.message || e) };
43
+ }
44
+ })()`);
45
+
46
+ if (result?.kind === 'auth') {
47
+ throw new AuthRequiredError('reddit.com', result.detail);
48
+ }
49
+ if (result?.kind === 'http') {
50
+ throw new CommandExecutionError(`HTTP ${result.httpStatus} from ${result.where}`);
51
+ }
52
+ if (result?.kind === 'exception') {
53
+ throw new CommandExecutionError(`whoami failed: ${result.detail}`);
54
+ }
55
+ if (result?.kind !== 'ok') {
56
+ throw new CommandExecutionError(`Unexpected result from reddit whoami: ${JSON.stringify(result)}`);
57
+ }
58
+
59
+ const u = result.identity;
60
+ const created = u.created_utc
61
+ ? new Date(u.created_utc * 1000).toISOString().split('T')[0]
62
+ : null;
63
+ const linkKarma = typeof u.link_karma === 'number' ? u.link_karma : null;
64
+ const commentKarma = typeof u.comment_karma === 'number' ? u.comment_karma : null;
65
+ const totalKarma = typeof u.total_karma === 'number'
66
+ ? u.total_karma
67
+ : (linkKarma != null && commentKarma != null ? linkKarma + commentKarma : null);
68
+ const inboxCount = typeof u.inbox_count === 'number' ? u.inbox_count : null;
69
+
70
+ return [
71
+ { field: 'Username', value: 'u/' + u.name },
72
+ { field: 'ID', value: u.id ? 't2_' + u.id : null },
73
+ { field: 'Post Karma', value: linkKarma != null ? String(linkKarma) : null },
74
+ { field: 'Comment Karma', value: commentKarma != null ? String(commentKarma) : null },
75
+ { field: 'Total Karma', value: totalKarma != null ? String(totalKarma) : null },
76
+ { field: 'Account Created', value: created },
77
+ { field: 'Gold', value: u.is_gold ? 'Yes' : 'No' },
78
+ { field: 'Mod', value: u.is_mod ? 'Yes' : 'No' },
79
+ { field: 'Verified Email', value: u.has_verified_email ? 'Yes' : 'No' },
80
+ { field: 'Has Mail', value: u.has_mail ? 'Yes' : 'No' },
81
+ { field: 'Inbox Count', value: inboxCount != null ? String(inboxCount) : null },
82
+ ];
83
+ },
84
+ });
@@ -0,0 +1,105 @@
1
+ import { describe, expect, it, vi } from 'vitest';
2
+ import { getRegistry } from '@jackwener/opencli/registry';
3
+ import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
4
+ import './whoami.js';
5
+
6
+ function makePage(result) {
7
+ return {
8
+ goto: vi.fn().mockResolvedValue(undefined),
9
+ evaluate: vi.fn().mockResolvedValue(result),
10
+ };
11
+ }
12
+
13
+ describe('reddit whoami command', () => {
14
+ const command = getRegistry().get('reddit/whoami');
15
+
16
+ it('registers with the expected shape', () => {
17
+ expect(command).toBeDefined();
18
+ expect(command.access).toBe('read');
19
+ expect(command.browser).toBe(true);
20
+ expect(command.columns).toEqual(['field', 'value']);
21
+ expect(command.args).toEqual([]);
22
+ });
23
+
24
+ it('throws AuthRequiredError on 401/403 from /api/me.json', async () => {
25
+ const page = makePage({ kind: 'auth', detail: 'Reddit /api/me.json returned HTTP 401' });
26
+ await expect(command.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
27
+ expect(page.goto).toHaveBeenCalledWith('https://www.reddit.com');
28
+ });
29
+
30
+ it('throws AuthRequiredError on 200 with missing data.name (stale anon session)', async () => {
31
+ const page = makePage({ kind: 'auth', detail: 'Not logged in to reddit.com (no identity in /api/me.json)' });
32
+ await expect(command.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
33
+ });
34
+
35
+ it('throws CommandExecutionError on HTTP / exception failure modes', async () => {
36
+ await expect(command.func(makePage({ kind: 'http', httpStatus: 500, where: '/api/me.json' }), {}))
37
+ .rejects.toBeInstanceOf(CommandExecutionError);
38
+ await expect(command.func(makePage({ kind: 'exception', detail: 'bad json' }), {}))
39
+ .rejects.toBeInstanceOf(CommandExecutionError);
40
+ });
41
+
42
+ it('maps a full identity payload into the field/value rows', async () => {
43
+ const identity = {
44
+ name: 'alice',
45
+ id: 'abcdef',
46
+ link_karma: 1234,
47
+ comment_karma: 5678,
48
+ total_karma: 6912,
49
+ created_utc: 1577836800, // 2020-01-01
50
+ is_gold: true,
51
+ is_mod: false,
52
+ has_verified_email: true,
53
+ has_mail: false,
54
+ inbox_count: 0,
55
+ };
56
+ const page = makePage({ kind: 'ok', identity });
57
+ const rows = await command.func(page, {});
58
+
59
+ const byField = Object.fromEntries(rows.map((r) => [r.field, r.value]));
60
+ expect(byField.Username).toBe('u/alice');
61
+ expect(byField.ID).toBe('t2_abcdef');
62
+ expect(byField['Post Karma']).toBe('1234');
63
+ expect(byField['Comment Karma']).toBe('5678');
64
+ expect(byField['Total Karma']).toBe('6912');
65
+ expect(byField['Account Created']).toBe('2020-01-01');
66
+ expect(byField.Gold).toBe('Yes');
67
+ expect(byField.Mod).toBe('No');
68
+ expect(byField['Verified Email']).toBe('Yes');
69
+ expect(byField['Has Mail']).toBe('No');
70
+ expect(byField['Inbox Count']).toBe('0');
71
+
72
+ // Row shape must match the declared columns exactly so the
73
+ // silent-column-drop audit can't be triggered.
74
+ for (const row of rows) {
75
+ expect(Object.keys(row).sort()).toEqual(['field', 'value']);
76
+ }
77
+ });
78
+
79
+ it('falls back to null for missing numeric karma fields rather than 0 sentinels', async () => {
80
+ const identity = {
81
+ name: 'bob',
82
+ id: 'xyz',
83
+ created_utc: null,
84
+ is_gold: false,
85
+ is_mod: false,
86
+ has_verified_email: false,
87
+ has_mail: false,
88
+ };
89
+ const page = makePage({ kind: 'ok', identity });
90
+ const rows = await command.func(page, {});
91
+ const byField = Object.fromEntries(rows.map((r) => [r.field, r.value]));
92
+ expect(byField['Post Karma']).toBeNull();
93
+ expect(byField['Comment Karma']).toBeNull();
94
+ expect(byField['Total Karma']).toBeNull();
95
+ expect(byField['Account Created']).toBeNull();
96
+ expect(byField['Inbox Count']).toBeNull();
97
+ });
98
+
99
+ it('does not throw on `data.name` present even if optional booleans are missing', async () => {
100
+ const identity = { name: 'carol', id: 'i1' };
101
+ const page = makePage({ kind: 'ok', identity });
102
+ const rows = await command.func(page, {});
103
+ expect(rows[0]).toEqual({ field: 'Username', value: 'u/carol' });
104
+ });
105
+ });
@@ -7,7 +7,7 @@
7
7
  */
8
8
  import { cli, Strategy } from '@jackwener/opencli/registry';
9
9
  import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
10
- import { buildSearchExtractJs, noteIdToDate } from '../xiaohongshu/search.js';
10
+ import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate } from '../xiaohongshu/search.js';
11
11
 
12
12
  function parseLimit(raw) {
13
13
  const parsed = Number(raw);
@@ -82,7 +82,11 @@ cli({
82
82
  if (waitResult === 'login_wall') {
83
83
  throw new AuthRequiredError('www.rednote.com', 'Rednote search results are blocked behind a login wall');
84
84
  }
85
- await page.autoScroll({ times: 2 });
85
+ // Scroll until enough rows are rendered or the lazy-load plateaus.
86
+ // Same fix as xiaohongshu/search (#1471): the previous fixed
87
+ // `autoScroll({ times: 2 })` capped extraction at ~13 notes regardless
88
+ // of `--limit`.
89
+ await page.evaluate(buildScrollUntilJs(limit));
86
90
  const payload = await page.evaluate(buildSearchExtractJs('www.rednote.com'));
87
91
  const data = Array.isArray(payload) ? payload : [];
88
92
  return data
@@ -1,7 +1,7 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
2
  import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
3
3
  import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js';
4
- import { resolveTwitterQueryId } from './shared.js';
4
+ import { extractMedia, resolveTwitterQueryId } from './shared.js';
5
5
 
6
6
  // Companion to bookmark-folders.js: reads tweets inside a single folder.
7
7
  // X exposes folder contents through a separate timeline operation
@@ -11,6 +11,7 @@ import { resolveTwitterQueryId } from './shared.js';
11
11
  const OPERATION_NAME = 'BookmarkFolderTimeline';
12
12
  const FALLBACK_QUERY_ID = '13H7EUATwethsj_jZ6QQAQ';
13
13
  const FOLDER_ID_PATTERN = /^[A-Za-z0-9_-]+$/;
14
+ const MAX_PAGINATION_PAGES = 100;
14
15
 
15
16
  const FEATURES = {
16
17
  rweb_video_screen_enabled: false,
@@ -53,7 +54,7 @@ function buildFolderTimelineUrl(queryId, folderId, count, cursor) {
53
54
  + `&features=${encodeURIComponent(JSON.stringify(FEATURES))}`;
54
55
  }
55
56
 
56
- function extractFolderTweet(result, seen) {
57
+ export function extractFolderTweet(result, seen) {
57
58
  if (!result) return null;
58
59
  const tw = result.tweet || result;
59
60
  const legacy = tw.legacy || {};
@@ -71,6 +72,7 @@ function extractFolderTweet(result, seen) {
71
72
  bookmarks: legacy.bookmark_count || 0,
72
73
  created_at: legacy.created_at || '',
73
74
  url: screenName ? `https://x.com/${screenName}/status/${tw.rest_id}` : `https://x.com/i/status/${tw.rest_id}`,
75
+ ...extractMedia(legacy),
74
76
  };
75
77
  }
76
78
 
@@ -128,7 +130,7 @@ cli({
128
130
  { name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' },
129
131
  { name: 'top-by-engagement', type: 'int', default: 0, help: 'When set to N>0, re-rank the folder by weighted engagement (likes×1 + retweets×3 + replies×2 + bookmarks×5 + log10(views+1)×0.5) and return the top N. Default 0 keeps the API\'s native (saved-time) ordering.' },
130
132
  ],
131
- columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url'],
133
+ columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url', 'has_media', 'media_urls'],
132
134
  func: async (page, kwargs) => {
133
135
  const folderId = String(kwargs['folder-id'] || '').trim();
134
136
  if (!folderId || !FOLDER_ID_PATTERN.test(folderId)) {
@@ -158,7 +160,8 @@ cli({
158
160
  const allTweets = [];
159
161
  const seen = new Set();
160
162
  let cursor = null;
161
- for (let i = 0; i < 5 && allTweets.length < limit; i++) {
163
+ // Runaway guard only; --limit and cursor exhaustion control normal pagination.
164
+ for (let i = 0; i < MAX_PAGINATION_PAGES && allTweets.length < limit; i++) {
162
165
  const fetchCount = Math.min(100, limit - allTweets.length + 10);
163
166
  const apiUrl = buildFolderTimelineUrl(queryId, folderId, fetchCount, cursor);
164
167
  const data = await page.evaluate(`async () => {
@@ -182,6 +185,7 @@ cli({
182
185
 
183
186
  export const __test__ = {
184
187
  parseBookmarkFolderTimeline,
188
+ extractFolderTweet,
185
189
  buildFolderTimelineUrl,
186
190
  FOLDER_ID_PATTERN,
187
191
  };
@@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest';
2
2
  import { getRegistry } from '@jackwener/opencli/registry';
3
3
  import { __test__ } from './bookmark-folder.js';
4
4
 
5
- const { parseBookmarkFolderTimeline, buildFolderTimelineUrl, FOLDER_ID_PATTERN } = __test__;
5
+ const { parseBookmarkFolderTimeline, extractFolderTweet, buildFolderTimelineUrl, FOLDER_ID_PATTERN } = __test__;
6
6
 
7
7
  describe('twitter bookmark-folder URL builder', () => {
8
8
  it('embeds the folder id and count in the variables payload', () => {
@@ -97,6 +97,8 @@ describe('twitter bookmark-folder timeline parser', () => {
97
97
  bookmarks: 3,
98
98
  created_at: 'Tue Mar 17 09:00:00 +0000 2026',
99
99
  url: 'https://x.com/alice/status/1',
100
+ has_media: false,
101
+ media_urls: [],
100
102
  },
101
103
  ]);
102
104
  expect(nextCursor).toBe('NEXT_CURSOR');
@@ -247,6 +249,62 @@ describe('twitter bookmark-folder timeline parser', () => {
247
249
  it('returns empty array + null cursor for unknown envelope', () => {
248
250
  expect(parseBookmarkFolderTimeline({}, new Set())).toEqual({ tweets: [], nextCursor: null });
249
251
  });
252
+
253
+ it('includes photo media URLs from extended_entities', () => {
254
+ const tweet = extractFolderTweet({
255
+ rest_id: '101',
256
+ legacy: {
257
+ full_text: 'pic folder tweet',
258
+ extended_entities: {
259
+ media: [
260
+ { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/abc.jpg' },
261
+ { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/def.jpg' },
262
+ ],
263
+ },
264
+ },
265
+ core: { user_results: { result: { legacy: { screen_name: 'eve' } } } },
266
+ }, new Set());
267
+ expect(tweet?.has_media).toBe(true);
268
+ expect(tweet?.media_urls).toEqual([
269
+ 'https://pbs.twimg.com/media/abc.jpg',
270
+ 'https://pbs.twimg.com/media/def.jpg',
271
+ ]);
272
+ });
273
+
274
+ it('extracts mp4 variant URL for video media', () => {
275
+ const tweet = extractFolderTweet({
276
+ rest_id: '102',
277
+ legacy: {
278
+ full_text: 'video folder tweet',
279
+ extended_entities: {
280
+ media: [{
281
+ type: 'video',
282
+ media_url_https: 'https://pbs.twimg.com/amplify_video_thumb/thumb.jpg',
283
+ video_info: {
284
+ variants: [
285
+ { content_type: 'application/x-mpegURL', url: 'https://video.twimg.com/playlist.m3u8' },
286
+ { content_type: 'video/mp4', bitrate: 832000, url: 'https://video.twimg.com/low.mp4' },
287
+ { content_type: 'video/mp4', bitrate: 2176000, url: 'https://video.twimg.com/high.mp4' },
288
+ ],
289
+ },
290
+ }],
291
+ },
292
+ },
293
+ core: { user_results: { result: { legacy: { screen_name: 'frank' } } } },
294
+ }, new Set());
295
+ expect(tweet?.has_media).toBe(true);
296
+ expect(tweet?.media_urls?.[0]).toMatch(/\.mp4$/);
297
+ });
298
+
299
+ it('returns has_media false / media_urls empty when no media present', () => {
300
+ const tweet = extractFolderTweet({
301
+ rest_id: '103',
302
+ legacy: { full_text: 'text only', favorite_count: 0, retweet_count: 0, bookmark_count: 0 },
303
+ core: { user_results: { result: { legacy: { screen_name: 'gail' } } } },
304
+ }, new Set());
305
+ expect(tweet?.has_media).toBe(false);
306
+ expect(tweet?.media_urls).toEqual([]);
307
+ });
250
308
  });
251
309
 
252
310
  describe('twitter bookmark-folder id validation', () => {
@@ -1,7 +1,9 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
2
  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
3
+ import { extractMedia } from './shared.js';
3
4
  import { TWITTER_BEARER_TOKEN, applyTopByEngagement } from './utils.js';
4
5
  const BOOKMARKS_QUERY_ID = 'Fy0QMy4q_aZCpkO0PnyLYw';
6
+ const MAX_PAGINATION_PAGES = 100;
5
7
  const FEATURES = {
6
8
  rweb_video_screen_enabled: false,
7
9
  profile_label_improvements_pcf_label_in_post_enabled: true,
@@ -41,7 +43,7 @@ function buildBookmarksUrl(count, cursor) {
41
43
  + `?variables=${encodeURIComponent(JSON.stringify(vars))}`
42
44
  + `&features=${encodeURIComponent(JSON.stringify(FEATURES))}`;
43
45
  }
44
- function extractBookmarkTweet(result, seen) {
46
+ export function extractBookmarkTweet(result, seen) {
45
47
  if (!result)
46
48
  return null;
47
49
  const tw = result.tweet || result;
@@ -63,9 +65,10 @@ function extractBookmarkTweet(result, seen) {
63
65
  bookmarks: legacy.bookmark_count || 0,
64
66
  created_at: legacy.created_at || '',
65
67
  url: `https://x.com/${screenName}/status/${tw.rest_id}`,
68
+ ...extractMedia(legacy),
66
69
  };
67
70
  }
68
- function parseBookmarks(data, seen) {
71
+ export function parseBookmarks(data, seen) {
69
72
  const tweets = [];
70
73
  let nextCursor = null;
71
74
  const instructions = data?.data?.bookmark_timeline_v2?.timeline?.instructions
@@ -110,7 +113,7 @@ cli({
110
113
  { name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' },
111
114
  { name: 'top-by-engagement', type: 'int', default: 0, help: 'When set to N>0, re-rank the bookmarks by weighted engagement (likes×1 + retweets×3 + replies×2 + bookmarks×5 + log10(views+1)×0.5) and return the top N. Default 0 keeps the API\'s native (saved-time) ordering.' },
112
115
  ],
113
- columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url'],
116
+ columns: ['id', 'author', 'text', 'likes', 'retweets', 'bookmarks', 'created_at', 'url', 'has_media', 'media_urls'],
114
117
  func: async (page, kwargs) => {
115
118
  const limit = kwargs.limit || 20;
116
119
  const cookies = await page.getCookies({ url: 'https://x.com' });
@@ -150,7 +153,8 @@ cli({
150
153
  const allTweets = [];
151
154
  const seen = new Set();
152
155
  let cursor = null;
153
- for (let i = 0; i < 5 && allTweets.length < limit; i++) {
156
+ // Runaway guard only; --limit and cursor exhaustion control normal pagination.
157
+ for (let i = 0; i < MAX_PAGINATION_PAGES && allTweets.length < limit; i++) {
154
158
  const fetchCount = Math.min(100, limit - allTweets.length + 10);
155
159
  const apiUrl = buildBookmarksUrl(fetchCount, cursor).replace(BOOKMARKS_QUERY_ID, queryId);
156
160
  const data = await page.evaluate(`async () => {
@@ -172,3 +176,7 @@ cli({
172
176
  return applyTopByEngagement(trimmed, kwargs['top-by-engagement']);
173
177
  },
174
178
  });
179
+ export const __test__ = {
180
+ parseBookmarks,
181
+ extractBookmarkTweet,
182
+ };
@@ -0,0 +1,205 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { __test__ } from './bookmarks.js';
3
+
4
+ const { parseBookmarks, extractBookmarkTweet } = __test__;
5
+
6
+ describe('twitter bookmarks parser', () => {
7
+ it('extracts a baseline tweet with no media (has_media false, media_urls empty)', () => {
8
+ const tweet = extractBookmarkTweet({
9
+ rest_id: '1',
10
+ legacy: {
11
+ full_text: 'plain bookmark',
12
+ favorite_count: 5,
13
+ retweet_count: 1,
14
+ bookmark_count: 2,
15
+ created_at: 'Wed Apr 16 10:00:00 +0000 2026',
16
+ },
17
+ core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
18
+ }, new Set());
19
+ expect(tweet).toEqual({
20
+ id: '1',
21
+ author: 'alice',
22
+ name: 'Alice',
23
+ text: 'plain bookmark',
24
+ likes: 5,
25
+ retweets: 1,
26
+ bookmarks: 2,
27
+ created_at: 'Wed Apr 16 10:00:00 +0000 2026',
28
+ url: 'https://x.com/alice/status/1',
29
+ has_media: false,
30
+ media_urls: [],
31
+ });
32
+ });
33
+
34
+ it('includes photo media URLs from extended_entities', () => {
35
+ const tweet = extractBookmarkTweet({
36
+ rest_id: '101',
37
+ legacy: {
38
+ full_text: 'pic bookmark',
39
+ extended_entities: {
40
+ media: [
41
+ { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/abc.jpg' },
42
+ { type: 'photo', media_url_https: 'https://pbs.twimg.com/media/def.jpg' },
43
+ ],
44
+ },
45
+ },
46
+ core: { user_results: { result: { legacy: { screen_name: 'bob' } } } },
47
+ }, new Set());
48
+ expect(tweet?.has_media).toBe(true);
49
+ expect(tweet?.media_urls).toEqual([
50
+ 'https://pbs.twimg.com/media/abc.jpg',
51
+ 'https://pbs.twimg.com/media/def.jpg',
52
+ ]);
53
+ });
54
+
55
+ it('extracts mp4 variant URL for video media', () => {
56
+ const tweet = extractBookmarkTweet({
57
+ rest_id: '102',
58
+ legacy: {
59
+ full_text: 'video bookmark',
60
+ extended_entities: {
61
+ media: [{
62
+ type: 'video',
63
+ media_url_https: 'https://pbs.twimg.com/amplify_video_thumb/thumb.jpg',
64
+ video_info: {
65
+ variants: [
66
+ { content_type: 'application/x-mpegURL', url: 'https://video.twimg.com/playlist.m3u8' },
67
+ { content_type: 'video/mp4', bitrate: 832000, url: 'https://video.twimg.com/low.mp4' },
68
+ { content_type: 'video/mp4', bitrate: 2176000, url: 'https://video.twimg.com/high.mp4' },
69
+ ],
70
+ },
71
+ }],
72
+ },
73
+ },
74
+ core: { user_results: { result: { legacy: { screen_name: 'carol' } } } },
75
+ }, new Set());
76
+ expect(tweet?.has_media).toBe(true);
77
+ expect(tweet?.media_urls?.[0]).toMatch(/\.mp4$/);
78
+ });
79
+
80
+ it('falls back to entities.media when extended_entities is absent', () => {
81
+ const tweet = extractBookmarkTweet({
82
+ rest_id: '103',
83
+ legacy: {
84
+ full_text: 'entities-only media',
85
+ entities: {
86
+ media: [{ type: 'photo', media_url_https: 'https://pbs.twimg.com/media/legacy.jpg' }],
87
+ },
88
+ },
89
+ core: { user_results: { result: { legacy: { screen_name: 'dave' } } } },
90
+ }, new Set());
91
+ expect(tweet?.has_media).toBe(true);
92
+ expect(tweet?.media_urls).toEqual(['https://pbs.twimg.com/media/legacy.jpg']);
93
+ });
94
+
95
+ it('prefers note_tweet text over truncated full_text', () => {
96
+ const tweet = extractBookmarkTweet({
97
+ rest_id: '2',
98
+ legacy: { full_text: 'short text…', favorite_count: 0, retweet_count: 0, bookmark_count: 0 },
99
+ note_tweet: { note_tweet_results: { result: { text: 'full long-form text body' } } },
100
+ core: { user_results: { result: { core: { screen_name: 'erin' } } } },
101
+ }, new Set());
102
+ expect(tweet?.text).toBe('full long-form text body');
103
+ });
104
+
105
+ it('deduplicates tweets across the seen Set', () => {
106
+ const data = {
107
+ data: {
108
+ bookmark_timeline_v2: {
109
+ timeline: {
110
+ instructions: [{
111
+ entries: [
112
+ {
113
+ entryId: 'tweet-3',
114
+ content: {
115
+ itemContent: {
116
+ tweet_results: {
117
+ result: {
118
+ rest_id: '3',
119
+ legacy: { full_text: 'first', favorite_count: 0, retweet_count: 0, bookmark_count: 0 },
120
+ core: { user_results: { result: { legacy: { screen_name: 'frank' } } } },
121
+ },
122
+ },
123
+ },
124
+ },
125
+ },
126
+ {
127
+ entryId: 'tweet-3-dup',
128
+ content: {
129
+ itemContent: {
130
+ tweet_results: {
131
+ result: {
132
+ rest_id: '3',
133
+ legacy: { full_text: 'duplicate' },
134
+ core: { user_results: { result: { legacy: { screen_name: 'frank' } } } },
135
+ },
136
+ },
137
+ },
138
+ },
139
+ },
140
+ ],
141
+ }],
142
+ },
143
+ },
144
+ },
145
+ };
146
+ const seen = new Set();
147
+ const { tweets } = parseBookmarks(data, seen);
148
+ expect(tweets).toHaveLength(1);
149
+ expect(tweets[0].text).toBe('first');
150
+ });
151
+
152
+ it('extracts cursor + tweets from the bookmark_timeline_v2 envelope', () => {
153
+ const data = {
154
+ data: {
155
+ bookmark_timeline_v2: {
156
+ timeline: {
157
+ instructions: [
158
+ {
159
+ type: 'TimelineAddEntries',
160
+ entries: [
161
+ {
162
+ entryId: 'tweet-4',
163
+ content: {
164
+ itemContent: {
165
+ tweet_results: {
166
+ result: {
167
+ rest_id: '4',
168
+ legacy: {
169
+ full_text: 'envelope tweet',
170
+ favorite_count: 1,
171
+ retweet_count: 0,
172
+ bookmark_count: 0,
173
+ extended_entities: {
174
+ media: [{ type: 'photo', media_url_https: 'https://pbs.twimg.com/media/x.jpg' }],
175
+ },
176
+ },
177
+ core: { user_results: { result: { legacy: { screen_name: 'gina' } } } },
178
+ },
179
+ },
180
+ },
181
+ },
182
+ },
183
+ {
184
+ entryId: 'cursor-bottom-Y',
185
+ content: { __typename: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'NEXT' },
186
+ },
187
+ ],
188
+ },
189
+ ],
190
+ },
191
+ },
192
+ },
193
+ };
194
+ const { tweets, nextCursor } = parseBookmarks(data, new Set());
195
+ expect(tweets).toHaveLength(1);
196
+ expect(tweets[0].id).toBe('4');
197
+ expect(tweets[0].has_media).toBe(true);
198
+ expect(tweets[0].media_urls).toEqual(['https://pbs.twimg.com/media/x.jpg']);
199
+ expect(nextCursor).toBe('NEXT');
200
+ });
201
+
202
+ it('returns empty tweets + null cursor for unknown envelope', () => {
203
+ expect(parseBookmarks({}, new Set())).toEqual({ tweets: [], nextCursor: null });
204
+ });
205
+ });