@jackwener/opencli 1.7.5 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +5 -2
  2. package/README.zh-CN.md +5 -2
  3. package/cli-manifest.json +77 -1
  4. package/clis/bilibili/video.js +61 -0
  5. package/clis/bilibili/video.test.js +81 -0
  6. package/clis/deepseek/ask.js +21 -1
  7. package/clis/deepseek/ask.test.js +73 -0
  8. package/clis/deepseek/utils.js +84 -1
  9. package/clis/deepseek/utils.test.js +37 -0
  10. package/clis/jianyu/search.js +139 -3
  11. package/clis/jianyu/search.test.js +25 -0
  12. package/clis/jianyu/shared/procurement-detail.js +15 -0
  13. package/clis/jianyu/shared/procurement-detail.test.js +12 -0
  14. package/clis/twitter/shared.js +7 -2
  15. package/clis/twitter/tweets.js +218 -0
  16. package/clis/twitter/tweets.test.js +125 -0
  17. package/clis/youtube/channel.js +35 -0
  18. package/dist/src/browser/base-page.d.ts +13 -3
  19. package/dist/src/browser/base-page.js +35 -25
  20. package/dist/src/browser/cdp.d.ts +1 -0
  21. package/dist/src/browser/cdp.js +12 -3
  22. package/dist/src/browser/compound.d.ts +59 -0
  23. package/dist/src/browser/compound.js +112 -0
  24. package/dist/src/browser/compound.test.d.ts +1 -0
  25. package/dist/src/browser/compound.test.js +175 -0
  26. package/dist/src/browser/dom-snapshot.d.ts +7 -0
  27. package/dist/src/browser/dom-snapshot.js +76 -3
  28. package/dist/src/browser/dom-snapshot.test.js +65 -0
  29. package/dist/src/browser/extract.d.ts +69 -0
  30. package/dist/src/browser/extract.js +132 -0
  31. package/dist/src/browser/extract.test.d.ts +1 -0
  32. package/dist/src/browser/extract.test.js +129 -0
  33. package/dist/src/browser/find.d.ts +76 -0
  34. package/dist/src/browser/find.js +179 -0
  35. package/dist/src/browser/find.test.d.ts +1 -0
  36. package/dist/src/browser/find.test.js +120 -0
  37. package/dist/src/browser/html-tree.d.ts +75 -0
  38. package/dist/src/browser/html-tree.js +112 -0
  39. package/dist/src/browser/html-tree.test.d.ts +1 -0
  40. package/dist/src/browser/html-tree.test.js +181 -0
  41. package/dist/src/browser/network-cache.d.ts +48 -0
  42. package/dist/src/browser/network-cache.js +66 -0
  43. package/dist/src/browser/network-cache.test.d.ts +1 -0
  44. package/dist/src/browser/network-cache.test.js +58 -0
  45. package/dist/src/browser/network-key.d.ts +22 -0
  46. package/dist/src/browser/network-key.js +66 -0
  47. package/dist/src/browser/network-key.test.d.ts +1 -0
  48. package/dist/src/browser/network-key.test.js +49 -0
  49. package/dist/src/browser/shape-filter.d.ts +52 -0
  50. package/dist/src/browser/shape-filter.js +101 -0
  51. package/dist/src/browser/shape-filter.test.d.ts +1 -0
  52. package/dist/src/browser/shape-filter.test.js +101 -0
  53. package/dist/src/browser/shape.d.ts +23 -0
  54. package/dist/src/browser/shape.js +95 -0
  55. package/dist/src/browser/shape.test.d.ts +1 -0
  56. package/dist/src/browser/shape.test.js +82 -0
  57. package/dist/src/browser/target-errors.d.ts +14 -1
  58. package/dist/src/browser/target-errors.js +13 -0
  59. package/dist/src/browser/target-errors.test.js +39 -6
  60. package/dist/src/browser/target-resolver.d.ts +57 -10
  61. package/dist/src/browser/target-resolver.js +195 -75
  62. package/dist/src/browser/target-resolver.test.js +80 -5
  63. package/dist/src/cli.js +630 -125
  64. package/dist/src/cli.test.js +794 -0
  65. package/dist/src/execution.js +7 -2
  66. package/dist/src/execution.test.js +54 -0
  67. package/dist/src/main.js +16 -0
  68. package/dist/src/types.d.ts +18 -3
  69. package/package.json +1 -1
@@ -35,6 +35,10 @@ const NAVIGATION_PATH_PREFIXES = [
35
35
  '/exhibition/',
36
36
  '/swordfish/page_big_pc/search/',
37
37
  ];
38
/**
 * Path fragments of detail pages that classifyDetailStatus reports as
 * `blocked` with reason `verification_or_paid_wall`.
 */
const BLOCKED_DETAIL_PATH_PREFIXES = ['/nologin/content/', '/article/bdprivate/'];
38
42
  const JIANYU_API_TYPES = ['fType', 'eType', 'vType', 'mType'];
39
43
  export function buildSearchUrl(query) {
40
44
  const url = new URL(SEARCH_ENTRY);
@@ -74,6 +78,86 @@ function isLikelyNavigationUrl(rawUrl) {
74
78
  return true;
75
79
  }
76
80
  }
81
/**
 * Classifies a search-result URL by whether its detail page is usable.
 *
 * @param {string} rawUrl - URL taken from a result row.
 * @returns {{detail_status: string, detail_reason: string}}
 *   - `blocked` / `missing_url` when the cleaned URL is empty;
 *   - `blocked` / `invalid_url` when parsing or classification throws;
 *   - `blocked` / `verification_or_paid_wall` when the lower-cased path
 *     contains one of BLOCKED_DETAIL_PATH_PREFIXES;
 *   - `entry_only` / `navigation_or_profile_entry` when
 *     isLikelyNavigationUrl accepts the URL;
 *   - `ok` with `jybx_detail` (path contains `/jybx/`) or `detail_candidate`.
 */
function classifyDetailStatus(rawUrl) {
    const signal = (detail_status, detail_reason) => ({ detail_status, detail_reason });

    const candidate = cleanText(rawUrl);
    if (!candidate) {
        return signal('blocked', 'missing_url');
    }

    try {
        const { pathname } = new URL(candidate);
        // Lower-case and collapse any run of trailing slashes into a single one.
        const normalizedPath = cleanText(pathname).toLowerCase().replace(/\/+$/, '/') || '/';

        const hitsBlockedPath = BLOCKED_DETAIL_PATH_PREFIXES.some((prefix) => normalizedPath.includes(prefix));
        if (hitsBlockedPath) {
            return signal('blocked', 'verification_or_paid_wall');
        }
        if (isLikelyNavigationUrl(candidate)) {
            return signal('entry_only', 'navigation_or_profile_entry');
        }

        const okReason = normalizedPath.includes('/jybx/') ? 'jybx_detail' : 'detail_candidate';
        return signal('ok', okReason);
    }
    catch {
        return signal('blocked', 'invalid_url');
    }
}
116
/**
 * Derives a notice identifier from a detail URL.
 *
 * For paths ending in `/jybx/<id>.html` the `<id>` part is returned; otherwise
 * the last non-empty path segment is returned with a trailing `.htm`/`.html`
 * removed.
 *
 * @param {string} rawUrl - URL taken from a result row.
 * @returns {string} The identifier, or `''` when the URL is empty or unparsable.
 */
function extractNoticeId(rawUrl) {
    const urlText = cleanText(rawUrl);
    if (!urlText) {
        return '';
    }
    try {
        const pathname = cleanText(new URL(urlText).pathname);

        const [, jybxId] = pathname.match(/\/jybx\/([^/?#]+)\.html$/i) ?? [];
        if (jybxId) {
            return cleanText(jybxId);
        }

        const lastSegment = pathname.split('/').filter(Boolean).pop() || '';
        return cleanText(cleanText(lastSegment).replace(/\.html?$/i, ''));
    }
    catch {
        return '';
    }
}
134
/**
 * Tells whether a publish date lies inside the last `sinceDays` days.
 *
 * The date is normalized with normalizeDate and read as midnight UTC; "today"
 * is the UTC calendar day of `now`. Dates in the future are rejected.
 *
 * @param {string} dateText - Date text as found on a row.
 * @param {number} sinceDays - Size of the window in days (inclusive).
 * @param {Date} [now] - Reference instant; defaults to the current time.
 * @returns {boolean} `false` when the date is missing or cannot be parsed.
 */
function isWithinSinceDays(dateText, sinceDays, now = new Date()) {
    const MS_PER_DAY = 24 * 3600 * 1000;

    const isoDay = normalizeDate(dateText);
    if (!isoDay) {
        return false;
    }

    const publishedMs = Date.parse(`${isoDay}T00:00:00Z`);
    if (!Number.isFinite(publishedMs)) {
        return false;
    }

    const todayMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
    const ageInDays = Math.floor((todayMs - publishedMs) / MS_PER_DAY);
    return ageInDays >= 0 && ageInDays <= sinceDays;
}
145
/**
 * Removes duplicate rows, keeping the first occurrence of each key.
 *
 * A row is keyed by `source_id` + `notice_id` when both are present, and by
 * `title` + `url` otherwise. Rows that have no identity at all (none of the
 * pairs yields any text) are skipped.
 *
 * @param {Array<object>} items - Rows to deduplicate.
 * @returns {Array<object>} The retained rows in their original order.
 */
function dedupeByNoticeKey(items) {
    const deduped = [];
    const seen = new Set();
    for (const item of items) {
        const source = cleanText(item.source_id || '');
        const notice = cleanText(item.notice_id || '');
        let key = '';
        if (source && notice) {
            key = `${source}\t${notice}`;
        }
        else {
            const title = cleanText(item.title);
            const url = cleanText(item.url);
            // The joined fallback always contains the tab separator, so it is
            // never empty on its own; only build it when a part has content,
            // otherwise the "no key" guard below could never fire.
            if (title || url) {
                key = `${title}\t${url}`;
            }
        }
        if (!key || seen.has(key)) {
            continue;
        }
        seen.add(key);
        deduped.push(item);
    }
    return deduped;
}
77
161
  function filterNavigationRows(query, items) {
78
162
  const queryTokens = cleanText(query).split(/\s+/).filter(Boolean).map((token) => token.toLowerCase());
79
163
  return items
@@ -86,6 +170,9 @@ function filterNavigationRows(query, items) {
86
170
  .filter((item) => {
87
171
  if (!item.title || !item.url)
88
172
  return false;
173
+ const detailSignal = classifyDetailStatus(item.url);
174
+ if (detailSignal.detail_status !== 'ok')
175
+ return false;
89
176
  const haystack = `${item.title} ${item.contextText}`.toLowerCase();
90
177
  const hasQuery = queryTokens.length === 0 || queryTokens.some((token) => haystack.includes(token));
91
178
  const hasProcurementHint = PROCUREMENT_TITLE_HINT.test(`${item.title} ${item.contextText}`);
@@ -446,11 +533,16 @@ cli({
446
533
  args: [
447
534
  { name: 'query', required: true, positional: true, help: 'Search keyword, e.g. "procurement"' },
448
535
  { name: 'limit', type: 'int', default: 20, help: 'Number of results (max 50)' },
536
+ { name: 'since_days', type: 'int', help: 'Only keep rows published within N days' },
449
537
  ],
450
- columns: ['rank', 'content_type', 'title', 'publish_time', 'project_code', 'budget_or_limit', 'url'],
538
+ columns: ['rank', 'content_type', 'title', 'published_at', 'detail_status', 'project_code', 'budget_or_limit', 'url'],
451
539
  func: async (page, kwargs) => {
452
540
  const query = cleanText(kwargs.query);
453
541
  const limit = Math.max(1, Math.min(Number(kwargs.limit) || 20, 50));
542
+ const rawSinceDays = Number(kwargs.since_days);
543
+ const sinceDays = Number.isFinite(rawSinceDays) && rawSinceDays > 0
544
+ ? Math.max(1, Math.min(rawSinceDays, 3650))
545
+ : null;
454
546
  const apiResult = await fetchJianyuApiRows(page, query, limit);
455
547
  const mergedRows = dedupeCandidates(filterNavigationRows(query, apiResult.rows));
456
548
  const extractedRows = await searchRowsFromEntries(page, {
@@ -465,21 +557,61 @@ cli({
465
557
  const indexedRows = await fetchDuckDuckGoIndexRows(query, limit);
466
558
  const filteredIndexedRows = dedupeCandidates(filterNavigationRows(query, indexedRows));
467
559
  if (filteredIndexedRows.length > 0) {
468
- return toProcurementSearchRecords(filteredIndexedRows, {
560
+ const records = toProcurementSearchRecords(filteredIndexedRows, {
469
561
  site: SITE,
470
562
  query,
471
563
  limit,
472
564
  });
565
+ const enriched = dedupeByNoticeKey(records.map((row) => {
566
+ const detailSignal = classifyDetailStatus(row.url);
567
+ const publishedAt = normalizeDate(row.publish_time || row.date);
568
+ return {
569
+ ...row,
570
+ source_id: SITE,
571
+ notice_id: extractNoticeId(row.url),
572
+ published_at: publishedAt,
573
+ detail_status: detailSignal.detail_status,
574
+ detail_reason: detailSignal.detail_reason,
575
+ };
576
+ }))
577
+ .filter((row) => row.detail_status === 'ok')
578
+ .filter((row) => sinceDays == null || isWithinSinceDays(row.published_at, sinceDays))
579
+ .slice(0, limit)
580
+ .map((row, index) => ({
581
+ ...row,
582
+ rank: index + 1,
583
+ }));
584
+ return enriched;
473
585
  }
474
586
  if (apiResult.challenge || await isAuthRequired(page)) {
475
587
  throw new AuthRequiredError(DOMAIN, '[taxonomy=selector_drift] site=jianyu command=search blocked by human verification / access challenge');
476
588
  }
477
589
  }
478
- return toProcurementSearchRecords(rows, {
590
+ const records = toProcurementSearchRecords(rows, {
479
591
  site: SITE,
480
592
  query,
481
593
  limit,
482
594
  });
595
+ const enriched = dedupeByNoticeKey(records.map((row) => {
596
+ const detailSignal = classifyDetailStatus(row.url);
597
+ const publishedAt = normalizeDate(row.publish_time || row.date);
598
+ return {
599
+ ...row,
600
+ source_id: SITE,
601
+ notice_id: extractNoticeId(row.url),
602
+ published_at: publishedAt,
603
+ detail_status: detailSignal.detail_status,
604
+ detail_reason: detailSignal.detail_reason,
605
+ };
606
+ }))
607
+ .filter((row) => row.detail_status === 'ok')
608
+ .filter((row) => sinceDays == null || isWithinSinceDays(row.published_at, sinceDays))
609
+ .slice(0, limit)
610
+ .map((row, index) => ({
611
+ ...row,
612
+ rank: index + 1,
613
+ }));
614
+ return enriched;
483
615
  },
484
616
  });
485
617
  export const __test__ = {
@@ -494,4 +626,8 @@ export const __test__ = {
494
626
  normalizeApiRow,
495
627
  fetchJianyuApiRows,
496
628
  collectApiRowsFromResponses,
629
+ classifyDetailStatus,
630
+ extractNoticeId,
631
+ isWithinSinceDays,
632
+ dedupeByNoticeKey,
497
633
  };
@@ -125,4 +125,29 @@ describe('jianyu search helpers', () => {
125
125
  expect(result.rows[0].title).toContain('电梯采购公告');
126
126
  expect(result.rows[1].title).toContain('另一条电梯采购公告');
127
127
  });
128
+ it('classifies nologin links as blocked detail targets', () => {
129
+ const signal = __test__.classifyDetailStatus('https://www.jianyu360.cn/nologin/content/ABC.html');
130
+ expect(signal.detail_status).toBe('blocked');
131
+ });
132
+ it('classifies accessible detail urls as ok even when they are not jybx paths', () => {
133
+ const signal = __test__.classifyDetailStatus('https://www.jianyu360.cn/notice/detail/123');
134
+ expect(signal.detail_status).toBe('ok');
135
+ expect(signal.detail_reason).toBe('detail_candidate');
136
+ });
137
+ it('classifies list pages as entry_only', () => {
138
+ const signal = __test__.classifyDetailStatus('https://www.jianyu360.cn/list/stype/ZBGG.html');
139
+ expect(signal.detail_status).toBe('entry_only');
140
+ });
141
+ it('extracts stable notice id from jybx urls', () => {
142
+ const id = __test__.extractNoticeId('https://shandong.jianyu360.cn/jybx/20260310_26030938267551.html');
143
+ expect(id).toBe('20260310_26030938267551');
144
+ });
145
+ it('keeps only rows inside recency window', () => {
146
+ const within = __test__.isWithinSinceDays('2026-03-20', 30, new Date('2026-04-09T00:00:00Z'));
147
+ const stale = __test__.isWithinSinceDays('2026-02-01', 30, new Date('2026-04-09T00:00:00Z'));
148
+ const missing = __test__.isWithinSinceDays('', 30, new Date('2026-04-09T00:00:00Z'));
149
+ expect(within).toBe(true);
150
+ expect(stale).toBe(false);
151
+ expect(missing).toBe(false);
152
+ });
128
153
  });
@@ -7,6 +7,13 @@ const RETRYABLE_DETAIL_ERROR_PATTERNS = [
7
7
  /cannot find context with specified id/i,
8
8
  /\[taxonomy=empty_result\]/i,
9
9
  ];
10
/**
 * Phrases that mark a detail page as a verification / login gate rather than
 * real content. runProcurementDetail tests them against the cleaned
 * "title + detail text" of the extracted row.
 */
const DETAIL_AUTH_CHALLENGE_PATTERNS = [
    /请在下图依次点击/i, // "please click, in order, in the image below"
    /验证码/i, // "verification code" (captcha)
    /请完成验证/i, // "please complete the verification"
    /验证登录/i, // "verify login"
    /登录即可获得更多浏览权限/i, // "log in to get more browsing permissions"
];
10
17
  function isRetryableDetailError(error) {
11
18
  const message = error instanceof Error
12
19
  ? cleanText(error.message)
@@ -61,6 +68,14 @@ export async function runProcurementDetail(page, { url, site, query = '', }) {
61
68
  const title = cleanText(row.title);
62
69
  const detailText = cleanText(row.detailText);
63
70
  const publishTime = cleanText(row.publishTime);
71
+ const authGateText = cleanText(`${title} ${detailText}`);
72
+ if (DETAIL_AUTH_CHALLENGE_PATTERNS.some((pattern) => pattern.test(authGateText))) {
73
+ throw taxonomyError('selector_drift', {
74
+ site,
75
+ command: 'detail',
76
+ detail: `detail page blocked by verification challenge: ${targetUrl}`,
77
+ });
78
+ }
64
79
  if (!title && !detailText) {
65
80
  throw taxonomyError('empty_result', {
66
81
  site,
@@ -69,4 +69,16 @@ describe('procurement detail runner', () => {
69
69
  })).rejects.toThrow('[taxonomy=extraction_drift]');
70
70
  expect(attempts).toBe(1);
71
71
  });
72
+ it('rejects captcha/verification pages as selector_drift', async () => {
73
+ const page = createPage(async () => ({
74
+ title: '验证码',
75
+ detailText: '请在下图依次点击:槨畽黛',
76
+ publishTime: '',
77
+ }));
78
+ await expect(runProcurementDetail(page, {
79
+ url: 'https://www.jianyu360.cn/nologin/content/ABC.html',
80
+ site: 'jianyu',
81
+ query: '电梯',
82
+ })).rejects.toThrow('[taxonomy=selector_drift]');
83
+ });
72
84
  });
@@ -5,14 +5,19 @@ export function sanitizeQueryId(resolved, fallbackId) {
5
5
  export async function resolveTwitterQueryId(page, operationName, fallbackId) {
6
6
  const resolved = await page.evaluate(`async () => {
7
7
  const operationName = ${JSON.stringify(operationName)};
8
+ const controller = new AbortController();
9
+ const timeout = setTimeout(() => controller.abort(), 5000);
8
10
  try {
9
- const ghResp = await fetch('https://raw.githubusercontent.com/fa0311/twitter-openapi/refs/heads/main/src/config/placeholder.json');
11
+ const ghResp = await fetch('https://raw.githubusercontent.com/fa0311/twitter-openapi/refs/heads/main/src/config/placeholder.json', { signal: controller.signal });
12
+ clearTimeout(timeout);
10
13
  if (ghResp.ok) {
11
14
  const data = await ghResp.json();
12
15
  const entry = data?.[operationName];
13
16
  if (entry && entry.queryId) return entry.queryId;
14
17
  }
15
- } catch {}
18
+ } catch {
19
+ clearTimeout(timeout);
20
+ }
16
21
  try {
17
22
  const scripts = performance.getEntriesByType('resource')
18
23
  .filter(r => r.name.includes('client-web') && r.name.endsWith('.js'))
@@ -0,0 +1,218 @@
1
+ import { cli, Strategy } from '@jackwener/opencli/registry';
2
+ import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
3
+ import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
4
+
5
// Bearer token sent in the Authorization header. It is stored URL-encoded and
// passed through decodeURIComponent where the request headers are built.
const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';

// Fallback GraphQL query ids handed to resolveTwitterQueryId for the
// UserTweets and UserByScreenName operations.
const USER_TWEETS_QUERY_ID = '6fWQaBPK51aGyC_VC7t9GQ';
const USER_BY_SCREEN_NAME_QUERY_ID = 'IGgvgiOx4QZndDHuD3x9TQ';
8
+
9
/**
 * Feature-flag map serialized into the `features` query parameter of the
 * UserTweets GraphQL request (see buildUserTweetsUrl).
 */
const USER_TWEETS_FEATURES = {
    rweb_video_screen_enabled: false,
    payments_enabled: false,
    profile_label_improvements_pcf_label_in_post_enabled: true,
    rweb_tipjar_consumption_enabled: true,
    verified_phone_label_enabled: false,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_timeline_navigation_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    premium_content_api_read_enabled: false,
    communities_web_enable_tweet_community_results_fetch: true,
    c9s_tweet_anatomy_moderator_badge_enabled: true,
    responsive_web_grok_analyze_button_fetch_trends_enabled: false,
    responsive_web_grok_analyze_post_followups_enabled: true,
    responsive_web_jetfuel_frame: true,
    responsive_web_grok_share_attachment_enabled: true,
    responsive_web_grok_annotations_enabled: true,
    articles_preview_enabled: true,
    responsive_web_edit_tweet_api_enabled: true,
    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
    view_counts_everywhere_api_enabled: true,
    longform_notetweets_consumption_enabled: true,
    responsive_web_twitter_article_tweet_consumption_enabled: true,
    tweet_awards_web_tipping_enabled: false,
    content_disclosure_indicator_enabled: true,
    content_disclosure_ai_generated_indicator_enabled: true,
    responsive_web_grok_show_grok_translated_post: false,
    responsive_web_grok_analysis_button_from_backend: true,
    post_ctas_fetch_enabled: false,
    freedom_of_speech_not_reach_fetch_enabled: true,
    standardized_nudges_misinfo: true,
    tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
    longform_notetweets_rich_text_read_enabled: true,
    longform_notetweets_inline_media_enabled: true,
    responsive_web_grok_image_annotation_enabled: true,
    responsive_web_grok_imagine_annotation_enabled: true,
    responsive_web_grok_community_note_auto_translation_is_enabled: false,
    responsive_web_enhance_cards_enabled: false,
};
48
+
49
/**
 * Feature-flag map serialized into the `features` query parameter of the
 * UserByScreenName GraphQL request (see buildUserByScreenNameUrl).
 */
const USER_BY_SCREEN_NAME_FEATURES = {
    hidden_profile_subscriptions_enabled: true,
    rweb_tipjar_consumption_enabled: true,
    responsive_web_graphql_exclude_directive_enabled: true,
    verified_phone_label_enabled: false,
    subscriptions_verification_info_is_identity_verified_enabled: true,
    subscriptions_verification_info_verified_since_enabled: true,
    highlights_tweets_tab_ui_enabled: true,
    responsive_web_twitter_article_notes_tab_enabled: true,
    subscriptions_feature_can_gift_premium: true,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    responsive_web_graphql_timeline_navigation_enabled: true,
};
63
+
64
/**
 * Builds the relative URL of a UserTweets GraphQL request.
 *
 * @param {string} queryId - GraphQL query id placed in the path.
 * @param {string} userId - Value of the `userId` variable.
 * @param {number} count - Value of the `count` variable.
 * @param {string} [cursor] - Pagination cursor; omitted from the variables when falsy.
 * @returns {string} Path plus URL-encoded JSON `variables` and `features` parameters.
 */
function buildUserTweetsUrl(queryId, userId, count, cursor) {
    const encodeJson = (value) => encodeURIComponent(JSON.stringify(value));
    const variables = {
        userId,
        count,
        includePromotedContent: false,
        withQuickPromoteEligibilityTweetFields: true,
        withVoice: true,
        ...(cursor ? { cursor } : {}),
    };
    return [
        `/i/api/graphql/${queryId}/UserTweets`,
        `?variables=${encodeJson(variables)}`,
        `&features=${encodeJson(USER_TWEETS_FEATURES)}`,
    ].join('');
}
77
+
78
/**
 * Builds the relative URL of a UserByScreenName GraphQL request.
 *
 * @param {string} queryId - GraphQL query id placed in the path.
 * @param {string} screenName - Handle sent as the `screen_name` variable.
 * @returns {string} Path plus URL-encoded JSON `variables` and `features` parameters.
 */
function buildUserByScreenNameUrl(queryId, screenName) {
    const encodeJson = (value) => encodeURIComponent(JSON.stringify(value));
    const variables = { screen_name: screenName, withSafetyModeUserFields: true };
    return [
        `/i/api/graphql/${queryId}/UserByScreenName`,
        `?variables=${encodeJson(variables)}`,
        `&features=${encodeJson(USER_BY_SCREEN_NAME_FEATURES)}`,
    ].join('');
}
84
+
85
/**
 * Flattens one GraphQL tweet result into a plain row.
 *
 * The payload may be the tweet itself or a wrapper holding it under `.tweet`.
 * Tweets without a `rest_id`, or whose `rest_id` is already in `seen`, are
 * dropped; accepted ids are added to `seen`.
 *
 * @param {object|null|undefined} result - Raw `tweet_results.result` payload.
 * @param {Set<string>} seen - Ids already emitted; mutated by this call.
 * @returns {object|null} The flattened tweet, or `null` when it is skipped.
 */
function extractTweet(result, seen) {
    if (!result) {
        return null;
    }
    const tweet = result.tweet || result;
    const { rest_id: id } = tweet;
    if (!id || seen.has(id)) {
        return null;
    }
    seen.add(id);

    const legacy = tweet.legacy || {};
    const author = tweet.core?.user_results?.result;
    const handle = author?.legacy?.screen_name || author?.core?.screen_name || 'unknown';
    // Long-form posts carry their full body in note_tweet; legacy.full_text is the fallback.
    const longText = tweet.note_tweet?.note_tweet_results?.result?.text;
    const looksLikeRetweet = legacy.retweeted_status_result || legacy.full_text?.startsWith('RT @');

    return {
        id,
        author: handle,
        name: author?.legacy?.name || author?.core?.name || '',
        text: longText || legacy.full_text || '',
        likes: legacy.favorite_count || 0,
        retweets: legacy.retweet_count || 0,
        replies: legacy.reply_count || 0,
        views: Number(tweet.views?.count) || 0,
        is_retweet: Boolean(looksLikeRetweet),
        created_at: legacy.created_at || '',
        url: `https://x.com/${handle}/status/${id}`,
    };
}
110
+
111
/**
 * Walks a UserTweets GraphQL response and collects tweets plus the cursor for
 * the next page.
 *
 * Instructions are read from `timeline_v2.timeline` or, failing that,
 * `timeline.timeline`. `TimelinePinEntry` instructions are ignored. Cursor
 * entries are recognised either by their content type or by their entry id
 * prefix; every other entry is handed to extractTweet, first as a direct
 * tweet and otherwise item by item through `content.items`.
 *
 * @param {object} data - Parsed response body.
 * @param {Set<string>} seen - Tweet ids already emitted; shared with extractTweet.
 * @returns {{tweets: object[], nextCursor: (string|null)}}
 */
function parseUserTweets(data, seen) {
    const CURSOR_TYPE = 'TimelineTimelineCursor';
    const PAGING_CURSOR_KINDS = ['Bottom', 'ShowMore'];
    const PAGING_ENTRY_ID_PREFIXES = ['cursor-bottom-', 'cursor-showMore-'];

    const userResult = data?.data?.user?.result;
    const instructions = userResult?.timeline_v2?.timeline?.instructions
        || userResult?.timeline?.timeline?.instructions
        || [];

    const collected = [];
    let pagingCursor = null;
    // Runs a raw result through extractTweet and reports whether it was kept.
    const collect = (rawResult) => {
        const parsed = extractTweet(rawResult, seen);
        if (parsed) {
            collected.push(parsed);
        }
        return Boolean(parsed);
    };

    for (const instruction of instructions) {
        if (instruction.type === 'TimelinePinEntry') {
            continue;
        }
        for (const entry of instruction.entries || []) {
            const { content } = entry;

            const isTypedCursor = content?.entryType === CURSOR_TYPE || content?.__typename === CURSOR_TYPE;
            if (isTypedCursor) {
                if (PAGING_CURSOR_KINDS.includes(content.cursorType)) {
                    pagingCursor = content.value;
                }
                continue;
            }

            const hasPagingEntryId = PAGING_ENTRY_ID_PREFIXES.some((prefix) => entry.entryId?.startsWith(prefix));
            if (hasPagingEntryId) {
                pagingCursor = content?.value || content?.itemContent?.value || pagingCursor;
                continue;
            }

            if (collect(content?.itemContent?.tweet_results?.result)) {
                continue;
            }
            for (const item of content?.items || []) {
                collect(item.item?.itemContent?.tweet_results?.result);
            }
        }
    }
    return { tweets: collected, nextCursor: pagingCursor };
}
142
+
143
/**
 * `twitter tweets <username>`: lists a user's most recent tweets.
 *
 * Flow: open x.com, read the `ct0` CSRF cookie (absence means not logged in),
 * resolve the GraphQL query ids, look up the numeric user id for the handle,
 * then page through UserTweets (at most 5 requests) until `limit` tweets are
 * collected or no further cursor is returned.
 *
 * Throws AuthRequiredError when no `ct0` cookie is present,
 * CommandExecutionError when the handle is empty, cannot be resolved, or the
 * first UserTweets request fails, and EmptyResultError when nothing was found.
 */
cli({
    site: 'twitter',
    name: 'tweets',
    description: "Fetch a Twitter user's most recent tweets (chronological, excludes pinned)",
    domain: 'x.com',
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'username', type: 'string', positional: true, required: true, help: 'Twitter screen name (with or without @)' },
        { name: 'limit', type: 'int', default: 20, help: 'Max tweets to return' },
    ],
    columns: ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url'],
    func: async (page, kwargs) => {
        // Coerce before clamping: a non-numeric limit would otherwise become NaN,
        // the paging loop would never run and the command would report "no tweets".
        const limit = Math.max(1, Math.min(200, Number(kwargs.limit) || 20));
        // Trim first so a leading "@" is still recognised after surrounding whitespace.
        const username = String(kwargs.username || '').trim().replace(/^@/, '').trim();
        if (!username) throw new CommandExecutionError('username is required');

        await page.goto('https://x.com');
        await page.wait(3);

        const ct0 = await page.evaluate(`() => {
 return document.cookie.split(';').map(c => c.trim()).find(c => c.startsWith('ct0='))?.split('=')[1] || null;
 }`);
        if (!ct0) throw new AuthRequiredError('x.com', 'Not logged into x.com (no ct0 cookie)');

        const userTweetsQueryId = await resolveTwitterQueryId(page, 'UserTweets', USER_TWEETS_QUERY_ID);
        const userByScreenNameQueryId = await resolveTwitterQueryId(page, 'UserByScreenName', USER_BY_SCREEN_NAME_QUERY_ID);

        // Serialized once and spliced into the evaluated scripts as an object literal.
        const headers = JSON.stringify({
            'Authorization': `Bearer ${decodeURIComponent(BEARER_TOKEN)}`,
            'X-Csrf-Token': ct0,
            'X-Twitter-Auth-Type': 'OAuth2Session',
            'X-Twitter-Active-User': 'yes',
        });

        // URLs are embedded with JSON.stringify so they are always a valid,
        // correctly escaped string literal inside the evaluated script.
        const ubsUrl = buildUserByScreenNameUrl(userByScreenNameQueryId, username);
        const userId = await page.evaluate(`async () => {
 const resp = await fetch(${JSON.stringify(ubsUrl)}, { headers: ${headers}, credentials: 'include' });
 if (!resp.ok) return null;
 const d = await resp.json();
 return d?.data?.user?.result?.rest_id || null;
 }`);
        if (!userId) throw new CommandExecutionError(`Could not resolve @${username}`);

        const seen = new Set();
        const all = [];
        let cursor = null;
        for (let i = 0; i < 5 && all.length < limit; i++) {
            // Ask for a few more than still needed; capped at 100 per request.
            const fetchCount = Math.min(100, limit - all.length + 10);
            const url = buildUserTweetsUrl(userTweetsQueryId, userId, fetchCount, cursor);
            const data = await page.evaluate(`async () => {
 const r = await fetch(${JSON.stringify(url)}, { headers: ${headers}, credentials: 'include' });
 return r.ok ? await r.json() : { error: r.status };
 }`);
            if (data?.error) {
                // A failure on a later page keeps what was already collected.
                if (all.length === 0) throw new CommandExecutionError(`HTTP ${data.error}: UserTweets fetch failed — queryId may have expired`);
                break;
            }
            const { tweets, nextCursor } = parseUserTweets(data, seen);
            all.push(...tweets);
            // Stop when there is no cursor or the cursor did not advance.
            if (!nextCursor || nextCursor === cursor) break;
            cursor = nextCursor;
        }

        if (all.length === 0) throw new EmptyResultError(`@${username} has no recent tweets`, 'Account may be private or suspended');
        return all.slice(0, limit);
    },
});
211
+
212
// Internal helpers exposed for the unit tests.
export const __test__ = { sanitizeQueryId, buildUserTweetsUrl, buildUserByScreenNameUrl, extractTweet, parseUserTweets };
@@ -0,0 +1,125 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { getRegistry } from '@jackwener/opencli/registry';
3
+ import { __test__ } from './tweets.js';
4
+
5
describe('twitter tweets helpers', () => {
    const { sanitizeQueryId, buildUserTweetsUrl, buildUserByScreenNameUrl, extractTweet, parseUserTweets } = __test__;

    // Fixture builders shared by the cases below.
    const authoredBy = (screen_name, name) => ({ user_results: { result: { legacy: { screen_name, name } } } });
    const legacyOf = (full_text, created_at, overrides = {}) => ({
        full_text,
        favorite_count: 0,
        retweet_count: 0,
        reply_count: 0,
        created_at,
        ...overrides,
    });
    const tweetEntry = (entryId, result) => ({ entryId, content: { itemContent: { tweet_results: { result } } } });

    it('registers is_retweet in the default columns', () => {
        const command = getRegistry().get('twitter/tweets');
        expect(command?.columns).toEqual(['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url']);
    });

    it('falls back when queryId contains unsafe characters', () => {
        expect(sanitizeQueryId('safe_Query-123', 'fallback')).toBe('safe_Query-123');
        for (const unsafe of ['bad"id', 'bad/id', null]) {
            expect(sanitizeQueryId(unsafe, 'fallback')).toBe('fallback');
        }
    });

    it('builds UserTweets url with cursor and features', () => {
        const url = buildUserTweetsUrl('query123', '42', 20, 'cursor-1');
        expect(url).toContain('/i/api/graphql/query123/UserTweets');
        const decoded = decodeURIComponent(url);
        for (const fragment of ['"userId":"42"', '"count":20', '"cursor":"cursor-1"', 'longform_notetweets_consumption_enabled']) {
            expect(decoded).toContain(fragment);
        }
    });

    it('builds UserByScreenName url for the given handle', () => {
        const url = buildUserByScreenNameUrl('uquery', 'jakevin7');
        expect(url).toContain('/i/api/graphql/uquery/UserByScreenName');
        expect(decodeURIComponent(url)).toContain('"screen_name":"jakevin7"');
    });

    it('prefers note_tweet text over legacy.full_text for long posts', () => {
        const tweet = extractTweet({
            rest_id: '99',
            legacy: legacyOf('short truncated…', 'now', { favorite_count: 1 }),
            note_tweet: { note_tweet_results: { result: { text: 'full long-form body' } } },
            core: authoredBy('bob', 'Bob'),
            views: { count: '42' },
        }, new Set());
        expect(tweet.text).toBe('full long-form body');
        expect(tweet.views).toBe(42);
    });

    it('flags retweets via RT prefix or retweeted_status_result', () => {
        const viaPrefix = extractTweet({
            rest_id: '1',
            legacy: legacyOf('RT @foo: hi', ''),
            core: authoredBy('u', 'U'),
        }, new Set());
        expect(viaPrefix.is_retweet).toBe(true);

        const viaStatusResult = extractTweet({
            rest_id: '2',
            legacy: legacyOf('hello', '', { retweeted_status_result: { result: {} } }),
            core: authoredBy('u', 'U'),
        }, new Set());
        expect(viaStatusResult.is_retweet).toBe(true);
    });

    it('parses chronological tweets and skips pinned instruction', () => {
        const chronEntry = tweetEntry('tweet-1', {
            rest_id: '1',
            legacy: legacyOf('chronological post', 'now', { favorite_count: 5, retweet_count: 1, reply_count: 2 }),
            core: authoredBy('alice', 'Alice'),
            views: { count: '100' },
        });
        const cursorEntry = {
            entryId: 'cursor-bottom-1',
            content: { entryType: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'cursor-next' },
        };
        const pinnedEntry = tweetEntry('tweet-pinned-999', {
            rest_id: '999',
            legacy: legacyOf('pinned post', 'old'),
            core: authoredBy('alice', 'Alice'),
        });
        const instructions = [
            { type: 'TimelinePinEntry', entries: [pinnedEntry] },
            { entries: [chronEntry, cursorEntry] },
        ];
        const payload = { data: { user: { result: { timeline_v2: { timeline: { instructions } } } } } };

        const { tweets, nextCursor } = parseUserTweets(payload, new Set());
        expect(nextCursor).toBe('cursor-next');
        expect(tweets).toHaveLength(1);
        expect(tweets[0]).toMatchObject({
            id: '1',
            author: 'alice',
            text: 'chronological post',
            likes: 5,
            views: 100,
            url: 'https://x.com/alice/status/1',
        });
    });
});