@jackwener/opencli 1.7.5 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/README.zh-CN.md +5 -2
- package/cli-manifest.json +77 -1
- package/clis/bilibili/video.js +61 -0
- package/clis/bilibili/video.test.js +81 -0
- package/clis/deepseek/ask.js +21 -1
- package/clis/deepseek/ask.test.js +73 -0
- package/clis/deepseek/utils.js +84 -1
- package/clis/deepseek/utils.test.js +37 -0
- package/clis/jianyu/search.js +139 -3
- package/clis/jianyu/search.test.js +25 -0
- package/clis/jianyu/shared/procurement-detail.js +15 -0
- package/clis/jianyu/shared/procurement-detail.test.js +12 -0
- package/clis/twitter/shared.js +7 -2
- package/clis/twitter/tweets.js +218 -0
- package/clis/twitter/tweets.test.js +125 -0
- package/clis/youtube/channel.js +35 -0
- package/dist/src/browser/base-page.d.ts +13 -3
- package/dist/src/browser/base-page.js +35 -25
- package/dist/src/browser/cdp.d.ts +1 -0
- package/dist/src/browser/cdp.js +12 -3
- package/dist/src/browser/compound.d.ts +59 -0
- package/dist/src/browser/compound.js +112 -0
- package/dist/src/browser/compound.test.d.ts +1 -0
- package/dist/src/browser/compound.test.js +175 -0
- package/dist/src/browser/dom-snapshot.d.ts +7 -0
- package/dist/src/browser/dom-snapshot.js +76 -3
- package/dist/src/browser/dom-snapshot.test.js +65 -0
- package/dist/src/browser/extract.d.ts +69 -0
- package/dist/src/browser/extract.js +132 -0
- package/dist/src/browser/extract.test.d.ts +1 -0
- package/dist/src/browser/extract.test.js +129 -0
- package/dist/src/browser/find.d.ts +76 -0
- package/dist/src/browser/find.js +179 -0
- package/dist/src/browser/find.test.d.ts +1 -0
- package/dist/src/browser/find.test.js +120 -0
- package/dist/src/browser/html-tree.d.ts +75 -0
- package/dist/src/browser/html-tree.js +112 -0
- package/dist/src/browser/html-tree.test.d.ts +1 -0
- package/dist/src/browser/html-tree.test.js +181 -0
- package/dist/src/browser/network-cache.d.ts +48 -0
- package/dist/src/browser/network-cache.js +66 -0
- package/dist/src/browser/network-cache.test.d.ts +1 -0
- package/dist/src/browser/network-cache.test.js +58 -0
- package/dist/src/browser/network-key.d.ts +22 -0
- package/dist/src/browser/network-key.js +66 -0
- package/dist/src/browser/network-key.test.d.ts +1 -0
- package/dist/src/browser/network-key.test.js +49 -0
- package/dist/src/browser/shape-filter.d.ts +52 -0
- package/dist/src/browser/shape-filter.js +101 -0
- package/dist/src/browser/shape-filter.test.d.ts +1 -0
- package/dist/src/browser/shape-filter.test.js +101 -0
- package/dist/src/browser/shape.d.ts +23 -0
- package/dist/src/browser/shape.js +95 -0
- package/dist/src/browser/shape.test.d.ts +1 -0
- package/dist/src/browser/shape.test.js +82 -0
- package/dist/src/browser/target-errors.d.ts +14 -1
- package/dist/src/browser/target-errors.js +13 -0
- package/dist/src/browser/target-errors.test.js +39 -6
- package/dist/src/browser/target-resolver.d.ts +57 -10
- package/dist/src/browser/target-resolver.js +195 -75
- package/dist/src/browser/target-resolver.test.js +80 -5
- package/dist/src/cli.js +630 -125
- package/dist/src/cli.test.js +794 -0
- package/dist/src/execution.js +7 -2
- package/dist/src/execution.test.js +54 -0
- package/dist/src/main.js +16 -0
- package/dist/src/types.d.ts +18 -3
- package/package.json +1 -1
package/clis/jianyu/search.js
CHANGED
|
@@ -35,6 +35,10 @@ const NAVIGATION_PATH_PREFIXES = [
|
|
|
35
35
|
'/exhibition/',
|
|
36
36
|
'/swordfish/page_big_pc/search/',
|
|
37
37
|
];
|
|
38
|
+
const BLOCKED_DETAIL_PATH_PREFIXES = [
|
|
39
|
+
'/nologin/content/',
|
|
40
|
+
'/article/bdprivate/',
|
|
41
|
+
];
|
|
38
42
|
const JIANYU_API_TYPES = ['fType', 'eType', 'vType', 'mType'];
|
|
39
43
|
export function buildSearchUrl(query) {
|
|
40
44
|
const url = new URL(SEARCH_ENTRY);
|
|
@@ -74,6 +78,86 @@ function isLikelyNavigationUrl(rawUrl) {
|
|
|
74
78
|
return true;
|
|
75
79
|
}
|
|
76
80
|
}
|
|
81
|
+
/**
 * Decide whether a search-result URL looks like an openable detail page.
 *
 * @param {string} rawUrl - Candidate URL taken from a search row.
 * @returns {{detail_status: string, detail_reason: string}} `detail_status`
 *   is one of 'ok', 'entry_only' or 'blocked'; `detail_reason` names the rule
 *   that produced it.
 */
function classifyDetailStatus(rawUrl) {
    const signal = (detail_status, detail_reason) => ({ detail_status, detail_reason });
    const candidate = cleanText(rawUrl);
    if (!candidate) {
        return signal('blocked', 'missing_url');
    }
    try {
        const { pathname } = new URL(candidate);
        // Lower-case the path and squash a run of trailing slashes into one.
        const normalizedPath = cleanText(pathname).toLowerCase().replace(/\/+$/, '/') || '/';
        // Entries are matched anywhere in the path, not only at its start.
        const hitsBlockedPath = BLOCKED_DETAIL_PATH_PREFIXES.some((prefix) => normalizedPath.includes(prefix));
        if (hitsBlockedPath) {
            return signal('blocked', 'verification_or_paid_wall');
        }
        if (isLikelyNavigationUrl(candidate)) {
            return signal('entry_only', 'navigation_or_profile_entry');
        }
        const okReason = normalizedPath.includes('/jybx/') ? 'jybx_detail' : 'detail_candidate';
        return signal('ok', okReason);
    }
    catch {
        // Any throw above (for example `new URL` rejecting the text) is reported as an invalid URL.
        return signal('blocked', 'invalid_url');
    }
}
|
|
116
|
+
/**
 * Derive a notice identifier from a detail URL.
 *
 * For `/jybx/<id>.html` paths the captured `<id>` is returned; otherwise the
 * last non-empty path segment is used with a trailing `.htm`/`.html` removed.
 *
 * @param {string} rawUrl - Detail page URL.
 * @returns {string} The identifier, or '' when the URL is empty or cannot be parsed.
 */
function extractNoticeId(rawUrl) {
    const urlText = cleanText(rawUrl);
    if (!urlText) {
        return '';
    }
    let pathname;
    try {
        pathname = cleanText(new URL(urlText).pathname);
        const jybxId = pathname.match(/\/jybx\/([^/?#]+)\.html$/i)?.[1];
        if (jybxId) {
            return cleanText(jybxId);
        }
        // Fall back to the final non-empty segment of the path.
        const parts = pathname.split('/').filter(Boolean);
        const lastPart = cleanText(parts[parts.length - 1] || '');
        const withoutExtension = lastPart.replace(/\.html?$/i, '');
        return cleanText(withoutExtension);
    }
    catch {
        return '';
    }
}
|
|
134
|
+
/**
 * Check whether a publication date falls inside a look-back window.
 *
 * @param {string} dateText - Date text; passed through `normalizeDate` first.
 * @param {number} sinceDays - Window size in whole days (inclusive).
 * @param {Date} [now] - Reference instant; defaults to the current time.
 * @returns {boolean} True when the date is between 0 and `sinceDays` days
 *   before the UTC calendar day of `now`; false for missing/unparseable dates.
 */
function isWithinSinceDays(dateText, sinceDays, now = new Date()) {
    const MS_PER_DAY = 24 * 3600 * 1000;
    const isoDay = normalizeDate(dateText);
    if (!isoDay) {
        return false;
    }
    const publishedMidnight = Date.parse(`${isoDay}T00:00:00Z`);
    if (!Number.isFinite(publishedMidnight)) {
        return false;
    }
    // Both sides are truncated to UTC midnight so the difference is whole days.
    // NOTE(review): a row dated one day ahead of the UTC date gets a negative
    // age and is rejected — confirm this is intended if the site's dates are
    // in a timezone ahead of UTC.
    const utcMidnightToday = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
    const ageInDays = Math.floor((utcMidnightToday - publishedMidnight) / MS_PER_DAY);
    if (ageInDays < 0) {
        return false;
    }
    return ageInDays <= sinceDays;
}
|
|
145
|
+
/**
 * Drop repeated rows, keeping the first occurrence of each key.
 *
 * The key is `source_id` + `notice_id` when both are non-empty after cleaning,
 * otherwise the cleaned `title` + `url`.
 *
 * @param {Iterable<object>} items - Rows to de-duplicate.
 * @returns {object[]} Rows in their original order with later duplicates removed.
 */
function dedupeByNoticeKey(items) {
    const seenKeys = new Set();
    const keyFor = (row) => {
        const sourceId = cleanText(row.source_id || '');
        const noticeId = cleanText(row.notice_id || '');
        if (sourceId && noticeId) {
            return `${sourceId}\t${noticeId}`;
        }
        return `${cleanText(row.title)}\t${cleanText(row.url)}`;
    };
    return [...items].filter((row) => {
        const rowKey = keyFor(row);
        // The key always contains the tab separator, so the emptiness check
        // never fires; it is kept to mirror the guard on the key.
        if (!rowKey || seenKeys.has(rowKey)) {
            return false;
        }
        seenKeys.add(rowKey);
        return true;
    });
}
|
|
77
161
|
function filterNavigationRows(query, items) {
|
|
78
162
|
const queryTokens = cleanText(query).split(/\s+/).filter(Boolean).map((token) => token.toLowerCase());
|
|
79
163
|
return items
|
|
@@ -86,6 +170,9 @@ function filterNavigationRows(query, items) {
|
|
|
86
170
|
.filter((item) => {
|
|
87
171
|
if (!item.title || !item.url)
|
|
88
172
|
return false;
|
|
173
|
+
const detailSignal = classifyDetailStatus(item.url);
|
|
174
|
+
if (detailSignal.detail_status !== 'ok')
|
|
175
|
+
return false;
|
|
89
176
|
const haystack = `${item.title} ${item.contextText}`.toLowerCase();
|
|
90
177
|
const hasQuery = queryTokens.length === 0 || queryTokens.some((token) => haystack.includes(token));
|
|
91
178
|
const hasProcurementHint = PROCUREMENT_TITLE_HINT.test(`${item.title} ${item.contextText}`);
|
|
@@ -446,11 +533,16 @@ cli({
|
|
|
446
533
|
args: [
|
|
447
534
|
{ name: 'query', required: true, positional: true, help: 'Search keyword, e.g. "procurement"' },
|
|
448
535
|
{ name: 'limit', type: 'int', default: 20, help: 'Number of results (max 50)' },
|
|
536
|
+
{ name: 'since_days', type: 'int', help: 'Only keep rows published within N days' },
|
|
449
537
|
],
|
|
450
|
-
columns: ['rank', 'content_type', 'title', '
|
|
538
|
+
columns: ['rank', 'content_type', 'title', 'published_at', 'detail_status', 'project_code', 'budget_or_limit', 'url'],
|
|
451
539
|
func: async (page, kwargs) => {
|
|
452
540
|
const query = cleanText(kwargs.query);
|
|
453
541
|
const limit = Math.max(1, Math.min(Number(kwargs.limit) || 20, 50));
|
|
542
|
+
const rawSinceDays = Number(kwargs.since_days);
|
|
543
|
+
const sinceDays = Number.isFinite(rawSinceDays) && rawSinceDays > 0
|
|
544
|
+
? Math.max(1, Math.min(rawSinceDays, 3650))
|
|
545
|
+
: null;
|
|
454
546
|
const apiResult = await fetchJianyuApiRows(page, query, limit);
|
|
455
547
|
const mergedRows = dedupeCandidates(filterNavigationRows(query, apiResult.rows));
|
|
456
548
|
const extractedRows = await searchRowsFromEntries(page, {
|
|
@@ -465,21 +557,61 @@ cli({
|
|
|
465
557
|
const indexedRows = await fetchDuckDuckGoIndexRows(query, limit);
|
|
466
558
|
const filteredIndexedRows = dedupeCandidates(filterNavigationRows(query, indexedRows));
|
|
467
559
|
if (filteredIndexedRows.length > 0) {
|
|
468
|
-
|
|
560
|
+
const records = toProcurementSearchRecords(filteredIndexedRows, {
|
|
469
561
|
site: SITE,
|
|
470
562
|
query,
|
|
471
563
|
limit,
|
|
472
564
|
});
|
|
565
|
+
const enriched = dedupeByNoticeKey(records.map((row) => {
|
|
566
|
+
const detailSignal = classifyDetailStatus(row.url);
|
|
567
|
+
const publishedAt = normalizeDate(row.publish_time || row.date);
|
|
568
|
+
return {
|
|
569
|
+
...row,
|
|
570
|
+
source_id: SITE,
|
|
571
|
+
notice_id: extractNoticeId(row.url),
|
|
572
|
+
published_at: publishedAt,
|
|
573
|
+
detail_status: detailSignal.detail_status,
|
|
574
|
+
detail_reason: detailSignal.detail_reason,
|
|
575
|
+
};
|
|
576
|
+
}))
|
|
577
|
+
.filter((row) => row.detail_status === 'ok')
|
|
578
|
+
.filter((row) => sinceDays == null || isWithinSinceDays(row.published_at, sinceDays))
|
|
579
|
+
.slice(0, limit)
|
|
580
|
+
.map((row, index) => ({
|
|
581
|
+
...row,
|
|
582
|
+
rank: index + 1,
|
|
583
|
+
}));
|
|
584
|
+
return enriched;
|
|
473
585
|
}
|
|
474
586
|
if (apiResult.challenge || await isAuthRequired(page)) {
|
|
475
587
|
throw new AuthRequiredError(DOMAIN, '[taxonomy=selector_drift] site=jianyu command=search blocked by human verification / access challenge');
|
|
476
588
|
}
|
|
477
589
|
}
|
|
478
|
-
|
|
590
|
+
const records = toProcurementSearchRecords(rows, {
|
|
479
591
|
site: SITE,
|
|
480
592
|
query,
|
|
481
593
|
limit,
|
|
482
594
|
});
|
|
595
|
+
const enriched = dedupeByNoticeKey(records.map((row) => {
|
|
596
|
+
const detailSignal = classifyDetailStatus(row.url);
|
|
597
|
+
const publishedAt = normalizeDate(row.publish_time || row.date);
|
|
598
|
+
return {
|
|
599
|
+
...row,
|
|
600
|
+
source_id: SITE,
|
|
601
|
+
notice_id: extractNoticeId(row.url),
|
|
602
|
+
published_at: publishedAt,
|
|
603
|
+
detail_status: detailSignal.detail_status,
|
|
604
|
+
detail_reason: detailSignal.detail_reason,
|
|
605
|
+
};
|
|
606
|
+
}))
|
|
607
|
+
.filter((row) => row.detail_status === 'ok')
|
|
608
|
+
.filter((row) => sinceDays == null || isWithinSinceDays(row.published_at, sinceDays))
|
|
609
|
+
.slice(0, limit)
|
|
610
|
+
.map((row, index) => ({
|
|
611
|
+
...row,
|
|
612
|
+
rank: index + 1,
|
|
613
|
+
}));
|
|
614
|
+
return enriched;
|
|
483
615
|
},
|
|
484
616
|
});
|
|
485
617
|
export const __test__ = {
|
|
@@ -494,4 +626,8 @@ export const __test__ = {
|
|
|
494
626
|
normalizeApiRow,
|
|
495
627
|
fetchJianyuApiRows,
|
|
496
628
|
collectApiRowsFromResponses,
|
|
629
|
+
classifyDetailStatus,
|
|
630
|
+
extractNoticeId,
|
|
631
|
+
isWithinSinceDays,
|
|
632
|
+
dedupeByNoticeKey,
|
|
497
633
|
};
|
|
@@ -125,4 +125,29 @@ describe('jianyu search helpers', () => {
|
|
|
125
125
|
expect(result.rows[0].title).toContain('电梯采购公告');
|
|
126
126
|
expect(result.rows[1].title).toContain('另一条电梯采购公告');
|
|
127
127
|
});
|
|
128
|
+
it('classifies nologin links as blocked detail targets', () => {
|
|
129
|
+
const signal = __test__.classifyDetailStatus('https://www.jianyu360.cn/nologin/content/ABC.html');
|
|
130
|
+
expect(signal.detail_status).toBe('blocked');
|
|
131
|
+
});
|
|
132
|
+
it('classifies accessible detail urls as ok even when they are not jybx paths', () => {
|
|
133
|
+
const signal = __test__.classifyDetailStatus('https://www.jianyu360.cn/notice/detail/123');
|
|
134
|
+
expect(signal.detail_status).toBe('ok');
|
|
135
|
+
expect(signal.detail_reason).toBe('detail_candidate');
|
|
136
|
+
});
|
|
137
|
+
it('classifies list pages as entry_only', () => {
|
|
138
|
+
const signal = __test__.classifyDetailStatus('https://www.jianyu360.cn/list/stype/ZBGG.html');
|
|
139
|
+
expect(signal.detail_status).toBe('entry_only');
|
|
140
|
+
});
|
|
141
|
+
it('extracts stable notice id from jybx urls', () => {
|
|
142
|
+
const id = __test__.extractNoticeId('https://shandong.jianyu360.cn/jybx/20260310_26030938267551.html');
|
|
143
|
+
expect(id).toBe('20260310_26030938267551');
|
|
144
|
+
});
|
|
145
|
+
it('keeps only rows inside recency window', () => {
|
|
146
|
+
const within = __test__.isWithinSinceDays('2026-03-20', 30, new Date('2026-04-09T00:00:00Z'));
|
|
147
|
+
const stale = __test__.isWithinSinceDays('2026-02-01', 30, new Date('2026-04-09T00:00:00Z'));
|
|
148
|
+
const missing = __test__.isWithinSinceDays('', 30, new Date('2026-04-09T00:00:00Z'));
|
|
149
|
+
expect(within).toBe(true);
|
|
150
|
+
expect(stale).toBe(false);
|
|
151
|
+
expect(missing).toBe(false);
|
|
152
|
+
});
|
|
128
153
|
});
|
|
@@ -7,6 +7,13 @@ const RETRYABLE_DETAIL_ERROR_PATTERNS = [
|
|
|
7
7
|
/cannot find context with specified id/i,
|
|
8
8
|
/\[taxonomy=empty_result\]/i,
|
|
9
9
|
];
|
|
10
|
+
const DETAIL_AUTH_CHALLENGE_PATTERNS = [
|
|
11
|
+
/请在下图依次点击/i,
|
|
12
|
+
/验证码/i,
|
|
13
|
+
/请完成验证/i,
|
|
14
|
+
/验证登录/i,
|
|
15
|
+
/登录即可获得更多浏览权限/i,
|
|
16
|
+
];
|
|
10
17
|
function isRetryableDetailError(error) {
|
|
11
18
|
const message = error instanceof Error
|
|
12
19
|
? cleanText(error.message)
|
|
@@ -61,6 +68,14 @@ export async function runProcurementDetail(page, { url, site, query = '', }) {
|
|
|
61
68
|
const title = cleanText(row.title);
|
|
62
69
|
const detailText = cleanText(row.detailText);
|
|
63
70
|
const publishTime = cleanText(row.publishTime);
|
|
71
|
+
const authGateText = cleanText(`${title} ${detailText}`);
|
|
72
|
+
if (DETAIL_AUTH_CHALLENGE_PATTERNS.some((pattern) => pattern.test(authGateText))) {
|
|
73
|
+
throw taxonomyError('selector_drift', {
|
|
74
|
+
site,
|
|
75
|
+
command: 'detail',
|
|
76
|
+
detail: `detail page blocked by verification challenge: ${targetUrl}`,
|
|
77
|
+
});
|
|
78
|
+
}
|
|
64
79
|
if (!title && !detailText) {
|
|
65
80
|
throw taxonomyError('empty_result', {
|
|
66
81
|
site,
|
|
@@ -69,4 +69,16 @@ describe('procurement detail runner', () => {
|
|
|
69
69
|
})).rejects.toThrow('[taxonomy=extraction_drift]');
|
|
70
70
|
expect(attempts).toBe(1);
|
|
71
71
|
});
|
|
72
|
+
it('rejects captcha/verification pages as selector_drift', async () => {
|
|
73
|
+
const page = createPage(async () => ({
|
|
74
|
+
title: '验证码',
|
|
75
|
+
detailText: '请在下图依次点击:槨畽黛',
|
|
76
|
+
publishTime: '',
|
|
77
|
+
}));
|
|
78
|
+
await expect(runProcurementDetail(page, {
|
|
79
|
+
url: 'https://www.jianyu360.cn/nologin/content/ABC.html',
|
|
80
|
+
site: 'jianyu',
|
|
81
|
+
query: '电梯',
|
|
82
|
+
})).rejects.toThrow('[taxonomy=selector_drift]');
|
|
83
|
+
});
|
|
72
84
|
});
|
package/clis/twitter/shared.js
CHANGED
|
@@ -5,14 +5,19 @@ export function sanitizeQueryId(resolved, fallbackId) {
|
|
|
5
5
|
export async function resolveTwitterQueryId(page, operationName, fallbackId) {
|
|
6
6
|
const resolved = await page.evaluate(`async () => {
|
|
7
7
|
const operationName = ${JSON.stringify(operationName)};
|
|
8
|
+
const controller = new AbortController();
|
|
9
|
+
const timeout = setTimeout(() => controller.abort(), 5000);
|
|
8
10
|
try {
|
|
9
|
-
const ghResp = await fetch('https://raw.githubusercontent.com/fa0311/twitter-openapi/refs/heads/main/src/config/placeholder.json');
|
|
11
|
+
const ghResp = await fetch('https://raw.githubusercontent.com/fa0311/twitter-openapi/refs/heads/main/src/config/placeholder.json', { signal: controller.signal });
|
|
12
|
+
clearTimeout(timeout);
|
|
10
13
|
if (ghResp.ok) {
|
|
11
14
|
const data = await ghResp.json();
|
|
12
15
|
const entry = data?.[operationName];
|
|
13
16
|
if (entry && entry.queryId) return entry.queryId;
|
|
14
17
|
}
|
|
15
|
-
} catch {
|
|
18
|
+
} catch {
|
|
19
|
+
clearTimeout(timeout);
|
|
20
|
+
}
|
|
16
21
|
try {
|
|
17
22
|
const scripts = performance.getEntriesByType('resource')
|
|
18
23
|
.filter(r => r.name.includes('client-web') && r.name.endsWith('.js'))
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
|
|
4
|
+
|
|
5
|
+
// Percent-encoded bearer token; the command decodes it with
// decodeURIComponent before placing it in the Authorization header.
const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';

// Fallback GraphQL query ids handed to resolveTwitterQueryId.
const USER_TWEETS_QUERY_ID = '6fWQaBPK51aGyC_VC7t9GQ';
const USER_BY_SCREEN_NAME_QUERY_ID = 'IGgvgiOx4QZndDHuD3x9TQ';

// Feature flags serialized into the `features` query parameter of UserTweets.
// Key order is preserved because the object is JSON-serialized as-is.
const USER_TWEETS_FEATURES = {
    rweb_video_screen_enabled: false,
    payments_enabled: false,
    profile_label_improvements_pcf_label_in_post_enabled: true,
    rweb_tipjar_consumption_enabled: true,
    verified_phone_label_enabled: false,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_timeline_navigation_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    premium_content_api_read_enabled: false,
    communities_web_enable_tweet_community_results_fetch: true,
    c9s_tweet_anatomy_moderator_badge_enabled: true,
    responsive_web_grok_analyze_button_fetch_trends_enabled: false,
    responsive_web_grok_analyze_post_followups_enabled: true,
    responsive_web_jetfuel_frame: true,
    responsive_web_grok_share_attachment_enabled: true,
    responsive_web_grok_annotations_enabled: true,
    articles_preview_enabled: true,
    responsive_web_edit_tweet_api_enabled: true,
    graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
    view_counts_everywhere_api_enabled: true,
    longform_notetweets_consumption_enabled: true,
    responsive_web_twitter_article_tweet_consumption_enabled: true,
    tweet_awards_web_tipping_enabled: false,
    content_disclosure_indicator_enabled: true,
    content_disclosure_ai_generated_indicator_enabled: true,
    responsive_web_grok_show_grok_translated_post: false,
    responsive_web_grok_analysis_button_from_backend: true,
    post_ctas_fetch_enabled: false,
    freedom_of_speech_not_reach_fetch_enabled: true,
    standardized_nudges_misinfo: true,
    tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
    longform_notetweets_rich_text_read_enabled: true,
    longform_notetweets_inline_media_enabled: true,
    responsive_web_grok_image_annotation_enabled: true,
    responsive_web_grok_imagine_annotation_enabled: true,
    responsive_web_grok_community_note_auto_translation_is_enabled: false,
    responsive_web_enhance_cards_enabled: false,
};

// Feature flags serialized into the `features` query parameter of UserByScreenName.
const USER_BY_SCREEN_NAME_FEATURES = {
    hidden_profile_subscriptions_enabled: true,
    rweb_tipjar_consumption_enabled: true,
    responsive_web_graphql_exclude_directive_enabled: true,
    verified_phone_label_enabled: false,
    subscriptions_verification_info_is_identity_verified_enabled: true,
    subscriptions_verification_info_verified_since_enabled: true,
    highlights_tweets_tab_ui_enabled: true,
    responsive_web_twitter_article_notes_tab_enabled: true,
    subscriptions_feature_can_gift_premium: true,
    creator_subscriptions_tweet_preview_api_enabled: true,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
    responsive_web_graphql_timeline_navigation_enabled: true,
};
|
|
63
|
+
|
|
64
|
+
/**
 * Build the relative GraphQL URL for one page of a user's tweets.
 *
 * @param {string} queryId - GraphQL query id placed in the path.
 * @param {string} userId - Value sent as the `userId` variable.
 * @param {number} count - Value sent as the `count` variable.
 * @param {string} [cursor] - Pagination cursor; omitted from the variables when falsy.
 * @returns {string} Path plus `variables` and `features` query parameters.
 */
function buildUserTweetsUrl(queryId, userId, count, cursor) {
    const variables = {
        userId,
        count,
        includePromotedContent: false,
        withQuickPromoteEligibilityTweetFields: true,
        withVoice: true,
        // Added last, and only when a cursor was supplied.
        ...(cursor ? { cursor } : {}),
    };
    const encodedVariables = encodeURIComponent(JSON.stringify(variables));
    const encodedFeatures = encodeURIComponent(JSON.stringify(USER_TWEETS_FEATURES));
    return `/i/api/graphql/${queryId}/UserTweets?variables=${encodedVariables}&features=${encodedFeatures}`;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Build the relative GraphQL URL that looks a user up by screen name.
 *
 * @param {string} queryId - GraphQL query id placed in the path.
 * @param {string} screenName - Value sent as the `screen_name` variable.
 * @returns {string} Path plus `variables` and `features` query parameters.
 */
function buildUserByScreenNameUrl(queryId, screenName) {
    const encodedVariables = encodeURIComponent(JSON.stringify({
        screen_name: screenName,
        withSafetyModeUserFields: true,
    }));
    const encodedFeatures = encodeURIComponent(JSON.stringify(USER_BY_SCREEN_NAME_FEATURES));
    return [
        `/i/api/graphql/${queryId}/UserByScreenName`,
        `?variables=${encodedVariables}`,
        `&features=${encodedFeatures}`,
    ].join('');
}
|
|
84
|
+
|
|
85
|
+
/**
 * Flatten one tweet result node into an output row.
 *
 * @param {object|null|undefined} result - Tweet result node; when it has a
 *   `tweet` property that inner object is used instead.
 * @param {Set<string>} seen - Ids already emitted; the tweet's `rest_id` is
 *   added to it when a row is produced.
 * @returns {object|null} The row, or null when `result` is falsy, has no
 *   `rest_id`, or its id is already in `seen`.
 */
function extractTweet(result, seen) {
    if (!result) {
        return null;
    }
    const node = result.tweet || result;
    const tweetId = node.rest_id;
    if (!tweetId || seen.has(tweetId)) {
        return null;
    }
    seen.add(tweetId);

    const legacy = node.legacy || {};
    const author = node.core?.user_results?.result;
    // Handle and display name are read from `legacy` first, then from `core`.
    const handle = author?.legacy?.screen_name || author?.core?.screen_name || 'unknown';
    const displayName = author?.legacy?.name || author?.core?.name || '';
    // Text from note_tweet, when present, wins over legacy.full_text.
    const longFormText = node.note_tweet?.note_tweet_results?.result?.text;
    const looksLikeRetweet = legacy.retweeted_status_result || legacy.full_text?.startsWith('RT @');

    return {
        id: tweetId,
        author: handle,
        name: displayName,
        text: longFormText || legacy.full_text || '',
        likes: legacy.favorite_count || 0,
        retweets: legacy.retweet_count || 0,
        replies: legacy.reply_count || 0,
        views: Number(node.views?.count) || 0,
        is_retweet: Boolean(looksLikeRetweet),
        created_at: legacy.created_at || '',
        url: `https://x.com/${handle}/status/${tweetId}`,
    };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Walk a UserTweets response and collect tweet rows plus the next-page cursor.
 *
 * Instructions are read from `timeline_v2.timeline.instructions`, falling back
 * to `timeline.timeline.instructions`. Instructions of type 'TimelinePinEntry'
 * are ignored.
 *
 * @param {object} data - Parsed response body.
 * @param {Set<string>} seen - Ids already emitted; shared with `extractTweet`.
 * @returns {{tweets: object[], nextCursor: (string|null)}} Rows in encounter
 *   order and the last Bottom/ShowMore cursor value found, or null.
 */
function parseUserTweets(data, seen) {
    const CURSOR_TYPE = 'TimelineTimelineCursor';
    const userResult = data?.data?.user?.result;
    const instructions = userResult?.timeline_v2?.timeline?.instructions
        || userResult?.timeline?.timeline?.instructions
        || [];

    const tweets = [];
    let nextCursor = null;

    for (const instruction of instructions) {
        if (instruction.type === 'TimelinePinEntry') {
            continue;
        }
        for (const entry of instruction.entries || []) {
            const body = entry.content;

            // Cursor entries identified by their content type.
            const isTypedCursor = body?.entryType === CURSOR_TYPE || body?.__typename === CURSOR_TYPE;
            if (isTypedCursor) {
                if (body.cursorType === 'Bottom' || body.cursorType === 'ShowMore') {
                    nextCursor = body.value;
                }
                continue;
            }

            // Cursor entries identified only by their entry id.
            const entryId = entry.entryId;
            if (entryId?.startsWith('cursor-bottom-') || entryId?.startsWith('cursor-showMore-')) {
                nextCursor = body?.value || body?.itemContent?.value || nextCursor;
                continue;
            }

            // A tweet directly on the entry takes precedence over nested items.
            const topLevel = extractTweet(body?.itemContent?.tweet_results?.result, seen);
            if (topLevel) {
                tweets.push(topLevel);
                continue;
            }

            for (const child of body?.items || []) {
                const childTweet = extractTweet(child.item?.itemContent?.tweet_results?.result, seen);
                if (childTweet) {
                    tweets.push(childTweet);
                }
            }
        }
    }
    return { tweets, nextCursor };
}
|
|
142
|
+
|
|
143
|
+
cli({
    site: 'twitter',
    name: 'tweets',
    description: "Fetch a Twitter user's most recent tweets (chronological, excludes pinned)",
    domain: 'x.com',
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'username', type: 'string', positional: true, required: true, help: 'Twitter screen name (with or without @)' },
        { name: 'limit', type: 'int', default: 20, help: 'Max tweets to return' },
    ],
    columns: ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url'],
    /**
     * Resolve `username` to a user id, then page through UserTweets until
     * `limit` rows are collected, the cursor stops advancing, or 5 pages
     * have been fetched.
     *
     * @param page - Browser page used for navigation and in-page fetches.
     * @param kwargs - Parsed CLI arguments (`username`, `limit`).
     * @returns Up to `limit` tweet rows (limit is clamped to 1..200).
     * @throws {CommandExecutionError} Empty username, unresolvable user, or a
     *   failed first UserTweets request.
     * @throws {AuthRequiredError} No `ct0` cookie is present on x.com.
     * @throws {EmptyResultError} No tweets were collected.
     */
    func: async (page, kwargs) => {
        const limit = Math.max(1, Math.min(200, kwargs.limit || 20));
        const username = String(kwargs.username || '').replace(/^@/, '').trim();
        if (!username) throw new CommandExecutionError('username is required');

        await page.goto('https://x.com');
        await page.wait(3);

        // The ct0 cookie doubles as the CSRF token sent with every API call.
        const ct0 = await page.evaluate(`() => {
            return document.cookie.split(';').map(c => c.trim()).find(c => c.startsWith('ct0='))?.split('=')[1] || null;
        }`);
        if (!ct0) throw new AuthRequiredError('x.com', 'Not logged into x.com (no ct0 cookie)');

        const userTweetsQueryId = await resolveTwitterQueryId(page, 'UserTweets', USER_TWEETS_QUERY_ID);
        const userByScreenNameQueryId = await resolveTwitterQueryId(page, 'UserByScreenName', USER_BY_SCREEN_NAME_QUERY_ID);

        const headers = JSON.stringify({
            'Authorization': `Bearer ${decodeURIComponent(BEARER_TOKEN)}`,
            'X-Csrf-Token': ct0,
            'X-Twitter-Auth-Type': 'OAuth2Session',
            'X-Twitter-Active-User': 'yes',
        });

        // URLs are embedded into the evaluated script as JSON string literals
        // rather than spliced between quotes, so no character in them can
        // terminate the literal and alter the script.
        const ubsUrl = buildUserByScreenNameUrl(userByScreenNameQueryId, username);
        const userId = await page.evaluate(`async () => {
            const resp = await fetch(${JSON.stringify(ubsUrl)}, { headers: ${headers}, credentials: 'include' });
            if (!resp.ok) return null;
            const d = await resp.json();
            return d?.data?.user?.result?.rest_id || null;
        }`);
        if (!userId) throw new CommandExecutionError(`Could not resolve @${username}`);

        const seen = new Set();
        const all = [];
        let cursor = null;
        for (let i = 0; i < 5 && all.length < limit; i++) {
            // Ask for a few extra rows per page; capped at 100 per request.
            const fetchCount = Math.min(100, limit - all.length + 10);
            const url = buildUserTweetsUrl(userTweetsQueryId, userId, fetchCount, cursor);
            const data = await page.evaluate(`async () => {
                const r = await fetch(${JSON.stringify(url)}, { headers: ${headers}, credentials: 'include' });
                return r.ok ? await r.json() : { error: r.status };
            }`);
            if (data?.error) {
                // A failure on a later page keeps what was already collected.
                if (all.length === 0) throw new CommandExecutionError(`HTTP ${data.error}: UserTweets fetch failed — queryId may have expired`);
                break;
            }
            const { tweets, nextCursor } = parseUserTweets(data, seen);
            all.push(...tweets);
            // Stop when there is no cursor or it did not advance.
            if (!nextCursor || nextCursor === cursor) break;
            cursor = nextCursor;
        }

        if (all.length === 0) throw new EmptyResultError(`@${username} has no recent tweets`, 'Account may be private or suspended');
        return all.slice(0, limit);
    },
});
|
|
211
|
+
|
|
212
|
+
// Internal helpers exposed for unit tests only.
const testOnlyHelpers = {
    sanitizeQueryId,
    buildUserTweetsUrl,
    buildUserByScreenNameUrl,
    extractTweet,
    parseUserTweets,
};
export { testOnlyHelpers as __test__ };
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
import { __test__ } from './tweets.js';
|
|
4
|
+
|
|
5
|
+
describe('twitter tweets helpers', () => {
    // `core` payload carrying the author's handle and display name.
    const coreFor = (screen_name, name) => ({ user_results: { result: { legacy: { screen_name, name } } } });
    // `legacy` payload; counters are zero unless overridden through `extra`.
    const legacyOf = (full_text, created_at, extra = {}) => ({
        full_text,
        favorite_count: 0,
        retweet_count: 0,
        reply_count: 0,
        created_at,
        ...extra,
    });
    // Timeline entry wrapping a single tweet result.
    const tweetEntry = (entryId, result) => ({
        entryId,
        content: { itemContent: { tweet_results: { result } } },
    });

    it('registers is_retweet in the default columns', () => {
        const expectedColumns = ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url'];
        const command = getRegistry().get('twitter/tweets');
        expect(command?.columns).toEqual(expectedColumns);
    });

    it('falls back when queryId contains unsafe characters', () => {
        const cases = [
            ['safe_Query-123', 'safe_Query-123'],
            ['bad"id', 'fallback'],
            ['bad/id', 'fallback'],
            [null, 'fallback'],
        ];
        for (const [resolved, expected] of cases) {
            expect(__test__.sanitizeQueryId(resolved, 'fallback')).toBe(expected);
        }
    });

    it('builds UserTweets url with cursor and features', () => {
        const url = __test__.buildUserTweetsUrl('query123', '42', 20, 'cursor-1');
        expect(url).toContain('/i/api/graphql/query123/UserTweets');
        const decoded = decodeURIComponent(url);
        const expectedFragments = [
            '"userId":"42"',
            '"count":20',
            '"cursor":"cursor-1"',
            'longform_notetweets_consumption_enabled',
        ];
        for (const fragment of expectedFragments) {
            expect(decoded).toContain(fragment);
        }
    });

    it('builds UserByScreenName url for the given handle', () => {
        const url = __test__.buildUserByScreenNameUrl('uquery', 'jakevin7');
        expect(url).toContain('/i/api/graphql/uquery/UserByScreenName');
        expect(decodeURIComponent(url)).toContain('"screen_name":"jakevin7"');
    });

    it('prefers note_tweet text over legacy.full_text for long posts', () => {
        const longPost = {
            rest_id: '99',
            legacy: legacyOf('short truncated…', 'now', { favorite_count: 1 }),
            note_tweet: { note_tweet_results: { result: { text: 'full long-form body' } } },
            core: coreFor('bob', 'Bob'),
            views: { count: '42' },
        };
        const tweet = __test__.extractTweet(longPost, new Set());
        expect(tweet.text).toBe('full long-form body');
        expect(tweet.views).toBe(42);
    });

    it('flags retweets via RT prefix or retweeted_status_result', () => {
        const byPrefix = __test__.extractTweet({
            rest_id: '1',
            legacy: legacyOf('RT @foo: hi', ''),
            core: coreFor('u', 'U'),
        }, new Set());
        expect(byPrefix.is_retweet).toBe(true);

        const byStatusResult = __test__.extractTweet({
            rest_id: '2',
            legacy: legacyOf('hello', '', { retweeted_status_result: { result: {} } }),
            core: coreFor('u', 'U'),
        }, new Set());
        expect(byStatusResult.is_retweet).toBe(true);
    });

    it('parses chronological tweets and skips pinned instruction', () => {
        const chronEntry = tweetEntry('tweet-1', {
            rest_id: '1',
            legacy: legacyOf('chronological post', 'now', { favorite_count: 5, retweet_count: 1, reply_count: 2 }),
            core: coreFor('alice', 'Alice'),
            views: { count: '100' },
        });
        const cursorEntry = {
            entryId: 'cursor-bottom-1',
            content: { entryType: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'cursor-next' },
        };
        const pinnedEntry = tweetEntry('tweet-pinned-999', {
            rest_id: '999',
            legacy: legacyOf('pinned post', 'old'),
            core: coreFor('alice', 'Alice'),
        });
        const instructions = [
            { type: 'TimelinePinEntry', entries: [pinnedEntry] },
            { entries: [chronEntry, cursorEntry] },
        ];
        const payload = { data: { user: { result: { timeline_v2: { timeline: { instructions } } } } } };

        const { tweets, nextCursor } = __test__.parseUserTweets(payload, new Set());
        expect(nextCursor).toBe('cursor-next');
        expect(tweets).toHaveLength(1);
        expect(tweets[0]).toMatchObject({
            id: '1',
            author: 'alice',
            text: 'chronological post',
            likes: 5,
            views: 100,
            url: 'https://x.com/alice/status/1',
        });
    });
});
|