@jackwener/opencli 1.7.20 → 1.7.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli-manifest.json +233 -72
- package/clis/_shared/search-adapter.js +70 -0
- package/clis/boss/chatlist.js +96 -14
- package/clis/boss/chatlist.test.js +211 -0
- package/clis/boss/chatmsg.js +98 -24
- package/clis/boss/chatmsg.test.js +230 -0
- package/clis/boss/utils.js +240 -11
- package/clis/brave/search.js +80 -0
- package/clis/brave/search.test.js +76 -0
- package/clis/duckduckgo/search.js +131 -0
- package/clis/duckduckgo/search.test.js +128 -0
- package/clis/duckduckgo/suggest.js +45 -0
- package/clis/duckduckgo/suggest.test.js +66 -0
- package/clis/facebook/feed.js +301 -56
- package/clis/facebook/feed.test.js +169 -0
- package/clis/reddit/comment.js +0 -1
- package/clis/reddit/frontpage.js +0 -1
- package/clis/reddit/home.js +0 -1
- package/clis/reddit/popular.js +0 -1
- package/clis/reddit/read.js +0 -1
- package/clis/reddit/read.test.js +2 -2
- package/clis/reddit/save.js +0 -1
- package/clis/reddit/saved.js +0 -1
- package/clis/reddit/search.js +0 -1
- package/clis/reddit/subreddit-info.js +0 -1
- package/clis/reddit/subreddit.js +0 -1
- package/clis/reddit/subscribe.js +0 -1
- package/clis/reddit/upvote.js +0 -1
- package/clis/reddit/upvoted.js +0 -1
- package/clis/reddit/user-comments.js +0 -1
- package/clis/reddit/user-posts.js +0 -1
- package/clis/reddit/user.js +0 -1
- package/clis/reddit/whoami.js +0 -1
- package/clis/rednote/rednote.test.js +65 -0
- package/clis/rednote/search.js +11 -5
- package/clis/twitter/article.js +0 -1
- package/clis/twitter/bookmark-folder.js +0 -1
- package/clis/twitter/bookmark-folders.js +0 -1
- package/clis/twitter/bookmarks.js +0 -1
- package/clis/twitter/download.js +0 -1
- package/clis/twitter/followers.js +0 -1
- package/clis/twitter/following.js +0 -1
- package/clis/twitter/likes.js +0 -1
- package/clis/twitter/list-tweets.js +0 -1
- package/clis/twitter/lists.js +0 -1
- package/clis/twitter/notifications.js +0 -1
- package/clis/twitter/profile.js +0 -1
- package/clis/twitter/search.js +0 -1
- package/clis/twitter/thread.js +0 -1
- package/clis/twitter/timeline.js +0 -1
- package/clis/twitter/trending.js +0 -1
- package/clis/twitter/tweets.js +0 -1
- package/clis/xiaohongshu/search.js +34 -16
- package/clis/xiaohongshu/search.test.js +66 -11
- package/clis/yahoo/search.js +92 -0
- package/clis/yahoo/search.test.js +94 -0
- package/package.json +1 -1
package/clis/facebook/feed.js
CHANGED
|
@@ -1,60 +1,305 @@
|
|
|
1
|
-
import {
|
|
2
|
-
cli
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
const
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
1
|
+
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
2
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
3
|
+
|
|
4
|
+
const FACEBOOK_HOME = 'https://www.facebook.com/';
|
|
5
|
+
const MAX_LIMIT = 50;
|
|
6
|
+
|
|
7
|
+
/**
 * Validate the `--limit` argument of `facebook feed`.
 *
 * Accepts a number, or a non-blank string holding a number, whose value is an
 * integer between 1 and MAX_LIMIT inclusive. Every other input is rejected,
 * including booleans, arrays and objects: `Number()` would otherwise coerce
 * `true` to 1 and `[7]` to 7 and let them through silently.
 *
 * @param {unknown} value - Raw limit as received from the CLI layer.
 * @returns {number} The validated integer limit.
 * @throws {ArgumentError} When the value is not an integer in [1, MAX_LIMIT].
 */
function requireLimit(value) {
  const isNumericInput = typeof value === 'number'
    || (typeof value === 'string' && value.trim() !== '');
  // Only coerce inputs that are already numeric in nature.
  const n = isNumericInput ? Number(value) : Number.NaN;
  if (!Number.isInteger(n) || n < 1 || n > MAX_LIMIT) {
    throw new ArgumentError(`facebook feed --limit must be an integer between 1 and ${MAX_LIMIT}`);
  }
  return n;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Return the payload carried by a browser evaluation result.
 *
 * When the result is an object that has a `data` property (an envelope), the
 * value of that property is returned; anything else is passed through as-is.
 *
 * @param {unknown} value - Raw result of the page evaluation.
 * @returns {unknown} `value.data` for envelopes, otherwise `value`.
 */
function unwrapBrowserResult(value) {
  const isEnvelope = Boolean(value) && typeof value === 'object' && 'data' in value;
  return isEnvelope ? value.data : value;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Build the source of a self-invoking function that, when evaluated inside a
 * Facebook page, scrapes the visible news feed.
 *
 * The generated script returns `{ status, rows, diagnostics }`:
 *   - `status` is 'auth' (login/checkpoint page), 'ok' (rows found),
 *     'empty' (page explicitly says there are no posts) or 'no_rows'.
 *   - `rows` holds at most `limit` objects with the keys
 *     index, author, content, likes, comments, shares.
 *   - `diagnostics` carries element counts used by the caller to tell an empty
 *     feed apart from markup drift.
 *
 * Post candidates come from `[role="article"]` elements plus a fallback that
 * walks up from Like/Comment/Share buttons inside `[role="main"]`.
 *
 * @param {number} limit - Maximum number of rows to return. It is coerced with
 *   `Number()` before being embedded so a non-numeric value cannot inject code
 *   into the generated script.
 * @returns {string} JavaScript source text of the extractor expression.
 */
function buildFeedExtractScript(limit) {
  return `(() => {
    const limit = ${Number(limit)};

    function clean(value) {
      return String(value || '').replace(/\\s+/g, ' ').trim();
    }

    function textOf(el) {
      return clean(el && el.textContent);
    }

    function labelOf(el) {
      return clean(el && el.getAttribute && el.getAttribute('aria-label'));
    }

    function isAuthPage() {
      const path = window.location && window.location.pathname ? window.location.pathname : '';
      const body = textOf(document.body);
      return /^\\/(login|checkpoint)(\\/|$|\\.php)/.test(path)
        || /^(Log in to Facebook|Facebook登录|登录 Facebook)/i.test(body)
        || /You must log in to continue/i.test(body);
    }

    function isExplicitEmptyFeed() {
      const body = textOf(document.body);
      return /No posts available|Nothing to show|暂无动态|没有更多动态|还没有帖子/i.test(body);
    }

    function isSuggestionOrChrome(text) {
      return /^(People you may know|People You May Know|可能认识的人?|你可能认识的人?)/i.test(text)
        || /^(Suggested for you|Suggested Groups|推荐小组|推荐内容)/i.test(text);
    }

    function isSponsored(text) {
      return /(^|\\s)(Sponsored|赞助|广告)(\\s|$)/i.test(text);
    }

    function isActionText(text) {
      return /^(Like|Comment|Share|Send|Follow|赞|评论|分享|发送|关注)$/i.test(text);
    }

    // A block is a metric only when the WHOLE text is a counter or counter chrome
    // ("3 comments", "1.2K likes", "View all 12 comments"). Matching the bare words
    // anywhere would discard ordinary post text such as "I like this place".
    function isMetricText(text) {
      return /^(All:|所有心情:)/i.test(text)
        || /^(?:\\d[\\d,.KMk]*\\s*(?:more\\s+)?|(?:view|see|show|hide)\\s+(?:(?:all|more|previous)\\s+)?(?:\\d[\\d,.KMk]*\\s+)?)?(?:likes?|reactions?|comments?|shares?)$/i.test(text)
        || /(条评论|次分享)$/.test(text);
    }

    function isTimestampText(text) {
      return /^(\\d+\\s*(s|m|h|d|w|mo|yr|min|sec|second|minute|hour|day|week|month|year)s?|Just now|Yesterday|刚刚|昨天|\\d+小时|\\d+天)(\\s*[·•.])?$/i.test(text);
    }

    // First link under root that looks like a post permalink, or ''.
    function postUrlFrom(root) {
      const links = Array.from(root.querySelectorAll('a[href]'));
      for (const link of links) {
        const href = link.href || link.getAttribute('href') || '';
        if (/\\/posts\\/|\\/permalink\\.php|\\/story\\.php|\\/photo\\/\\?fbid=|\\/groups\\/[^/]+\\/posts\\//i.test(href)) {
          return href;
        }
      }
      return '';
    }

    // Which of like/comment/share controls exist under root (by aria-label).
    function actionKinds(root) {
      const kinds = new Set();
      for (const el of root.querySelectorAll('[aria-label]')) {
        const label = labelOf(el);
        if (/^(Like|赞)$/i.test(label)) kinds.add('like');
        if (/^(Comment|评论)$/i.test(label)) kinds.add('comment');
        if (/^(Share|分享)$/i.test(label)) kinds.add('share');
      }
      return kinds;
    }

    // Distinct, non-empty dir="auto" texts of at most 600 characters.
    function visibleBlocks(root) {
      const seen = new Set();
      return Array.from(root.querySelectorAll('[dir="auto"]'))
        .map(textOf)
        .filter((text) => {
          if (!text || text.length > 600 || seen.has(text)) return false;
          seen.add(text);
          return true;
        });
    }

    function findAuthor(root) {
      const links = [
        root.querySelector('h2 a[href], h3 a[href], h4 a[href], strong a[href]'),
        ...Array.from(root.querySelectorAll('a[role="link"][href]')),
      ].filter(Boolean);
      for (const link of links) {
        const text = textOf(link);
        const href = link.href || link.getAttribute('href') || '';
        if (text.length > 1 && text.length <= 80
          && !isActionText(text)
          && !isMetricText(text)
          && !isTimestampText(text)
          && !/\\/groups\\/|\\/watch\\/|\\/reel\\/|\\/events\\/|\\/friends\\//i.test(href)) {
          return text;
        }
      }
      return '';
    }

    function contentBlocks(root, author) {
      return visibleBlocks(root).filter((text) => {
        if (text === author) return false;
        if (text.length <= 10) return false;
        if (isSuggestionOrChrome(text) || isSponsored(text)) return false;
        if (isActionText(text) || isMetricText(text) || isTimestampText(text)) return false;
        if (/^(See more|查看更多|更多)$/i.test(text)) return false;
        return true;
      });
    }

    function extractPost(root, index) {
      const fullText = textOf(root);
      if (!fullText || isSuggestionOrChrome(fullText) || isSponsored(fullText)) return null;

      const author = findAuthor(root);
      const blocks = contentBlocks(root, author);
      const content = clean(blocks.join(' '));
      const postUrl = postUrlFrom(root);
      const kinds = actionKinds(root);

      if (!author && !content) return null;
      if (!content && !postUrl && kinds.size < 2) return null;

      const likesMatch = fullText.match(/所有心情:\\s*(\\d[\\d,.\\s万亿KMk]*)/)
        || fullText.match(/All:\\s*(\\d[\\d,.KMk]*)/)
        || fullText.match(/(\\d[\\d,.KMk]*)\\s*(?:likes?|reactions?)/i);
      const commentsMatch = fullText.match(/([\\d,.]+\\s*[万亿]?)\\s*条评论/)
        || fullText.match(/(\\d[\\d,.KMk]*)\\s*comments?/i);
      const sharesMatch = fullText.match(/([\\d,.]+\\s*[万亿]?)\\s*次分享/)
        || fullText.match(/(\\d[\\d,.KMk]*)\\s*shares?/i);

      return {
        index,
        author: author.substring(0, 50),
        content: content.substring(0, 120),
        likes: likesMatch ? clean(likesMatch[1]) : '-',
        comments: commentsMatch ? clean(commentsMatch[1]) : '-',
        shares: sharesMatch ? clean(sharesMatch[1]) : '-',
      };
    }

    function primaryContainers() {
      return Array.from(document.querySelectorAll('[role="article"]'))
        .filter((el) => textOf(el).length > 30);
    }

    // Walk up (at most 16 levels, never reaching main/body) from each action button
    // to the nearest ancestor that has like + comment/share controls and post evidence.
    function fallbackContainers() {
      const main = document.querySelector('[role="main"]');
      if (!main) return [];
      const buttons = Array.from(main.querySelectorAll('[aria-label="Like"], [aria-label="赞"], [aria-label="Comment"], [aria-label="评论"], [aria-label="Share"], [aria-label="分享"]'));
      const seen = new WeakSet();
      const containers = [];
      for (const button of buttons) {
        let node = button.parentElement;
        for (let depth = 0; depth < 16 && node && node !== main && node !== document.body; depth += 1, node = node.parentElement) {
          const text = textOf(node);
          const kinds = actionKinds(node);
          const blocks = visibleBlocks(node);
          const hasPostEvidence = Boolean(postUrlFrom(node)) || blocks.some((block) => block.length > 20 && !isActionText(block) && !isMetricText(block));
          if (text.length >= 80 && kinds.has('like') && (kinds.has('comment') || kinds.has('share')) && hasPostEvidence) {
            if (!seen.has(node)) {
              seen.add(node);
              containers.push(node);
            }
            break;
          }
        }
      }
      return containers;
    }

    // Keep the first container per permalink (or per leading content when no permalink).
    function dedupe(containers) {
      const seen = new Set();
      const result = [];
      for (const node of containers) {
        const key = postUrlFrom(node) || contentBlocks(node, findAuthor(node)).join('|').substring(0, 200);
        if (!key || seen.has(key)) continue;
        seen.add(key);
        result.push(node);
      }
      return result;
    }

    if (isAuthPage()) return { status: 'auth', rows: [], diagnostics: {} };

    const primary = primaryContainers();
    const combined = dedupe([...primary, ...fallbackContainers()]);
    const rows = [];
    for (const container of combined) {
      const row = extractPost(container, rows.length + 1);
      if (row) rows.push(row);
      if (rows.length >= limit) break;
    }

    return {
      status: rows.length ? 'ok' : (isExplicitEmptyFeed() ? 'empty' : 'no_rows'),
      rows,
      diagnostics: {
        articleCount: document.querySelectorAll('[role="article"]').length,
        primaryCount: primary.length,
        fallbackActionCount: document.querySelectorAll('[role="main"] [aria-label="Like"], [role="main"] [aria-label="赞"], [role="main"] [aria-label="Comment"], [role="main"] [aria-label="评论"]').length,
        mainTextLength: textOf(document.querySelector('[role="main"]')).length,
      },
    };
  })()`;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Record the underlying failure on a wrapper error without changing the
 * wrapper's type or message. The property is non-enumerable, like the native
 * `cause` option, so error serialization is unaffected.
 */
function attachCause(error, cause) {
  Object.defineProperty(error, 'cause', {
    value: cause,
    writable: true,
    configurable: true,
    enumerable: false,
  });
  return error;
}

/**
 * Command handler for `facebook feed`: open the Facebook home page, run the
 * in-page extractor and return the feed rows.
 *
 * @param {object} page - Browser page handle exposing `goto(url, options)` and
 *   `evaluate(script)`.
 * @param {object} [kwargs] - Parsed CLI arguments; `limit` defaults to 10.
 * @returns {Promise<object[]>} Extracted rows (never empty).
 * @throws {ArgumentError} From requireLimit, before any navigation happens.
 * @throws {AuthRequiredError} When the extractor reports a login page.
 * @throws {EmptyResultError} When the feed is empty or nothing was visible.
 * @throws {CommandExecutionError} When navigation or evaluation fails, the
 *   payload is malformed, or the page rendered but no rows could be extracted.
 *   Navigation/evaluation failures carry the original error as `cause`.
 */
async function getFacebookFeed(page, kwargs) {
  // Validate first so a bad --limit never triggers browser work.
  const limit = requireLimit(kwargs?.limit ?? 10);

  try {
    await page.goto(FACEBOOK_HOME, { settleMs: 4000 });
  } catch (err) {
    throw attachCause(
      new CommandExecutionError(
        `Failed to navigate to facebook feed: ${err instanceof Error ? err.message : err}`,
        'Check that facebook.com is reachable and the browser extension is connected.',
      ),
      err,
    );
  }

  let payload;
  try {
    payload = unwrapBrowserResult(await page.evaluate(buildFeedExtractScript(limit)));
  } catch (err) {
    throw attachCause(
      new CommandExecutionError(
        `Failed to read facebook feed: ${err instanceof Error ? err.message : err}`,
        'Facebook may not have rendered or the feed markup may have changed.',
      ),
      err,
    );
  }

  if (!payload || typeof payload !== 'object' || !Array.isArray(payload.rows)) {
    throw new CommandExecutionError('facebook feed returned malformed extraction payload');
  }

  if (payload.status === 'auth') {
    throw new AuthRequiredError('www.facebook.com', 'Open Chrome and log in to Facebook before retrying.');
  }

  if (payload.rows.length > 0) {
    return payload.rows;
  }

  if (payload.status === 'empty') {
    throw new EmptyResultError('facebook feed', 'Facebook did not show any feed posts for this account.');
  }

  // No rows and no explicit "empty" marker: if the page clearly rendered
  // something, report it as an extraction failure rather than an empty feed.
  const diagnostics = payload.diagnostics || {};
  if (diagnostics.articleCount || diagnostics.fallbackActionCount || diagnostics.mainTextLength > 200) {
    throw new CommandExecutionError(
      'facebook feed page rendered but no feed rows could be extracted',
      `Diagnostics: articles=${diagnostics.articleCount || 0}, actions=${diagnostics.fallbackActionCount || 0}, mainTextLength=${diagnostics.mainTextLength || 0}.`,
    );
  }

  throw new EmptyResultError('facebook feed', 'No Facebook feed content was visible in the current browser session.');
}
|
|
281
|
+
|
|
282
|
+
// Output columns in display order; these are the keys of each extracted feed row.
const FEED_COLUMNS = ['index', 'author', 'content', 'likes', 'comments', 'shares'];

// CLI arguments accepted by the command.
const FEED_ARGS = [
  { name: 'limit', type: 'int', default: 10, help: 'Number of posts' },
];

/**
 * Registry definition of the `facebook feed` command.
 * `navigateBefore` is false; the handler (getFacebookFeed) calls `page.goto` itself.
 */
const command = {
  site: 'facebook',
  name: 'feed',
  access: 'read',
  description: 'Get your Facebook news feed',
  domain: 'www.facebook.com',
  strategy: Strategy.COOKIE,
  browser: true,
  navigateBefore: false,
  args: FEED_ARGS,
  columns: FEED_COLUMNS,
  func: getFacebookFeed,
};

cli(command);

// Internals exposed for the unit tests only.
export const __test__ = {
  buildFeedExtractScript,
  command,
  getFacebookFeed,
  requireLimit,
};
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { JSDOM } from 'jsdom';
|
|
3
|
+
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
4
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
5
|
+
import { __test__ } from './feed.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Run the generated feed extractor against an HTML fixture.
 *
 * A JSDOM instance is created for `html` at `url`, and the extractor source
 * is evaluated with that instance's `window` and `document` bound as
 * parameters, so the script sees them instead of any real globals.
 *
 * @param {string} html - Markup to load into the DOM.
 * @param {number} [limit=10] - Row limit passed to the script builder.
 * @param {string} [url='https://www.facebook.com/'] - Document URL (drives `window.location`).
 * @returns {object} The extractor's `{ status, rows, diagnostics }` payload.
 */
function runExtract(html, limit = 10, url = 'https://www.facebook.com/') {
  const dom = new JSDOM(html, { url });
  const script = __test__.buildFeedExtractScript(limit);
  const evaluate = Function('window', 'document', `return ${script};`);
  const { window } = dom;
  return evaluate(window, window.document);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Build a minimal page double for the command handler.
 *
 * @param {unknown} payload - Value that `evaluate` resolves with.
 * @returns {{ goto: Function, evaluate: Function }} Mocks where `goto`
 *   resolves with undefined and `evaluate` resolves with `payload`.
 */
function createPage(payload) {
  const goto = vi.fn().mockResolvedValue(undefined);
  const evaluate = vi.fn().mockResolvedValue(payload);
  return { goto, evaluate };
}
|
|
18
|
+
|
|
19
|
+
// Behavioural tests for the facebook/feed command: registry contract, in-page
// extraction against HTML fixtures, and mapping of extractor payloads to errors.
describe('facebook feed', () => {
  const ROW_COLUMNS = ['index', 'author', 'content', 'likes', 'comments', 'shares'];

  // Invoke the registered handler against a fresh page double.
  const invoke = (payload, kwargs) => __test__.command.func(createPage(payload), kwargs);

  it('registers the feed command with the existing row contract', () => {
    const cmd = getRegistry().get('facebook/feed');
    expect(cmd).toBeDefined();
    expect(cmd.columns).toEqual(ROW_COLUMNS);
  });

  it('extracts existing role=article feed rows', () => {
    const html = `
      <main role="main">
        <div role="article">
          <h2><a href="https://www.facebook.com/alice">Alice Example</a></h2>
          <div dir="auto">This is a normal Facebook feed post with enough text to extract.</div>
          <span>All: 12</span>
          <span>3 comments</span>
          <span>2 shares</span>
          <div aria-label="Like"></div><div aria-label="Comment"></div>
        </div>
      </main>
    `;
    const payload = runExtract(html);

    expect(payload.status).toBe('ok');
    expect(payload.rows).toEqual([{
      index: 1,
      author: 'Alice Example',
      content: 'This is a normal Facebook feed post with enough text to extract.',
      likes: '12',
      comments: '3',
      shares: '2',
    }]);
  });

  it('falls back from empty article nodes to action-bounded feed containers', () => {
    const html = `
      <main role="main">
        <div role="article"></div>
        <section>
          <div>
            <h2><a href="https://www.facebook.com/bob/posts/123">Bob Builder</a></h2>
            <div dir="auto">Fallback post body from a Facebook feed card with empty article text.</div>
            <a href="https://www.facebook.com/bob/posts/123">Permalink</a>
            <span>All: 1.2K</span>
            <span>4 comments</span>
            <span>1 shares</span>
            <div><button aria-label="Like">Like</button><button aria-label="Comment">Comment</button></div>
          </div>
        </section>
      </main>
    `;
    const payload = runExtract(html);

    expect(payload.status).toBe('ok');
    expect(payload.rows).toEqual([{
      index: 1,
      author: 'Bob Builder',
      content: 'Fallback post body from a Facebook feed card with empty article text.',
      likes: '1.2K',
      comments: '4',
      shares: '1',
    }]);
  });

  it('does not turn suggestions or side chrome action buttons into feed rows', () => {
    const html = `
      <main role="main">
        <aside>
          <h2>People you may know</h2>
          <div dir="auto">Charlie Suggested</div>
          <div dir="auto">Add friend from suggested people card with plenty of text.</div>
          <button aria-label="Like">Like</button>
          <button aria-label="Comment">Comment</button>
        </aside>
        <nav>
          <div dir="auto">Navigation item with a Like button but not a feed post.</div>
          <button aria-label="Like">Like</button>
          <button aria-label="Comment">Comment</button>
        </nav>
      </main>
    `;
    const payload = runExtract(html);

    expect(payload.status).toBe('no_rows');
    expect(payload.rows).toEqual([]);
  });

  it('still considers bounded fallback rows when article nodes are suggestion chrome', () => {
    const html = `
      <main role="main">
        <div role="article">
          <h2>People you may know</h2>
          <div dir="auto">Suggested profile card with enough text to look article-like.</div>
          <button aria-label="Like">Like</button>
          <button aria-label="Comment">Comment</button>
        </div>
        <section>
          <div>
            <h2><a href="https://www.facebook.com/dana/posts/456">Dana Poster</a></h2>
            <div dir="auto">Fallback feed post should still be extracted after suggestion articles are filtered.</div>
            <a href="https://www.facebook.com/dana/posts/456">Permalink</a>
            <button aria-label="Like">Like</button>
            <button aria-label="Comment">Comment</button>
          </div>
        </section>
      </main>
    `;
    const payload = runExtract(html, 1);

    expect(payload.status).toBe('ok');
    expect(payload.rows).toEqual([{
      index: 1,
      author: 'Dana Poster',
      content: 'Fallback feed post should still be extracted after suggestion articles are filtered.',
      likes: '-',
      comments: '-',
      shares: '-',
    }]);
  });

  it('reports auth pages from the browser extractor', () => {
    const payload = runExtract('<main role="main">Log in to Facebook</main>', 10, 'https://www.facebook.com/login/');
    expect(payload.status).toBe('auth');
    expect(payload.rows).toEqual([]);
  });

  it('validates limit before browser navigation', async () => {
    const page = createPage({ status: 'ok', rows: [] });
    await expect(__test__.command.func(page, { limit: 0 })).rejects.toBeInstanceOf(ArgumentError);
    expect(page.goto).not.toHaveBeenCalled();
  });

  it('maps browser envelopes and returns extracted rows', async () => {
    const envelope = {
      session: 'site:facebook',
      data: { status: 'ok', rows: [{ index: 1, author: 'A', content: 'Body', likes: '-', comments: '-', shares: '-' }] },
    };

    await expect(invoke(envelope, { limit: 1 })).resolves.toEqual([{
      index: 1,
      author: 'A',
      content: 'Body',
      likes: '-',
      comments: '-',
      shares: '-',
    }]);
  });

  it('maps auth, real empty, parser drift, and malformed payloads to typed errors', async () => {
    const kwargs = { limit: 1 };
    const driftPayload = {
      status: 'no_rows',
      rows: [],
      diagnostics: { articleCount: 1, fallbackActionCount: 2, mainTextLength: 500 },
    };

    await expect(invoke({ status: 'auth', rows: [] }, kwargs))
      .rejects.toBeInstanceOf(AuthRequiredError);
    await expect(invoke({ status: 'empty', rows: [] }, kwargs))
      .rejects.toBeInstanceOf(EmptyResultError);
    await expect(invoke(driftPayload, kwargs))
      .rejects.toBeInstanceOf(CommandExecutionError);
    await expect(invoke({ rows: null }, kwargs))
      .rejects.toBeInstanceOf(CommandExecutionError);
  });
});
|
package/clis/reddit/comment.js
CHANGED
|
@@ -8,7 +8,6 @@ cli({
|
|
|
8
8
|
domain: 'reddit.com',
|
|
9
9
|
strategy: Strategy.COOKIE,
|
|
10
10
|
browser: true,
|
|
11
|
-
siteSession: 'persistent',
|
|
12
11
|
args: [
|
|
13
12
|
{ name: 'post-id', type: 'string', required: true, positional: true, help: 'Post ID (e.g. 1abc123) or fullname (t3_xxx)' },
|
|
14
13
|
{ name: 'text', type: 'string', required: true, positional: true, help: 'Comment text' },
|
package/clis/reddit/frontpage.js
CHANGED
package/clis/reddit/home.js
CHANGED
package/clis/reddit/popular.js
CHANGED
package/clis/reddit/read.js
CHANGED
|
@@ -105,7 +105,6 @@ cli({
|
|
|
105
105
|
domain: 'reddit.com',
|
|
106
106
|
strategy: Strategy.COOKIE,
|
|
107
107
|
browser: true,
|
|
108
|
-
siteSession: 'persistent',
|
|
109
108
|
args: [
|
|
110
109
|
{ name: 'post-id', required: true, positional: true, help: 'Post ID (e.g. 1abc123) or full URL' },
|
|
111
110
|
{ name: 'sort', default: 'best', help: 'Comment sort: best, top, new, controversial, old, qa' },
|
package/clis/reddit/read.test.js
CHANGED
|
@@ -78,9 +78,9 @@ function makeRuntimePage(fetchImpl) {
|
|
|
78
78
|
describe('reddit read adapter', () => {
|
|
79
79
|
const command = getRegistry().get('reddit/read');
|
|
80
80
|
|
|
81
|
-
it('
|
|
81
|
+
it('uses an ephemeral Reddit site tab by default', () => {
|
|
82
82
|
expect(command?.browser).toBe(true);
|
|
83
|
-
expect(command?.siteSession).
|
|
83
|
+
expect(command?.siteSession).toBeUndefined();
|
|
84
84
|
expect(command?.columns).toEqual(['type', 'author', 'score', 'text']);
|
|
85
85
|
});
|
|
86
86
|
|
package/clis/reddit/save.js
CHANGED
|
@@ -8,7 +8,6 @@ cli({
|
|
|
8
8
|
domain: 'reddit.com',
|
|
9
9
|
strategy: Strategy.COOKIE,
|
|
10
10
|
browser: true,
|
|
11
|
-
siteSession: 'persistent',
|
|
12
11
|
args: [
|
|
13
12
|
{ name: 'post-id', type: 'string', required: true, positional: true, help: 'Post ID (e.g. 1abc123) or fullname (t3_xxx)' },
|
|
14
13
|
{ name: 'undo', type: 'boolean', default: false, help: 'Unsave instead of save' },
|
package/clis/reddit/saved.js
CHANGED
package/clis/reddit/search.js
CHANGED
package/clis/reddit/subreddit.js
CHANGED
package/clis/reddit/subscribe.js
CHANGED
|
@@ -8,7 +8,6 @@ cli({
|
|
|
8
8
|
domain: 'reddit.com',
|
|
9
9
|
strategy: Strategy.COOKIE,
|
|
10
10
|
browser: true,
|
|
11
|
-
siteSession: 'persistent',
|
|
12
11
|
args: [
|
|
13
12
|
{ name: 'subreddit', type: 'string', required: true, positional: true, help: 'Subreddit name (e.g. python)' },
|
|
14
13
|
{ name: 'undo', type: 'boolean', default: false, help: 'Unsubscribe instead of subscribe' },
|
package/clis/reddit/upvote.js
CHANGED
|
@@ -8,7 +8,6 @@ cli({
|
|
|
8
8
|
domain: 'reddit.com',
|
|
9
9
|
strategy: Strategy.COOKIE,
|
|
10
10
|
browser: true,
|
|
11
|
-
siteSession: 'persistent',
|
|
12
11
|
args: [
|
|
13
12
|
{ name: 'post-id', type: 'string', required: true, positional: true, help: 'Post ID (e.g. 1abc123) or fullname (t3_xxx)' },
|
|
14
13
|
{ name: 'direction', type: 'string', default: 'up', help: 'Vote direction: up, down, none' },
|