@jackwener/opencli 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +151 -75
- package/README.md +11 -8
- package/README.zh-CN.md +11 -8
- package/SKILL.md +42 -15
- package/dist/browser.d.ts +11 -1
- package/dist/browser.js +95 -3
- package/dist/clis/bilibili/dynamic.d.ts +1 -0
- package/dist/clis/bilibili/dynamic.js +33 -0
- package/dist/clis/bilibili/ranking.d.ts +1 -0
- package/dist/clis/bilibili/ranking.js +24 -0
- package/dist/clis/bilibili/subtitle.d.ts +1 -0
- package/dist/clis/bilibili/subtitle.js +86 -0
- package/dist/clis/reddit/frontpage.yaml +30 -0
- package/dist/clis/reddit/hot.yaml +3 -2
- package/dist/clis/reddit/search.yaml +34 -0
- package/dist/clis/reddit/subreddit.yaml +39 -0
- package/dist/clis/twitter/bookmarks.yaml +85 -0
- package/dist/clis/twitter/profile.d.ts +1 -0
- package/dist/clis/twitter/profile.js +56 -0
- package/dist/clis/twitter/search.d.ts +1 -0
- package/dist/clis/twitter/search.js +60 -0
- package/dist/clis/twitter/timeline.d.ts +1 -0
- package/dist/clis/twitter/timeline.js +47 -0
- package/dist/clis/xiaohongshu/user.d.ts +1 -0
- package/dist/clis/xiaohongshu/user.js +40 -0
- package/dist/clis/xueqiu/feed.yaml +53 -0
- package/dist/clis/xueqiu/hot-stock.yaml +49 -0
- package/dist/clis/xueqiu/hot.yaml +46 -0
- package/dist/clis/xueqiu/search.yaml +53 -0
- package/dist/clis/xueqiu/stock.yaml +67 -0
- package/dist/clis/xueqiu/watchlist.yaml +46 -0
- package/dist/clis/zhihu/hot.yaml +6 -2
- package/dist/clis/zhihu/search.yaml +3 -1
- package/dist/engine.d.ts +1 -1
- package/dist/engine.js +9 -1
- package/dist/explore.js +50 -0
- package/dist/main.d.ts +1 -1
- package/dist/main.js +12 -5
- package/dist/pipeline/steps/browser.js +4 -8
- package/dist/pipeline/steps/fetch.js +19 -6
- package/dist/pipeline/steps/intercept.js +56 -29
- package/dist/pipeline/steps/tap.js +8 -6
- package/dist/pipeline/template.js +3 -1
- package/dist/pipeline/template.test.js +6 -0
- package/dist/types.d.ts +11 -1
- package/package.json +1 -1
- package/src/browser.ts +101 -6
- package/src/clis/bilibili/dynamic.ts +34 -0
- package/src/clis/bilibili/ranking.ts +25 -0
- package/src/clis/bilibili/subtitle.ts +100 -0
- package/src/clis/reddit/frontpage.yaml +30 -0
- package/src/clis/reddit/hot.yaml +3 -2
- package/src/clis/reddit/search.yaml +34 -0
- package/src/clis/reddit/subreddit.yaml +39 -0
- package/src/clis/twitter/bookmarks.yaml +85 -0
- package/src/clis/twitter/profile.ts +61 -0
- package/src/clis/twitter/search.ts +65 -0
- package/src/clis/twitter/timeline.ts +50 -0
- package/src/clis/xiaohongshu/user.ts +45 -0
- package/src/clis/xueqiu/feed.yaml +53 -0
- package/src/clis/xueqiu/hot-stock.yaml +49 -0
- package/src/clis/xueqiu/hot.yaml +46 -0
- package/src/clis/xueqiu/search.yaml +53 -0
- package/src/clis/xueqiu/stock.yaml +67 -0
- package/src/clis/xueqiu/watchlist.yaml +46 -0
- package/src/clis/zhihu/hot.yaml +6 -2
- package/src/clis/zhihu/search.yaml +3 -1
- package/src/engine.ts +10 -1
- package/src/explore.ts +51 -0
- package/src/main.ts +11 -5
- package/src/pipeline/steps/browser.ts +4 -7
- package/src/pipeline/steps/fetch.ts +22 -6
- package/src/pipeline/steps/intercept.ts +58 -28
- package/src/pipeline/steps/tap.ts +8 -6
- package/src/pipeline/template.test.ts +6 -0
- package/src/pipeline/template.ts +3 -1
- package/src/types.ts +4 -1
- package/dist/clis/index.d.ts +0 -22
- package/dist/clis/index.js +0 -34
- package/src/clis/index.ts +0 -46
|
@@ -36,6 +36,8 @@ export async function stepTap(page, params, data, args) {
|
|
|
36
36
|
async () => {
|
|
37
37
|
// ── 1. Setup capture proxy (fetch + XHR dual interception) ──
|
|
38
38
|
let captured = null;
|
|
39
|
+
let captureResolve;
|
|
40
|
+
const capturePromise = new Promise(r => { captureResolve = r; });
|
|
39
41
|
const capturePattern = ${JSON.stringify(capturePattern)};
|
|
40
42
|
|
|
41
43
|
// Intercept fetch API
|
|
@@ -46,7 +48,7 @@ export async function stepTap(page, params, data, args) {
|
|
|
46
48
|
const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
|
|
47
49
|
: fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
|
|
48
50
|
if (capturePattern && url.includes(capturePattern) && !captured) {
|
|
49
|
-
try { captured = await resp.clone().json(); } catch {}
|
|
51
|
+
try { captured = await resp.clone().json(); captureResolve(); } catch {}
|
|
50
52
|
}
|
|
51
53
|
} catch {}
|
|
52
54
|
return resp;
|
|
@@ -65,13 +67,13 @@ export async function stepTap(page, params, data, args) {
|
|
|
65
67
|
const origHandler = xhr.onreadystatechange;
|
|
66
68
|
xhr.onreadystatechange = function() {
|
|
67
69
|
if (xhr.readyState === 4 && !captured) {
|
|
68
|
-
try { captured = JSON.parse(xhr.responseText); } catch {}
|
|
70
|
+
try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {}
|
|
69
71
|
}
|
|
70
72
|
if (origHandler) origHandler.apply(this, arguments);
|
|
71
73
|
};
|
|
72
74
|
const origOnload = xhr.onload;
|
|
73
75
|
xhr.onload = function() {
|
|
74
|
-
if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
|
|
76
|
+
if (!captured) { try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} }
|
|
75
77
|
if (origOnload) origOnload.apply(this, arguments);
|
|
76
78
|
};
|
|
77
79
|
}
|
|
@@ -111,9 +113,9 @@ export async function stepTap(page, params, data, args) {
|
|
|
111
113
|
await ${actionCall};
|
|
112
114
|
|
|
113
115
|
// ── 4. Wait for network response ──
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
await
|
|
116
|
+
if (!captured) {
|
|
117
|
+
const timeoutPromise = new Promise(r => setTimeout(r, ${timeout} * 1000));
|
|
118
|
+
await Promise.race([capturePromise, timeoutPromise]);
|
|
117
119
|
}
|
|
118
120
|
} finally {
|
|
119
121
|
// ── 5. Always restore originals ──
|
|
@@ -68,7 +68,7 @@ export function evalExpr(expr, ctx) {
|
|
|
68
68
|
* Apply a named filter to a value.
|
|
69
69
|
* Supported filters:
|
|
70
70
|
* default(val), join(sep), upper, lower, truncate(n), trim,
|
|
71
|
-
* replace(old,new), keys, length, first, last
|
|
71
|
+
* replace(old,new), keys, length, first, last, json
|
|
72
72
|
*/
|
|
73
73
|
function applyFilter(filterExpr, value) {
|
|
74
74
|
const match = filterExpr.match(/^(\w+)(?:\((.+)\))?$/);
|
|
@@ -112,6 +112,8 @@ function applyFilter(filterExpr, value) {
|
|
|
112
112
|
return Array.isArray(value) ? value[0] : value;
|
|
113
113
|
case 'last':
|
|
114
114
|
return Array.isArray(value) ? value[value.length - 1] : value;
|
|
115
|
+
case 'json':
|
|
116
|
+
return JSON.stringify(value ?? null);
|
|
115
117
|
default:
|
|
116
118
|
return value;
|
|
117
119
|
}
|
|
@@ -72,6 +72,12 @@ describe('evalExpr', () => {
|
|
|
72
72
|
it('applies length filter', () => {
|
|
73
73
|
expect(evalExpr('item.items | length', { item: { items: [1, 2, 3] } })).toBe(3);
|
|
74
74
|
});
|
|
75
|
+
it('applies json filter to strings with quotes', () => {
|
|
76
|
+
expect(evalExpr('args.keyword | json', { args: { keyword: "O'Reilly" } })).toBe('"O\'Reilly"');
|
|
77
|
+
});
|
|
78
|
+
it('applies json filter to nullish values', () => {
|
|
79
|
+
expect(evalExpr('args.keyword | json', { args: {} })).toBe('null');
|
|
80
|
+
});
|
|
75
81
|
});
|
|
76
82
|
describe('render', () => {
|
|
77
83
|
it('renders full expression', () => {
|
package/dist/types.d.ts
CHANGED
|
@@ -16,7 +16,11 @@ export interface IPage {
|
|
|
16
16
|
click(ref: string): Promise<void>;
|
|
17
17
|
typeText(ref: string, text: string): Promise<void>;
|
|
18
18
|
pressKey(key: string): Promise<void>;
|
|
19
|
-
wait(
|
|
19
|
+
wait(options: number | {
|
|
20
|
+
text?: string;
|
|
21
|
+
time?: number;
|
|
22
|
+
timeout?: number;
|
|
23
|
+
}): Promise<void>;
|
|
20
24
|
tabs(): Promise<any>;
|
|
21
25
|
closeTab(index?: number): Promise<void>;
|
|
22
26
|
newTab(): Promise<void>;
|
|
@@ -24,4 +28,10 @@ export interface IPage {
|
|
|
24
28
|
networkRequests(includeStatic?: boolean): Promise<any>;
|
|
25
29
|
consoleMessages(level?: string): Promise<any>;
|
|
26
30
|
scroll(direction?: string, amount?: number): Promise<void>;
|
|
31
|
+
autoScroll(options?: {
|
|
32
|
+
times?: number;
|
|
33
|
+
delayMs?: number;
|
|
34
|
+
}): Promise<void>;
|
|
35
|
+
installInterceptor(pattern: string): Promise<void>;
|
|
36
|
+
getInterceptedRequests(): Promise<any[]>;
|
|
27
37
|
}
|
package/package.json
CHANGED
package/src/browser.ts
CHANGED
|
@@ -104,8 +104,13 @@ export class Page implements IPage {
|
|
|
104
104
|
await this.call('tools/call', { name: 'browser_press_key', arguments: { key } });
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
-
async wait(
|
|
108
|
-
|
|
107
|
+
/**
 * Wait using the MCP `browser_wait_for` tool.
 *
 * @param options A bare number is forwarded as `{ time: <number> }`; an object
 *   (`text`, `time`, `timeout`) is forwarded unchanged as the tool arguments.
 */
async wait(options: number | { text?: string; time?: number; timeout?: number }): Promise<void> {
  // Object form goes straight to the native tool, which can await text on its own
  // (no DOM polling on our side).
  const waitArgs = typeof options === 'number' ? { time: options } : options;
  await this.call('tools/call', { name: 'browser_wait_for', arguments: waitArgs });
}
|
|
110
115
|
|
|
111
116
|
async tabs(): Promise<any> {
|
|
@@ -135,6 +140,91 @@ export class Page implements IPage {
|
|
|
135
140
|
async scroll(direction: string = 'down', amount: number = 500): Promise<void> {
|
|
136
141
|
await this.call('tools/call', { name: 'browser_press_key', arguments: { key: direction === 'down' ? 'PageDown' : 'PageUp' } });
|
|
137
142
|
}
|
|
143
|
+
|
|
144
|
+
/**
 * Scroll to the bottom of the page several times, pausing after each scroll
 * until the document grows taller or a per-round timeout elapses.
 *
 * @param options.times   Number of scroll rounds (defaults to 3).
 * @param options.delayMs Maximum wait per round in milliseconds (defaults to 2000).
 */
async autoScroll(options: { times?: number; delayMs?: number } = {}): Promise<void> {
  const rounds = options.times ?? 3;
  const roundTimeoutMs = options.delayMs ?? 2000;

  // The script runs inside the page: each round scrolls to the current bottom,
  // then a MutationObserver resolves early once scrollHeight has increased.
  const script = `
    async () => {
      const maxTimes = ${rounds};
      const maxWaitMs = ${roundTimeoutMs};
      for (let i = 0; i < maxTimes; i++) {
        const lastHeight = document.body.scrollHeight;
        window.scrollTo(0, lastHeight);
        await new Promise(resolve => {
          let timeoutId;
          const observer = new MutationObserver(() => {
            if (document.body.scrollHeight > lastHeight) {
              clearTimeout(timeoutId);
              observer.disconnect();
              setTimeout(resolve, 100); // Small debounce for rendering
            }
          });
          observer.observe(document.body, { childList: true, subtree: true });
          timeoutId = setTimeout(() => {
            observer.disconnect();
            resolve(null);
          }, maxWaitMs);
        });
      }
    }
  `;
  await this.evaluate(script);
}
|
|
174
|
+
|
|
175
|
+
/**
 * Install (once per page) XHR and fetch hooks that record JSON responses whose
 * URL contains any registered pattern into `window.__opencli_xhr`.
 *
 * Calling this again only registers an additional pattern; the hooks themselves
 * are guarded by `window.__patched_xhr`.
 *
 * @param pattern Substring to look for in request URLs.
 */
async installInterceptor(pattern: string): Promise<void> {
  // Emit the pattern as a JS string literal so quotes, backslashes or newlines
  // in it cannot terminate the literal or inject code into the page script.
  const patternLiteral = JSON.stringify(pattern);
  const js = `
    () => {
      window.__opencli_xhr = window.__opencli_xhr || [];
      window.__opencli_patterns = window.__opencli_patterns || [];
      if (!window.__opencli_patterns.includes(${patternLiteral})) {
        window.__opencli_patterns.push(${patternLiteral});
      }

      if (!window.__patched_xhr) {
        // URLs may arrive as URL objects (XHR.open / fetch accept them), so coerce to string first.
        const checkMatch = (url) => {
          const s = String(url);
          return window.__opencli_patterns.some(p => s.includes(p));
        };

        const XHR = XMLHttpRequest.prototype;
        const open = XHR.open;
        const send = XHR.send;
        XHR.open = function(method, url) {
          this._url = url;
          return open.call(this, method, url, ...Array.prototype.slice.call(arguments, 2));
        };
        XHR.send = function() {
          this.addEventListener('load', function() {
            if (checkMatch(this._url)) {
              // Non-JSON bodies are skipped on purpose (best-effort capture).
              try { window.__opencli_xhr.push({url: String(this._url), data: JSON.parse(this.responseText)}); } catch(e){}
            }
          });
          return send.apply(this, arguments);
        };

        const origFetch = window.fetch;
        window.fetch = async function(...args) {
          const u = typeof args[0] === 'string' ? args[0]
            : args[0] instanceof Request ? args[0].url : String(args[0]);
          const res = await origFetch.apply(this, args);
          // Parse on a later tick from a clone so the caller's response body stays unread.
          setTimeout(async () => {
            try {
              if (checkMatch(u)) {
                const clone = res.clone();
                const j = await clone.json();
                window.__opencli_xhr.push({url: u, data: j});
              }
            } catch(e) {}
          }, 0);
          return res;
        };
        window.__patched_xhr = true;
      }
    }
  `;
  await this.evaluate(js);
}
|
|
224
|
+
|
|
225
|
+
/**
 * Read `window.__opencli_xhr` from the page.
 *
 * @returns The value stored there, or an empty array when it is missing/falsy.
 */
async getInterceptedRequests(): Promise<any[]> {
  const captured = await this.evaluate('() => window.__opencli_xhr');
  return captured || [];
}
|
|
138
228
|
}
|
|
139
229
|
|
|
140
230
|
/**
|
|
@@ -158,10 +248,15 @@ export class PlaywrightMCP {
|
|
|
158
248
|
return new Promise<Page>((resolve, reject) => {
|
|
159
249
|
const timer = setTimeout(() => reject(new Error(`Timed out connecting to browser (${timeout}s)`)), timeout * 1000);
|
|
160
250
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
251
|
+
const mcpArgs = [mcpPath, '--extension'];
|
|
252
|
+
if (process.env.OPENCLI_BROWSER_EXECUTABLE_PATH) {
|
|
253
|
+
mcpArgs.push('--executablePath', process.env.OPENCLI_BROWSER_EXECUTABLE_PATH);
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
this._proc = spawn('node', mcpArgs, {
|
|
257
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
258
|
+
env: { ...process.env, ...(process.env.PLAYWRIGHT_MCP_EXTENSION_TOKEN ? { PLAYWRIGHT_MCP_EXTENSION_TOKEN: process.env.PLAYWRIGHT_MCP_EXTENSION_TOKEN } : {}) },
|
|
259
|
+
});
|
|
165
260
|
|
|
166
261
|
// Increase max listeners to avoid warnings
|
|
167
262
|
this._proc.setMaxListeners(20);
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { apiGet } from '../../bilibili.js';
|
|
3
|
+
|
|
4
|
+
// Registers the `bilibili dynamic` command: lists entries from the dynamic feed endpoint.
cli({
  site: 'bilibili',
  name: 'dynamic',
  description: 'Get Bilibili user dynamic feed',
  domain: 'www.bilibili.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'limit', type: 'int', default: 15 },
  ],
  columns: ['id', 'author', 'text', 'likes', 'url'],
  func: async (page, kwargs) => {
    const response = await apiGet(page, '/x/polymer/web-dynamic/v1/feed/all', { params: {}, signed: false });
    const feed: any[] = response?.data?.items ?? [];
    const wanted = feed.slice(0, Number(kwargs.limit));

    return wanted.map((entry: any) => {
      const dynamicModule = entry.modules?.module_dynamic;
      // Prefer the post's own text; fall back to the attached archive title, then empty.
      const text = dynamicModule?.desc?.text || dynamicModule?.major?.archive?.title || '';
      const dynamicId = entry.id_str;

      return {
        id: dynamicId ?? '',
        author: entry.modules?.module_author?.name ?? '',
        text,
        likes: entry.modules?.module_stat?.like?.count ?? 0,
        url: dynamicId ? `https://t.bilibili.com/${dynamicId}` : '',
      };
    });
  },
});
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { apiGet } from '../../bilibili.js';
|
|
3
|
+
|
|
4
|
+
// Registers the `bilibili ranking` command: lists entries from the ranking v2 endpoint.
cli({
  site: 'bilibili',
  name: 'ranking',
  description: 'Get Bilibili video ranking board',
  domain: 'www.bilibili.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'limit', type: 'int', default: 20 },
  ],
  columns: ['rank', 'title', 'author', 'score', 'url'],
  func: async (page, kwargs) => {
    const response = await apiGet(page, '/x/web-interface/ranking/v2', {
      params: { rid: 0, type: 'all' },
      signed: false,
    });
    const board: any[] = response?.data?.list ?? [];
    const rows: any[] = [];

    for (const [position, video] of board.slice(0, Number(kwargs.limit)).entries()) {
      rows.push({
        rank: position + 1,
        title: video.title ?? '',
        author: video.owner?.name ?? '',
        // The `score` column is filled from the view count.
        score: video.stat?.view ?? 0,
        url: video.bvid ? `https://www.bilibili.com/video/${video.bvid}` : '',
      });
    }
    return rows;
  },
});
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import type { IPage } from '../../types.js';
|
|
3
|
+
import { apiGet } from '../../bilibili.js';
|
|
4
|
+
|
|
5
|
+
// Registers the `bilibili subtitle` command: resolves a video's CID, asks the
// player API for its subtitle tracks, downloads one track and returns its cues.
cli({
  site: 'bilibili',
  name: 'subtitle',
  description: '获取 Bilibili 视频的字幕',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'bvid', required: true },
    { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' },
  ],
  columns: ['index', 'from', 'to', 'content'],
  func: async (page: IPage | null, kwargs: any) => {
    if (!page) throw new Error('Requires browser');

    // Step 1: open the video page first (establishes an authenticated session;
    // the whole video does not need to finish loading).
    await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`);

    // Step 2: read the CID from the page's __INITIAL_STATE__.
    const cid = await page.evaluate(`(async () => {
      const state = window.__INITIAL_STATE__ || {};
      return state?.videoData?.cid;
    })()`);
    if (!cid) {
      throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。');
    }

    // Step 3: fetch the subtitle list through apiGet with Wbi signing enabled.
    // (Original author's note: a plain in-page fetch used to fail because the
    // /wbi/ endpoints strictly validate w_rid and answer unsigned requests with
    // a 403 HTML page.)
    const playerInfo = await apiGet(page, '/x/player/wbi/v2', {
      params: { bvid: kwargs.bvid, cid },
      signed: true, // turn on automatic wbi_sign signing
    });
    if (playerInfo.code !== 0) {
      throw new Error(`获取视频播放信息失败: ${playerInfo.message} (${playerInfo.code})`);
    }

    const tracks = playerInfo.data?.subtitle?.subtitles || [];
    if (tracks.length === 0) {
      throw new Error('此视频没有发现外挂或智能字幕。');
    }

    // Step 4: pick the requested language, falling back to the first track.
    const chosen = (kwargs.lang && tracks.find((t: any) => t.lan === kwargs.lang)) || tracks[0];

    const rawUrl = chosen.subtitle_url;
    if (!rawUrl) {
      throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。');
    }

    // Protocol-relative URLs get an explicit https scheme.
    const finalUrl = rawUrl.startsWith('//') ? 'https:' + rawUrl : rawUrl;

    // Step 5: download and parse the subtitle JSON inside the page.
    const fetchJs = `
      (async () => {
        const url = ${JSON.stringify(finalUrl)};
        const res = await fetch(url);
        const text = await res.text();

        if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) {
          return { error: 'HTML', text: text.substring(0, 100), url };
        }

        try {
          const subJson = JSON.parse(text);
          // B站真实返回格式是 { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json" , body: [{from: 0, to: 0, content: ""}] }
          if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body };
          if (Array.isArray(subJson)) return { success: true, data: subJson };
          return { error: 'UNKNOWN_JSON', data: subJson };
        } catch (e) {
          return { error: 'PARSE_FAILED', text: text.substring(0, 100) };
        }
      })()
    `;
    const outcome = await page.evaluate(fetchJs);

    if (outcome?.error) {
      throw new Error(`字幕获取失败: ${outcome.error}${outcome.text ? ' — ' + outcome.text : ''}`);
    }

    const cues = outcome?.data || [];
    if (!Array.isArray(cues)) {
      throw new Error('解析到的字幕列表对象不符合数组格式');
    }

    // Step 6: shape each cue into an output row (times shown with two decimals).
    return cues.map((cue: any, position: number) => {
      const start = Number(cue.from || 0).toFixed(2);
      const end = Number(cue.to || 0).toFixed(2);
      return {
        index: position + 1,
        from: start + 's',
        to: end + 's',
        content: cue.content,
      };
    });
  },
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Reddit front page: fetches /r/all.json from inside a reddit.com page session.
site: reddit
name: frontpage
description: Reddit Frontpage / r/all
domain: reddit.com
strategy: cookie
browser: true

args:
  limit:
    type: int
    default: 15

columns:
  - title
  - subreddit
  - author
  - upvotes
  - comments
  - url

pipeline:
  - navigate: https://www.reddit.com
  # Returns the listing's children array (or an empty array when absent).
  - evaluate: |
      (async () => {
        const response = await fetch('/r/all.json?limit=${{ args.limit }}', { credentials: 'include' });
        const listing = await response.json();
        return listing?.data?.children || [];
      })()
  - map:
      title: ${{ item.data.title }}
      subreddit: ${{ item.data.subreddit_name_prefixed }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
|
package/src/clis/reddit/hot.yaml
CHANGED
|
@@ -18,9 +18,10 @@ pipeline:
|
|
|
18
18
|
|
|
19
19
|
- evaluate: |
|
|
20
20
|
(async () => {
|
|
21
|
-
const sub =
|
|
21
|
+
const sub = ${{ args.subreddit | json }};
|
|
22
22
|
const path = sub ? '/r/' + sub + '/hot.json' : '/hot.json';
|
|
23
|
-
const
|
|
23
|
+
const limit = ${{ args.limit }};
|
|
24
|
+
const res = await fetch(path + '?limit=' + limit + '&raw_json=1', {
|
|
24
25
|
credentials: 'include'
|
|
25
26
|
});
|
|
26
27
|
const d = await res.json();
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Reddit search: queries /search.json from inside a reddit.com page session.
site: reddit
name: search
description: Search Reddit Posts
domain: reddit.com
strategy: cookie
browser: true

args:
  query:
    type: string
    required: true
  limit:
    type: int
    default: 15

columns: [title, subreddit, author, upvotes, comments, url]

pipeline:
  - navigate: https://www.reddit.com
  # The query is emitted through the `json` filter so it arrives as a valid JS
  # string literal; a raw splice inside quotes breaks on queries containing
  # quotes or backslashes.
  - evaluate: |
      (async () => {
        const q = encodeURIComponent(${{ args.query | json }});
        const res = await fetch('/search.json?q=' + q + '&limit=${{ args.limit }}', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  - map:
      title: ${{ item.data.title }}
      subreddit: ${{ item.data.subreddit_name_prefixed }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Subreddit listing: fetches /r/<name>/<sort>.json from inside a reddit.com page session.
site: reddit
name: subreddit
description: Get posts from a specific Subreddit
domain: reddit.com
strategy: cookie
browser: true

args:
  name:
    type: string
    required: true
  sort:
    type: string
    default: hot
    description: "Sorting method: hot, new, top, rising"
  limit:
    type: int
    default: 15

columns: [title, author, upvotes, comments, url]

pipeline:
  - navigate: https://www.reddit.com
  # String arguments go through the `json` filter so they become valid JS string
  # literals; a raw splice inside quotes breaks on values containing quotes or
  # backslashes.
  - evaluate: |
      (async () => {
        let sub = ${{ args.name | json }};
        // Accept both "name" and "r/name".
        if (sub.startsWith('r/')) sub = sub.slice(2);
        const sort = ${{ args.sort | json }};
        const res = await fetch('/r/' + sub + '/' + sort + '.json?limit=${{ args.limit }}', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  - map:
      title: ${{ item.data.title }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Twitter/X bookmarks: calls the Bookmarks GraphQL endpoint from inside an x.com page session.
site: twitter
name: bookmarks
description: 获取 Twitter 书签列表
domain: x.com
browser: true

args:
  limit:
    type: int
    default: 20
    description: Number of bookmarks to return (default 20)

pipeline:
  - navigate: https://x.com/i/bookmarks
  - wait: 2
  - evaluate: |
      (async () => {
        // The ct0 cookie value is sent back as the CSRF header.
        const csrfCookie = document.cookie.split(';').map(c=>c.trim()).find(c=>c.startsWith('ct0='));
        const ct0 = csrfCookie?.split('=')[1];
        if (!ct0) throw new Error('No ct0 cookie. Hint: Not logged into x.com.');
        const bearer = decodeURIComponent('AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA');
        const requestHeaders = {'Authorization':'Bearer '+bearer, 'X-Csrf-Token':ct0, 'X-Twitter-Auth-Type':'OAuth2Session', 'X-Twitter-Active-User':'yes'};

        // At most 100 items are requested in one call.
        const count = Math.min(${{ args.limit }}, 100);
        const variables = JSON.stringify({count, includePromotedContent: false});
        const features = JSON.stringify({
          rweb_video_screen_enabled: false, profile_label_improvements_pcf_label_in_post_enabled: true,
          responsive_web_profile_redirect_enabled: false, rweb_tipjar_consumption_enabled: false,
          verified_phone_label_enabled: false, creator_subscriptions_tweet_preview_api_enabled: true,
          responsive_web_graphql_timeline_navigation_enabled: true,
          responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
          premium_content_api_read_enabled: false, communities_web_enable_tweet_community_results_fetch: true,
          c9s_tweet_anatomy_moderator_badge_enabled: true,
          articles_preview_enabled: true, responsive_web_edit_tweet_api_enabled: true,
          graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
          view_counts_everywhere_api_enabled: true, longform_notetweets_consumption_enabled: true,
          responsive_web_twitter_article_tweet_consumption_enabled: true,
          tweet_awards_web_tipping_enabled: false,
          content_disclosure_indicator_enabled: true, content_disclosure_ai_generated_indicator_enabled: true,
          freedom_of_speech_not_reach_fetch_enabled: true, standardized_nudges_misinfo: true,
          tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
          longform_notetweets_rich_text_read_enabled: true, longform_notetweets_inline_media_enabled: false,
          responsive_web_enhance_cards_enabled: false
        });
        const endpoint = '/i/api/graphql/Fy0QMy4q_aZCpkO0PnyLYw/Bookmarks?variables=' + encodeURIComponent(variables) + '&features=' + encodeURIComponent(features);
        const resp = await fetch(endpoint, {headers: requestHeaders, credentials: 'include'});
        if (!resp.ok) throw new Error('HTTP ' + resp.status + '. Hint: queryId may have changed.');
        const body = await resp.json();

        // Two response shapes are accepted: bookmark_timeline_v2 and bookmark_timeline.
        const instructions = body.data?.bookmark_timeline_v2?.timeline?.instructions || body.data?.bookmark_timeline?.timeline?.instructions || [];
        const seenIds = new Set();
        const tweets = [];
        for (const instruction of instructions) {
          for (const entry of (instruction.entries || [])) {
            const result = entry.content?.itemContent?.tweet_results?.result;
            if (!result) continue;
            // Some results nest the tweet one level down under `tweet`.
            const tw = result.tweet || result;
            const legacy = tw.legacy || {};
            if (!tw.rest_id || seenIds.has(tw.rest_id)) continue;
            seenIds.add(tw.rest_id);
            const user = tw.core?.user_results?.result;
            const noteText = tw.note_tweet?.note_tweet_results?.result?.text;
            const screenName = user?.legacy?.screen_name || user?.core?.screen_name;
            tweets.push({
              id: tw.rest_id,
              author: screenName,
              name: user?.legacy?.name || user?.core?.name,
              url: 'https://x.com/' + (screenName || '_') + '/status/' + tw.rest_id,
              text: noteText || legacy.full_text || '',
              likes: legacy.favorite_count,
              retweets: legacy.retweet_count,
              created_at: legacy.created_at
            });
          }
        }
        return tweets;
      })()

  - map:
      author: ${{ item.author }}
      text: ${{ item.text }}
      likes: ${{ item.likes }}
      url: ${{ item.url }}

  - limit: ${{ args.limit }}

columns:
  - author
  - text
  - likes
  - url
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
|
|
3
|
+
// Registers the `twitter profile` command: opens a live search for `from:<username>`,
// captures SearchTimeline responses triggered by scrolling, and flattens them into rows.
cli({
  site: 'twitter',
  name: 'profile',
  description: 'Fetch tweets from a user profile',
  domain: 'x.com',
  strategy: Strategy.INTERCEPT,
  browser: true,
  args: [
    { name: 'username', type: 'string', required: true },
    { name: 'limit', type: 'int', default: 15 },
  ],
  columns: ['id', 'text', 'likes', 'views', 'url'],
  func: async (page, kwargs) => {
    // Navigate to user profile via search for reliability.
    // The query is percent-encoded so characters such as '&', '#' or spaces in
    // the username cannot alter the URL's other parameters.
    const query = encodeURIComponent(`from:${kwargs.username}`);
    await page.goto(`https://x.com/search?q=${query}&f=live`);
    await page.wait(5);

    // Inject XHR/fetch interceptor for the search timeline endpoint.
    await page.installInterceptor('SearchTimeline');

    // Trigger API calls by scrolling.
    await page.autoScroll({ times: 3, delayMs: 2000 });

    // Retrieve whatever was captured.
    const requests = await page.getInterceptedRequests();
    if (!requests || requests.length === 0) return [];

    const results: any[] = [];
    for (const req of requests) {
      // Responses that do not have the expected shape are skipped; guarding each
      // level keeps one malformed entry from discarding the rest of a response.
      const insts = req?.data?.data?.search_by_raw_query?.search_timeline?.timeline?.instructions;
      if (!Array.isArray(insts)) continue;

      const addEntries = insts.find((i: any) => i?.type === 'TimelineAddEntries');
      if (!Array.isArray(addEntries?.entries)) continue;

      for (const entry of addEntries.entries) {
        if (typeof entry?.entryId !== 'string' || !entry.entryId.startsWith('tweet-')) continue;

        let tweet = entry.content?.itemContent?.tweet_results?.result;
        if (!tweet) continue;

        // Visibility-wrapped results carry the actual tweet under `tweet`.
        if (tweet.__typename === 'TweetWithVisibilityResults' && tweet.tweet) {
          tweet = tweet.tweet;
        }

        results.push({
          id: tweet.rest_id,
          text: tweet.legacy?.full_text || '',
          likes: tweet.legacy?.favorite_count || 0,
          views: tweet.views?.count || '0',
          url: `https://x.com/i/status/${tweet.rest_id}`,
        });
      }
    }

    // Coerce the limit explicitly, matching the other CLI definitions.
    return results.slice(0, Number(kwargs.limit));
  },
});
|