@harness.farm/social-cli 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/adapters/bilibili.yaml +23 -2
- package/adapters/douyin.yaml +20 -1
- package/dist/adapters/base.js +2 -0
- package/dist/adapters/index.js +8 -0
- package/dist/adapters/xiaohongshu.js +314 -0
- package/dist/browser/cdp.js +106 -0
- package/dist/browser/runner.js +75 -0
- package/dist/browser/session.js +38 -0
- package/dist/cli.js +99 -0
- package/dist/output/table.js +43 -0
- package/dist/runner/step-executor.js +142 -0
- package/dist/runner/yaml-runner.js +391 -0
- package/dist/scripts/explore-bili.js +37 -0
- package/dist/scripts/explore-douyin.js +30 -0
- package/dist/scripts/explore-x.js +31 -0
- package/package.json +2 -1
package/dist/cli.js
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* CLI entry — supports both YAML adapters and TypeScript adapters.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* tsx src/cli.ts <platform> <command> [args...]
|
|
7
|
+
*
|
|
8
|
+
* Adapter resolution order:
|
|
9
|
+
* 1. adapters/<platform>.yaml ← YAML-first
|
|
10
|
+
* 2. src/adapters/<platform>.ts ← TypeScript fallback
|
|
11
|
+
*
|
|
12
|
+
* Examples:
|
|
13
|
+
* tsx src/cli.ts xhs search 法律ai
|
|
14
|
+
* tsx src/cli.ts xhs like "https://..."
|
|
15
|
+
* tsx src/cli.ts xhs comment "https://..." "太棒了!"
|
|
16
|
+
* tsx src/cli.ts xhs post --title "标题" --content "内容"
|
|
17
|
+
*/
|
|
18
|
+
import path from 'path';
|
|
19
|
+
import fs from 'fs';
|
|
20
|
+
import { fileURLToPath } from 'url';
|
|
21
|
+
import { runYamlCommand } from './runner/yaml-runner.js';
|
|
22
|
+
import { run } from './browser/runner.js';
|
|
23
|
+
import { adapters } from './adapters/index.js';
|
|
24
|
+
// ── Bootstrap ──────────────────────────────────────────────────────────────
// ROOT is the directory one level above this file; bundled YAML adapters are
// looked up under ROOT/adapters regardless of the caller's working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const [, , platform, command, ...rest] = process.argv;
if (!platform || !command) {
    printHelp();
    process.exit(1);
}
// ── Resolve adapter ────────────────────────────────────────────────────────
// The platform name becomes part of a file path, so anything containing a path
// separator (e.g. "../foo") is never treated as a YAML adapter.
const isPlainName = path.basename(platform) === platform;
const yamlPath = path.join(ROOT, 'adapters', `${platform}.yaml`);
const hasYaml = isPlainName && fs.existsSync(yamlPath);
// Own-property lookup only: names such as "constructor" or "toString" must not
// resolve to members inherited from Object.prototype.
const tsAdapter = Object.hasOwn(adapters, platform) ? adapters[platform] : undefined;
if (!hasYaml && !tsAdapter) {
    console.error(`❌ Unknown platform "${platform}"`);
    console.error(` YAML adapters: ${listYamlAdapters().join(', ') || '(none)'}`);
    console.error(` TS adapters: ${Object.keys(adapters).join(', ')}`);
    process.exit(1);
}
// ── Parse args ─────────────────────────────────────────────────────────────
// Support both positional args and --key value flags
const args = parseArgs(rest);
// ── Run ───────────────────────────────────────────────────────────────────
// Shared rejection handler: print the failure and exit non-zero. Falls back to
// the raw value when something other than an Error was thrown.
const exitWithError = (err) => {
    console.error('❌', err?.message ?? err);
    process.exit(1);
};
if (hasYaml) {
    // YAML adapter: pass positional args in order.
    // NOTE: args.flags is parsed but not forwarded; only positional values reach the YAML runner.
    runYamlCommand(yamlPath, command, args.positional).catch(exitWithError);
}
else {
    // TypeScript adapter: pass the raw, unparsed argument array (legacy)
    run(tsAdapter, { command, args: rest }).catch(exitWithError);
}
|
|
59
|
+
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
60
|
+
/**
 * Split raw CLI tokens into positional arguments and `--` flags.
 *
 * Recognised flag forms:
 *   --key value   → flags.key = 'value'
 *   --key=value   → flags.key = 'value' (only the first `=` splits)
 *   --key         → flags.key = ''      (last token, or followed by another flag)
 * A bare `--` ends flag parsing; every later token is positional.
 *
 * @param {string[]} argv Tokens after `<platform> <command>`.
 * @returns {{ positional: string[], flags: Record<string, string> }}
 */
function parseArgs(argv) {
    const positional = [];
    const flags = {};
    for (let i = 0; i < argv.length; i++) {
        const token = argv[i];
        if (token === '--') {
            positional.push(...argv.slice(i + 1));
            break;
        }
        if (!token.startsWith('--')) {
            positional.push(token);
            continue;
        }
        const body = token.slice(2);
        const eq = body.indexOf('=');
        if (eq !== -1) {
            flags[body.slice(0, eq)] = body.slice(eq + 1);
            continue;
        }
        // Only consume the next token as the value when it is not itself a flag;
        // otherwise "--a --b x" would record a = "--b" and lose flag b.
        const next = argv[i + 1];
        if (next !== undefined && !next.startsWith('--')) {
            flags[body] = next;
            i++;
        }
        else {
            flags[body] = '';
        }
    }
    return { positional, flags };
}
|
|
73
|
+
/**
 * List adapter names (file names without the `.yaml` suffix) found in a directory.
 *
 * @param {string} [dir] Directory to scan; defaults to `<ROOT>/adapters`.
 * @returns {string[]} Adapter names, or an empty array when the directory does not exist.
 */
function listYamlAdapters(dir = path.join(ROOT, 'adapters')) {
    if (!fs.existsSync(dir)) {
        return [];
    }
    const suffix = '.yaml';
    return fs.readdirSync(dir)
        .filter((file) => file.endsWith(suffix))
        // Strip the suffix only; String.replace would remove the first ".yaml" anywhere in the name.
        .map((file) => file.slice(0, -suffix.length));
}
|
|
81
|
+
/**
 * Print usage, the available YAML and TS platforms, and a few example invocations to stdout.
 * A platform section is omitted when it has no entries.
 */
function printHelp() {
    const sections = [
        ['YAML 平台 (推荐):', listYamlAdapters()],
        ['TS 平台:', Object.keys(adapters)],
    ];
    const lines = ['用法: tsx src/cli.ts <platform> <command> [args...]', ''];
    for (const [title, names] of sections) {
        if (!names.length) {
            continue;
        }
        lines.push(title, ...names.map((name) => ` ${name}`));
    }
    lines.push(
        '',
        '示例:',
        ' tsx src/cli.ts xhs search 法律ai',
        ' tsx src/cli.ts xhs like "https://www.xiaohongshu.com/explore/..."',
        ' tsx src/cli.ts xhs comment "https://..." "太棒了!"',
    );
    // One console.log per line, matching the original call-by-call output.
    for (const line of lines) {
        console.log(line);
    }
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI table renderer with CJK character width support.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Terminal column width of a single code point.
 * East Asian wide/fullwidth characters and common emoji take 2 columns,
 * combining marks and zero-width characters take 0, everything else takes 1.
 * (Treating every non-ASCII code point as 2 columns mis-measures accented Latin text.)
 */
function charWidth(cp) {
    if ((cp >= 0x0300 && cp <= 0x036f) || // combining diacritical marks
        (cp >= 0x200b && cp <= 0x200d) || // zero-width space / joiners
        (cp >= 0xfe00 && cp <= 0xfe0f)) { // variation selectors
        return 0;
    }
    const wide = (cp >= 0x1100 && cp <= 0x115f) || // Hangul Jamo
        (cp >= 0x2e80 && cp <= 0x303e) || // CJK radicals, symbols and punctuation
        (cp >= 0x3041 && cp <= 0x33ff) || // Kana, Bopomofo, CJK compatibility
        (cp >= 0x3400 && cp <= 0x4dbf) || // CJK extension A
        (cp >= 0x4e00 && cp <= 0x9fff) || // CJK unified ideographs
        (cp >= 0xa000 && cp <= 0xa4cf) || // Yi
        (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul syllables
        (cp >= 0xf900 && cp <= 0xfaff) || // CJK compatibility ideographs
        (cp >= 0xfe30 && cp <= 0xfe4f) || // CJK compatibility forms
        (cp >= 0xff00 && cp <= 0xff60) || // fullwidth forms
        (cp >= 0xffe0 && cp <= 0xffe6) || // fullwidth signs
        (cp >= 0x1f300 && cp <= 0x1f64f) || // emoji and pictographs
        (cp >= 0x1f900 && cp <= 0x1f9ff) || // supplemental symbols and pictographs
        (cp >= 0x20000 && cp <= 0x3fffd); // CJK extensions B and later
    return wide ? 2 : 1;
}
/**
 * Display width of a string in terminal columns.
 * @param {string} s
 * @returns {number}
 */
function dispWidth(s) {
    let w = 0;
    for (const c of s) {
        w += charWidth(c.codePointAt(0));
    }
    return w;
}
/**
 * Shorten `s` so it occupies at most `maxW` columns, ending with '…' when cut.
 * A string that already fits (including one exactly `maxW` wide) is returned unchanged.
 * @param {string} s
 * @param {number} maxW Maximum display width in columns.
 * @returns {string}
 */
function truncate(s, maxW) {
    if (dispWidth(s) <= maxW) {
        return s;
    }
    let out = '';
    let used = 0;
    for (const c of s) {
        const cw = charWidth(c.codePointAt(0));
        // Keep one column free for the ellipsis.
        if (used + cw > maxW - 1) {
            break;
        }
        out += c;
        used += cw;
    }
    return out + '…';
}
|
|
21
|
+
/**
 * Right-pad `s` with spaces until its display width reaches `width`.
 * Strings already at or beyond `width` are returned without padding.
 */
function pad(s, width) {
    const missing = width - dispWidth(s);
    const fill = ' '.repeat(missing > 0 ? missing : 0);
    return s + fill;
}
|
|
24
|
+
/**
 * Build a horizontal rule such as `+-----+---+`.
 * Each column contributes `width + 2` copies of `char` (the cell plus one space of padding per side).
 */
function sep(cols, char = '-') {
    const segments = cols.map(({ width }) => char.repeat(width + 2));
    return `+${segments.join('+')}+`;
}
|
|
27
|
+
/**
 * Render one table line: each value is stringified (nullish → ''), truncated and
 * padded to its column width, wrapped in single spaces and joined with '|'.
 */
function row(cols, values) {
    const cells = cols.map((col, idx) => {
        const text = String(values[idx] ?? '');
        const fitted = pad(truncate(text, col.width), col.width);
        return ` ${fitted} `;
    });
    return `|${cells.join('|')}|`;
}
|
|
30
|
+
/**
 * Print `data` as an ASCII table to stdout, followed by a result-count line.
 *
 * @param {{ key: string, header: string, width: number }[]} cols Column definitions.
 * @param {object[]} data Records; each cell is read from `record[col.key]`.
 */
export function renderTable(cols, data) {
    const thinRule = sep(cols);
    const thickRule = sep(cols, '=');
    const headerCells = cols.map(({ header }) => header);
    console.log(thinRule);
    console.log(row(cols, headerCells));
    console.log(thickRule);
    data.forEach((record, idx) => {
        const cells = cols.map(({ key }) => String(record[key] ?? ''));
        console.log(row(cols, cells));
        // Rows are separated by a thin rule; the closing rule is printed once after the loop.
        const isLast = idx >= data.length - 1;
        if (!isLast) {
            console.log(thinRule);
        }
    });
    console.log(thinRule);
    console.log(`\n共 ${data.length} 条结果`);
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Step executor — maps YAML step types to agent-browser CLI calls.
|
|
3
|
+
*
|
|
4
|
+
* Each method runs `agent-browser <args>` and returns parsed JSON result.
|
|
5
|
+
*/
|
|
6
|
+
import { execSync } from 'child_process';
|
|
7
|
+
/**
 * Synchronous wrapper around the `agent-browser` CLI.
 *
 * Every public method shells out via {@link StepExecutor#run} and returns a result
 * object of the shape `{ ok: boolean, value?: any, error?: string }` unless noted.
 */
export class StepExecutor {
    wsUrl;
    connected = false;
    /** @param {string} wsUrl WebSocket debugger URL of the tab to drive. */
    constructor(wsUrl) {
        this.wsUrl = wsUrl;
    }
    /**
     * Connect to the Chrome tab (once per session).
     * `connected` is only set when the connect call succeeded, so a failed
     * attempt can be retried by calling connect() again.
     */
    connect() {
        if (this.connected) {
            return { ok: true };
        }
        const r = this.run(['connect', this.wsUrl]);
        this.connected = r.ok;
        return r;
    }
    // ── Core step methods ──────────────────────────────────────────────────────
    open(url) {
        return this.run(['open', url]);
    }
    click(selector) {
        return this.run(['click', selector]);
    }
    /** Click the first leaf element whose trimmed text equals `text` (uses eval under the hood). */
    clickText(text) {
        const js = `(function(){
  var el = [...document.querySelectorAll('*')].find(function(e){
    return e.textContent.trim() === ${JSON.stringify(text)} && e.children.length === 0;
  });
  if(el){ el.click(); return true; }
  return false;
})()`;
        const r = this.eval(js);
        // Surface the underlying failure instead of reporting it as "Text not found".
        if (!r.ok) {
            return r;
        }
        if (!r.value || r.value === 'false') {
            return { ok: false, error: `Text not found: "${text}"` };
        }
        return { ok: true };
    }
    /** Set the value of an input/textarea and fire `input` and `change` events. */
    fill(selector, value) {
        // Use eval to avoid shell-quoting issues with complex selectors.
        // The native value setter must come from the element's own prototype:
        // calling HTMLInputElement's setter on a <textarea> throws "Illegal invocation".
        const js = `(function(){
  var el = document.querySelector(${JSON.stringify(selector)});
  if (!el) return false;
  el.focus();
  var proto = el instanceof window.HTMLTextAreaElement ? window.HTMLTextAreaElement.prototype
    : el instanceof window.HTMLInputElement ? window.HTMLInputElement.prototype
    : null;
  var nativeSet = proto && Object.getOwnPropertyDescriptor(proto, 'value');
  if (nativeSet && nativeSet.set) nativeSet.set.call(el, ${JSON.stringify(value)});
  else el.value = ${JSON.stringify(value)};
  el.dispatchEvent(new Event('input', { bubbles: true }));
  el.dispatchEvent(new Event('change', { bubbles: true }));
  return true;
})()`;
        return this.#elementResult(this.eval(js), selector);
    }
    type(selector, value) {
        return this.run(['type', selector, value]);
    }
    /** Type into a contenteditable element via execCommand */
    typeContentEditable(selector, value) {
        const js = `(function(){
  var el = document.querySelector(${JSON.stringify(selector)});
  if(!el) return false;
  el.focus();
  document.execCommand('selectAll', false, null);
  document.execCommand('insertText', false, ${JSON.stringify(value)});
  return el.textContent || el.value || true;
})()`;
        return this.#elementResult(this.eval(js), selector);
    }
    /** Type via agent-browser's `type` command (same call as type()). */
    typeKeys(selector, value) {
        return this.run(['type', selector, value]);
    }
    /** Wait for a number of milliseconds or for a selector; both are passed to `agent-browser wait`. */
    wait(msOrSelector) {
        return this.run(['wait', String(msOrSelector)]);
    }
    /** Press a key via `agent-browser press` (e.g. z, x, Enter, Control+Enter) */
    pressKey(key) {
        return this.run(['press', key]);
    }
    /** Insert text into the currently focused element via `agent-browser keyboard type` */
    keyboardInsertText(text) {
        return this.run(['keyboard', 'type', text]);
    }
    /**
     * Evaluate JavaScript in the page.
     * When the CLI payload is an object with a `result` field, that field is
     * returned as `value`; otherwise the payload itself is returned.
     */
    eval(js) {
        const r = this.run(['eval', js]);
        if (!r.ok || r.value === undefined) {
            return r;
        }
        // Optional chaining: the payload may be null, which has no `.result`.
        return { ok: true, value: r.value?.result ?? r.value };
    }
    screenshot(path) {
        return this.run(path ? ['screenshot', path] : ['screenshot']);
    }
    upload(selector, filePath) {
        return this.run(['upload', selector, filePath]);
    }
    /** @returns {string} Current page URL, or '' when unavailable. */
    getUrl() {
        const r = this.run(['get', 'url']);
        return r.value?.url ?? '';
    }
    /** @returns {string} Stringified snapshot payload, or '' when there is none. */
    snapshot() {
        const r = this.run(['snapshot']);
        return String(r.value ?? '');
    }
    // ── Internal runner ────────────────────────────────────────────────────────
    /**
     * Run `agent-browser <args>` and parse the last JSON line of its stdout.
     * Never throws: process failures and JSON parse errors become `{ ok: false, error }`.
     * Output without any JSON line is treated as success with no value.
     */
    run(args) {
        try {
            const cmd = `agent-browser ${args.map((a) => this.shellQuote(a)).join(' ')}`;
            const out = execSync(cmd, {
                env: { ...process.env, AGENT_BROWSER_JSON: '1' },
                timeout: 30000,
                encoding: 'utf8',
            });
            // Find last JSON line (agent-browser may emit warnings before JSON)
            const jsonLine = out
                .trim()
                .split(/\r?\n/)
                .reverse()
                .find((l) => l.startsWith('{'));
            if (!jsonLine) {
                return { ok: true };
            }
            const parsed = JSON.parse(jsonLine);
            if (!parsed.success) {
                return { ok: false, error: parsed.error ?? 'unknown error' };
            }
            return { ok: true, value: parsed.data };
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            return { ok: false, error: msg };
        }
    }
    /** POSIX shell quoting: wrap in single quotes, escape embedded single quotes. */
    shellQuote(s) {
        return `'${String(s).replace(/'/g, "'\\''")}'`;
    }
    /** Turn an in-page `false` (element not found) into a failed result so callers can detect it. */
    #elementResult(r, selector) {
        if (r.ok && (r.value === false || r.value === 'false')) {
            return { ok: false, error: `Element not found: ${selector}` };
        }
        return r;
    }
}
|
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* YAML Runner — parses adapter YAML and executes commands step by step.
|
|
3
|
+
*
|
|
4
|
+
* YAML schema:
|
|
5
|
+
* platform: string
|
|
6
|
+
* login_url: string
|
|
7
|
+
* login_check:
|
|
8
|
+
* cookie: string # cookie name that indicates logged-in state
|
|
9
|
+
* commands:
|
|
10
|
+
* <name>:
|
|
11
|
+
* args: string[] # positional arg names
|
|
12
|
+
* steps: Step[]
|
|
13
|
+
*
|
|
14
|
+
* Step types:
|
|
15
|
+
* - open: "{{url}}"
|
|
16
|
+
* - click: ".selector"
|
|
17
|
+
* - click: { text: "visible text" }
|
|
18
|
+
* - fill: { selector: ".sel", value: "{{text}}" }
|
|
19
|
+
* - type_rich: { selector: ".sel", value: "{{text}}" } # for contenteditable
|
|
20
|
+
* - wait: 3000
|
|
21
|
+
* - wait: { selector: ".sel" }
|
|
22
|
+
* - eval: "js expression"
|
|
23
|
+
* - capture: { name: varName, eval: "js" }
|
|
24
|
+
* - upload: { selector: ".sel", file: "{{file}}" }
|
|
25
|
+
* - screenshot: path.png
|
|
26
|
+
* - extract: { selector, fields: { key: ".sel" | { selector, attr } } }
|
|
27
|
+
* - return: [ { field, value } ] → builds output table
|
|
28
|
+
* - assert: { eval: "js", message: "error msg" }
|
|
29
|
+
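* - wait_until: { eval: "js", timeout: 120000, interval: 2000 } # poll until truthy
* - key: "Enter" # press a key (Enter / Control+Enter are dispatched via CDP)
* - keyboard_insert: "{{text}}" # type text char-by-char via CDP key events
* - insert_text: "{{text}}" # insert text into the focused element via CDP Input.insertText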
|
|
30
|
+
*/
|
|
31
|
+
import fs from 'fs';
|
|
32
|
+
import { parse as parseYaml } from 'yaml';
|
|
33
|
+
import { StepExecutor } from './step-executor.js';
|
|
34
|
+
import { renderTable } from '../output/table.js';
|
|
35
|
+
import { connectTab } from '../browser/cdp.js';
|
|
36
|
+
import { loadSession } from '../browser/session.js';
|
|
37
|
+
// ─── Runner ───────────────────────────────────────────────────────────────────
|
|
38
|
+
/**
 * Run one command from a YAML adapter against a Chrome tab, step by step,
 * then print the extracted rows (or the `return` rows) as a table.
 *
 * @param {string} adapterPath Path of the adapter YAML file.
 * @param {string} commandName Key under `commands:` in the adapter.
 * @param {string[]} argValues Positional values, matched by index to the command's `args` names.
 * @param {number} [cdpPort=9222] Chrome remote-debugging port.
 * @returns {Promise<void>}
 * @throws {Error} Unknown command, a failed click/fill/type_rich/upload step,
 *   a failed `assert`, or a `wait_until` timeout.
 */
export async function runYamlCommand(adapterPath, commandName, argValues, cdpPort = 9222) {
    // 1. Load & parse YAML
    const raw = fs.readFileSync(adapterPath, 'utf-8');
    const adapter = parseYaml(raw);
    const commands = adapter?.commands ?? {};
    const cmdDef = commands[commandName];
    if (!cmdDef) {
        const available = Object.keys(commands).join(', ');
        throw new Error(`Unknown command "${commandName}". Available: ${available}`);
    }
    // 2. Build variables map from args (missing positional values become '')
    const vars = {};
    (cmdDef.args ?? []).forEach((name, i) => {
        vars[name] = argValues[i] ?? '';
    });
    // 3. Ensure logged in → resolve tab ws URL
    const wsUrl = await resolveTabWsUrl(adapter, cdpPort);
    const exec = new StepExecutor(wsUrl);
    exec.connect();
    const cdpClient = await connectTab(cdpPort);
    // Everything below runs inside try/finally so the CDP client is closed even when a step throws.
    try {
        console.log(`✅ 已连接 ${adapter.platform}`);
        console.log(`\n🔍 执行: ${adapter.platform} ${commandName} ${argValues.join(' ')}\n`);
        // 4. Execute steps
        let extractedRows = null;
        let returnRows = null;
        for (const step of cmdDef.steps ?? []) {
            // Each step is a single-key object: { <type>: <value> }
            const key = Object.keys(step)[0];
            const val = step[key];
            switch (key) {
                case 'open': {
                    const url = interpolate(val, vars);
                    console.log(` → open ${url}`);
                    exec.open(url);
                    break;
                }
                case 'click': {
                    if (typeof val === 'string') {
                        // plain selector
                        const sel = interpolate(val, vars);
                        console.log(` → click "${sel}"`);
                        throwIfFail(exec.click(sel), `click failed: ${sel}`);
                    }
                    else if (typeof val === 'object' && val !== null && 'text' in val) {
                        const text = interpolate(val.text, vars);
                        console.log(` → click text="${text}"`);
                        throwIfFail(exec.clickText(text), `text not found: "${text}"`);
                    }
                    else if (typeof val === 'object' && val !== null && 'selector' in val) {
                        const sel = interpolate(val.selector, vars);
                        console.log(` → click selector="${sel}"`);
                        throwIfFail(exec.click(sel), `click failed: ${sel}`);
                    }
                    else {
                        // Previously a malformed click was skipped without any trace.
                        console.warn(' ⚠️ click: expected a selector string, { text } or { selector } — skipped');
                    }
                    break;
                }
                case 'fill': {
                    const { selector, value } = val;
                    const sel = interpolate(selector, vars);
                    const v = interpolate(value, vars);
                    console.log(` → fill "${sel}" = "${v.slice(0, 40)}"`);
                    throwIfFail(exec.fill(sel, v), `fill failed: ${sel}`);
                    break;
                }
                case 'type_rich': {
                    const { selector, value } = val;
                    const sel = interpolate(selector, vars);
                    const v = interpolate(value, vars);
                    console.log(` → type_rich "${sel}" = "${v.slice(0, 40)}"`);
                    throwIfFail(exec.typeContentEditable(sel, v), `type_rich failed: ${sel}`);
                    break;
                }
                case 'wait': {
                    if (typeof val === 'number') {
                        console.log(` → wait ${val}ms`);
                        exec.wait(val);
                    }
                    else {
                        const sel = interpolate(val.selector, vars);
                        console.log(` → wait selector="${sel}"`);
                        exec.wait(sel);
                    }
                    break;
                }
                case 'eval': {
                    const js = interpolate(val, vars);
                    console.log(` → eval ...`);
                    exec.eval(js);
                    break;
                }
                case 'capture': {
                    // Store the stringified eval result as a new variable for later steps.
                    const { name, eval: js } = val;
                    const interpolatedJs = interpolate(js, vars);
                    console.log(` → capture ${name}`);
                    const r = exec.eval(interpolatedJs);
                    vars[name] = String(r.value ?? '');
                    console.log(` ${name} = ${vars[name].slice(0, 60)}`);
                    break;
                }
                case 'upload': {
                    const { selector, file } = val;
                    const sel = interpolate(selector, vars);
                    const f = interpolate(file, vars);
                    console.log(` → upload "${sel}" ← ${f}`);
                    throwIfFail(exec.upload(sel, f), `upload failed: ${sel}`);
                    break;
                }
                case 'screenshot': {
                    const p = interpolate(val, vars);
                    console.log(` → screenshot → ${p}`);
                    exec.screenshot(p);
                    break;
                }
                case 'extract': {
                    const def = val;
                    console.log(` → extract "${def.selector}"`);
                    extractedRows = runExtract(exec, def, vars);
                    break;
                }
                case 'return': {
                    returnRows = val.map((r) => ({
                        field: interpolate(r.field, vars),
                        value: interpolate(r.value, vars),
                    }));
                    break;
                }
                case 'assert': {
                    const { eval: js, message } = val;
                    const r = exec.eval(interpolate(js, vars));
                    if (!r.value) {
                        throw new Error(message ?? `Assertion failed: ${js}`);
                    }
                    console.log(` → assert ✅`);
                    break;
                }
                case 'key': {
                    const k = interpolate(val, vars);
                    console.log(` → key "${k}"`);
                    // For Enter/special keys that need to reach the focused element, use CDPClient directly
                    if (k === 'Enter' || k === 'Control+Enter') {
                        // 2 is the modifiers value sent with the Control variant; 0 sends no modifiers.
                        const modifiers = k.startsWith('Control') ? 2 : 0;
                        await cdpClient.send('Input.dispatchKeyEvent', { type: 'keyDown', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, modifiers });
                        await cdpClient.send('Input.dispatchKeyEvent', { type: 'keyUp', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, modifiers });
                    }
                    else {
                        exec.pressKey(k);
                    }
                    break;
                }
                case 'keyboard_insert': {
                    // Send text char-by-char via CDP Input.dispatchKeyEvent
                    // (keyDown → char → keyUp for every code point).
                    const text = interpolate(val, vars);
                    console.log(` → keyboard_insert "${text.slice(0, 40)}"`);
                    for (const char of text) {
                        const code = char.codePointAt(0);
                        await cdpClient.send('Input.dispatchKeyEvent', { type: 'keyDown', key: char, windowsVirtualKeyCode: code });
                        await cdpClient.send('Input.dispatchKeyEvent', { type: 'char', key: char, text: char });
                        await cdpClient.send('Input.dispatchKeyEvent', { type: 'keyUp', key: char, windowsVirtualKeyCode: code });
                    }
                    break;
                }
                case 'insert_text': {
                    // CDP Input.insertText — inserts the whole text into the focused element in one call.
                    const text = interpolate(val, vars);
                    console.log(` → insert_text "${text.slice(0, 40)}"`);
                    await cdpClient.send('Input.insertText', { text });
                    break;
                }
                case 'wait_until': {
                    // Poll a JS expression until it returns truthy, with timeout
                    const { eval: js, timeout = 120000, interval = 2000 } = val;
                    const interpolatedJs = interpolate(js, vars);
                    console.log(` → wait_until (timeout: ${timeout / 1000}s, interval: ${interval / 1000}s)`);
                    const deadline = Date.now() + timeout;
                    let resolved = false;
                    while (Date.now() < deadline) {
                        const r = exec.eval(interpolatedJs);
                        // The string 'false' is also treated as "not yet".
                        if (r.ok && r.value && r.value !== 'false' && r.value !== '' && r.value !== 0) {
                            resolved = true;
                            break;
                        }
                        console.log(` ... waiting (${Math.round((deadline - Date.now()) / 1000)}s left)`);
                        exec.wait(interval);
                    }
                    if (!resolved) {
                        throw new Error(`wait_until timed out after ${timeout / 1000}s`);
                    }
                    console.log(` ✅ condition met`);
                    break;
                }
                default: {
                    // A misspelled step type used to be skipped without any trace.
                    console.warn(` ⚠️ unknown step type "${key}" — skipped`);
                }
            }
        }
        // 5. Render output: extracted rows win over `return` rows when both exist
        console.log('');
        if (extractedRows) {
            const fields = Object.keys(extractedRows[0] ?? {});
            const columns = fields.map((k) => ({
                key: k,
                header: k,
                width: k === 'index' ? 4 : k === 'link' ? 52 : k === 'title' ? 36 : 24,
            }));
            renderTable(columns, extractedRows);
        }
        else if (returnRows) {
            renderTable([{ key: 'field', header: '字段', width: 12 }, { key: 'value', header: '值', width: 50 }], returnRows);
        }
    }
    finally {
        cdpClient.close();
    }
}
|
|
244
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
245
|
+
/**
 * Replace `{{expr}}` placeholders in a template.
 *
 * - `{{name}}` (word characters only) is looked up as an own property of `vars`;
 *   unknown names become ''.
 * - Any other expression is evaluated as JavaScript with the variables in scope;
 *   an expression that throws or fails to parse becomes ''.
 *
 * SECURITY: expressions are executed with `new Function`, so adapter templates
 * must come from a trusted source. Variable values are passed as arguments and
 * are never spliced into the evaluated source.
 *
 * @param {unknown} template Template text; non-strings (e.g. YAML numbers) are stringified, null/undefined yields ''.
 * @param {Record<string, unknown>} vars Variable values by name.
 * @returns {string}
 */
function interpolate(template, vars) {
    if (template == null) {
        return '';
    }
    const scope = vars ?? {};
    // A name can be a function parameter only if it is a valid, non-reserved identifier.
    // Without this filter a single name like "my-arg" or "class" makes every
    // expression placeholder fail and resolve to ''.
    const isParamName = (name) => {
        if (!/^[A-Za-z_$][\w$]*$/.test(name)) {
            return false;
        }
        try {
            new Function(name, '');
            return true;
        }
        catch {
            return false;
        }
    };
    return String(template).replace(/\{\{(.+?)\}\}/g, (_, expr) => {
        const trimmed = expr.trim();
        // Plain variable name: fast path. Own properties only, so "{{constructor}}"
        // does not leak Object.prototype members into the output.
        if (/^\w+$/.test(trimmed)) {
            return Object.hasOwn(scope, trimmed) ? String(scope[trimmed] ?? '') : '';
        }
        // JS expression: inject vars as locals and eval
        try {
            const keys = Object.keys(scope).filter(isParamName);
            const vals = keys.map((k) => scope[k]);
            // eslint-disable-next-line @typescript-eslint/no-implied-eval
            return String(new Function(...keys, `return (${trimmed})`)(...vals) ?? '');
        }
        catch {
            // Best effort: a broken expression yields an empty string.
            return '';
        }
    });
}
|
|
264
|
+
/**
 * Throw `Error("<msg>: <error>")` when a step result is not ok; do nothing otherwise.
 * A missing `error` field is rendered as an empty string.
 */
function throwIfFail(r, msg) {
    if (r.ok) {
        return;
    }
    const detail = r.error ?? '';
    throw new Error(`${msg}: ${detail}`);
}
|
|
268
|
+
/**
 * Run an extract step: scrape a list of items with named fields.
 *
 * For every element matching `def.selector`, a row `{ index, ...fields }` is built
 * in the page; a field spec is either a selector string (text content) or
 * `{ selector, attr }` (attribute, falling back to text content). Rows whose
 * fields are all empty are dropped.
 *
 * @param {{ eval(js: string): { ok: boolean, value?: unknown, error?: string } }} exec
 * @param {{ selector: string, fields: Record<string, string | { selector: string, attr?: string }> }} def
 * @param {Record<string, unknown>} vars Variables for `{{...}}` placeholders in selectors.
 * @returns {object[]} Extracted rows; [] when evaluation fails or returns no array.
 */
function runExtract(exec, def, vars) {
    // Selectors may contain {{placeholders}} like every other step; strings
    // without a placeholder are passed through untouched.
    const fillIn = (s) => (typeof s === 'string' && s.includes('{{') ? interpolate(s, vars) : s);
    const fields = Object.fromEntries(Object.entries(def.fields ?? {}).map(([key, spec]) => [
        key,
        typeof spec === 'string' ? fillIn(spec) : { ...spec, selector: fillIn(spec?.selector) },
    ]));
    const fieldsJson = JSON.stringify(fields);
    const js = `(function(){
  var fields = ${fieldsJson};
  var results = [];
  document.querySelectorAll(${JSON.stringify(fillIn(def.selector))}).forEach(function(item, i) {
    var row = { index: i + 1 };
    Object.keys(fields).forEach(function(key) {
      var spec = fields[key];
      if (typeof spec === 'string') {
        var el = item.querySelector(spec);
        row[key] = el ? el.textContent.trim() : '';
      } else {
        var el2 = item.querySelector(spec.selector);
        row[key] = el2 ? (spec.attr === 'href' ? (el2.href || el2.getAttribute(spec.attr)) : el2.getAttribute(spec.attr)) || el2.textContent.trim() : '';
      }
    });
    if (Object.values(row).some(function(v){ return v && v !== i + 1; })) results.push(row);
  });
  return JSON.stringify(results);
})()`;
    const r = exec.eval(js);
    if (!r.ok) {
        // Keep the best-effort behaviour (empty result) but make the failure visible.
        console.warn(` ⚠️ extract failed: ${r.error ?? 'unknown error'}`);
        return [];
    }
    // The payload may arrive already parsed.
    if (Array.isArray(r.value)) {
        return r.value;
    }
    try {
        const parsed = JSON.parse(String(r.value ?? '[]'));
        // Anything but an array would break the table renderer downstream.
        return Array.isArray(parsed) ? parsed : [];
    }
    catch {
        return [];
    }
}
|
|
298
|
+
/**
 * Find the WebSocket debugger URL of the tab to drive, guiding the user through login if needed.
 *
 * Resolution order:
 *   1. a page tab whose host is the platform domain (URLs containing "creator" are skipped);
 *   2. the first page tab, when a saved session already contains the login cookie;
 *   3. otherwise open the login page, wait for the user to press Enter, capture the
 *      session and verify the login cookie.
 * Exits the process (code 1) when Chrome is unreachable, no page tab is open,
 * or the login cookie is still missing after the interactive login.
 *
 * @param {{ platform: string, login_url: string, login_check: { cookie: string } }} adapter
 * @param {number} cdpPort Chrome remote-debugging port.
 * @returns {Promise<string>} webSocketDebuggerUrl of the selected tab.
 */
async function resolveTabWsUrl(adapter, cdpPort) {
    const listUrl = `http://localhost:${cdpPort}/json`;
    // Step 1: Check Chrome is running with CDP
    let tabs;
    try {
        const res = await fetch(listUrl, { signal: AbortSignal.timeout(2000) });
        tabs = (await res.json());
    }
    catch {
        console.error(`\n❌ 无法连接到 Chrome CDP (端口 ${cdpPort})`);
        console.error('\n👉 请先启动 Chrome,开启远程调试:\n');
        console.error(` macOS: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome \\`);
        console.error(` --remote-debugging-port=${cdpPort} \\`);
        console.error(` --user-data-dir=$HOME/.cdp-scraper/chrome-profile\n`);
        console.error(` Windows: chrome.exe --remote-debugging-port=${cdpPort} --user-data-dir=%USERPROFILE%\\.cdp-scraper\\chrome-profile\n`);
        console.error(` Linux: google-chrome --remote-debugging-port=${cdpPort} --user-data-dir=~/.cdp-scraper/chrome-profile\n`);
        console.error('启动后重新运行此命令。');
        process.exit(1);
    }
    const pageTabs = tabs.filter((t) => t.type === 'page');
    if (pageTabs.length === 0) {
        console.error('❌ Chrome 中没有打开的页面,请至少保持一个标签页打开。');
        process.exit(1);
    }
    // Step 2: Prefer a tab already on the platform domain.
    // Compare host names rather than searching the whole URL, so a tab that merely
    // mentions the domain in its path or query string is not picked.
    const domain = new URL(adapter.login_url).hostname.replace(/^www\./, '');
    const isPlatformTab = (tab) => {
        let host;
        try {
            host = new URL(tab.url).hostname;
        }
        catch {
            return false;
        }
        return (host === domain || host.endsWith(`.${domain}`)) && !tab.url.includes('creator');
    };
    const existing = pageTabs.find(isPlatformTab);
    if (existing) {
        return existing.webSocketDebuggerUrl;
    }
    // Step 3: Check saved session cookie
    const cookies = loadSession(adapter.platform);
    const hasCookie = cookies?.some((c) => c.name === adapter.login_check.cookie);
    if (hasCookie) {
        return pageTabs[0].webSocketDebuggerUrl;
    }
    // Step 4: Not logged in — guide user through login
    console.log(`\n🔑 需要登录 ${adapter.platform}`);
    console.log('─'.repeat(50));
    // Open the platform login page in the first tab via agent-browser
    const firstTab = pageTabs[0].webSocketDebuggerUrl;
    console.log(`\n 正在打开登录页: ${adapter.login_url}`);
    console.log(` (使用 agent-browser 连接到: ${firstTab})\n`);
    const { execSync } = await import('child_process');
    // POSIX single-quote escaping, so a quote inside a URL cannot break out of the argument.
    const quote = (s) => `'${String(s).replace(/'/g, "'\\''")}'`;
    const env = { ...process.env, AGENT_BROWSER_JSON: '1' };
    try {
        execSync(`agent-browser connect ${quote(firstTab)}`, { env, timeout: 5000, encoding: 'utf8' });
        execSync(`agent-browser open ${quote(adapter.login_url)}`, { env, timeout: 10000, encoding: 'utf8' });
    }
    catch {
        // Best effort: fall back to asking the user to open the page manually.
        console.log(` ⚠️ 无法自动打开页面,请手动在 Chrome 中访问: ${adapter.login_url}`);
    }
    console.log(`👀 请在 Chrome 中完成登录 ${adapter.platform},然后按 Enter...`);
    await waitForEnter();
    // Step 5: Verify login by checking for the cookie
    const resTabs = await fetch(listUrl);
    const freshPages = (await resTabs.json()).filter((t) => t.type === 'page');
    // After login the user is expected to be on the platform; prefer that tab over an arbitrary first one.
    const freshPage = freshPages.find(isPlatformTab) ?? freshPages[0];
    if (!freshPage) {
        console.error('❌ Chrome 中没有打开的页面');
        process.exit(1);
    }
    // Save session from the browser
    const { captureSession } = await import('../browser/session.js');
    const client = await connectTab(cdpPort);
    let saved;
    try {
        saved = await captureSession(client, adapter.platform);
    }
    finally {
        // Close the CDP client even when capturing the session throws.
        client.close();
    }
    const ok = saved.some((c) => c.name === adapter.login_check.cookie);
    if (!ok) {
        console.error(`❌ 未检测到登录 cookie (${adapter.login_check.cookie}),请确认已登录后重试`);
        process.exit(1);
    }
    console.log(`✅ 登录成功,session 已保存\n`);
    return freshPage.webSocketDebuggerUrl;
}
|
|
381
|
+
/**
 * Print a ` > ` prompt and resolve once stdin delivers its next chunk of data.
 * stdin is resumed for the wait and paused again afterwards.
 * @returns {Promise<void>}
 */
function waitForEnter() {
    const { stdin, stdout } = process;
    return new Promise((done) => {
        // setRawMode is called only when stdin provides it, hence the optional call.
        stdin.setRawMode?.(false);
        stdin.resume();
        stdout.write(' > ');
        const onInput = () => {
            stdin.pause();
            done();
        };
        stdin.once('data', onInput);
    });
}
|