yiyan-browser-agent 1.0.27 → 1.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yiyan-browser-agent",
3
- "version": "1.0.27",
3
+ "version": "1.0.29",
4
4
  "description": "AI coding agent powered by Yiyan (文心一言) via browser automation — no API key needed",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/agent.js CHANGED
@@ -1,68 +1,46 @@
1
- // src/agent.js — The core agent loop
1
+ // src/agent.js — Simple agent, one browser per process
2
2
  'use strict';
3
3
 
4
- const config = require('./config');
5
- const logger = require('./logger');
6
- const BrowserManager = require('./browser-manager');
7
- const PageAgent = require('./page-agent');
8
-
9
- // ─────────────────────────────────────────────
10
- // Agent class
11
- // ─────────────────────────────────────────────
4
+ const config = require('./config');
5
+ const logger = require('./logger');
6
+ const YiyanBrowser = require('./browser');
12
7
 
13
8
  class YiyanAgent {
14
9
  constructor(options = {}) {
15
- this.pageAgent = null;
16
- this.options = options;
17
- this._running = false;
10
+ this.browser = new YiyanBrowser();
11
+ this.options = options;
18
12
  }
19
13
 
20
- // ── Public API ──────────────────────────────────────────────────────────────
21
-
22
- /** Get browser and create page agent */
23
14
  async init() {
24
- const { browser, page } = await BrowserManager.getInstance();
25
- this.pageAgent = new PageAgent(page);
15
+ await this.browser.launch();
26
16
  }
27
17
 
28
- /** Shutdown */
29
18
  async shutdown() {
30
- await BrowserManager.forceClose();
19
+ await this.browser.close();
31
20
  }
32
21
 
33
- /**
34
- * Run a task
35
- */
36
22
  async run(task) {
37
- this._running = true;
38
23
  const startTime = Date.now();
39
24
 
40
25
  logger.info('Sending task to Yiyan...');
41
- await this.pageAgent.sendMessage(task);
26
+ await this.browser.sendMessage(task);
42
27
 
43
28
  logger.info('Waiting for response...');
44
- const answer = await this.pageAgent.waitForResponse();
29
+ const answer = await this.browser.waitForResponse();
45
30
 
46
31
  const duration = Date.now() - startTime;
47
- this._running = false;
48
32
 
49
33
  return {
50
34
  question: task,
51
- answer: answer || 'No response received',
35
+ answer: answer || 'No response',
52
36
  duration,
53
37
  status: answer ? 'success' : 'error'
54
38
  };
55
39
  }
56
40
 
57
- // ── Interactive Mode ────────────────────────────────────────────────────────
58
-
59
41
  async runInteractive() {
60
42
  const readline = require('readline');
61
- const rl = readline.createInterface({
62
- input: process.stdin,
63
- output: process.stdout,
64
- });
65
-
43
+ const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
66
44
  const ask = () => new Promise(resolve => rl.question('', resolve));
67
45
 
68
46
  while (true) {
@@ -71,22 +49,15 @@ class YiyanAgent {
71
49
  if (['exit', 'quit', 'q'].includes(task.toLowerCase())) break;
72
50
 
73
51
  try {
74
- // Re-init for new tab
75
- await this.init();
52
+ await this.browser.newChat();
76
53
  const result = await this.run(task);
77
54
  console.log(JSON.stringify(result, null, 2));
78
55
  } catch (err) {
79
- console.log(JSON.stringify({
80
- question: task,
81
- answer: `Error: ${err.message}`,
82
- duration: 0,
83
- status: 'error'
84
- }, null, 2));
56
+ console.log(JSON.stringify({ question: task, answer: `Error: ${err.message}`, duration: 0, status: 'error' }, null, 2));
85
57
  }
86
58
  }
87
-
88
59
  rl.close();
89
60
  }
90
61
  }
91
62
 
92
- module.exports = YiyanAgent;
63
+ module.exports = YiyanAgent;
package/src/index.js CHANGED
@@ -1,223 +1,107 @@
1
1
  #!/usr/bin/env node
2
- // src/index.js — CLI entry point for Yiyan Agent
2
+ // src/index.js — CLI entry point
3
3
  'use strict';
4
4
 
5
- const path = require('path');
6
- const fs = require('fs');
7
- const config = require('./config');
8
- const logger = require('./logger');
9
- const YiyanAgent = require('./agent');
10
- const BrowserManager = require('./browser-manager');
11
-
12
- // ─────────────────────────────────────────────
13
- // Parse CLI arguments
14
- // ─────────────────────────────────────────────
5
+ const path = require('path');
6
+ const fs = require('fs');
7
+ const config = require('./config');
8
+ const logger = require('./logger');
9
+ const YiyanAgent = require('./agent');
15
10
 
16
11
  function parseArgs(argv) {
17
12
  const args = argv.slice(2);
18
- const opts = {
19
- task : null,
20
- interactive : false,
21
- debug : false,
22
- headless : false,
23
- saveLog : false,
24
- workingDir : null,
25
- calibrate : false,
26
- help : false,
27
- };
13
+ const opts = { task: null, interactive: false, debug: false, headless: false, workingDir: null, calibrate: false, help: false };
28
14
 
29
- let i = 0;
30
- while (i < args.length) {
15
+ for (let i = 0; i < args.length; i++) {
31
16
  const a = args[i];
32
- switch (a) {
33
- case '-i':
34
- case '--interactive': opts.interactive = true; break;
35
- case '--debug': opts.debug = true; break;
36
- case '--headless': opts.headless = true; break;
37
- case '--save-log': opts.saveLog = true; break;
38
- case '--calibrate': opts.calibrate = true; break;
39
- case '-h':
40
- case '--help': opts.help = true; break;
41
-
42
- case '-d':
43
- case '--dir':
44
- opts.workingDir = args[++i];
45
- break;
46
-
47
- case '-t':
48
- case '--task':
49
- opts.task = args[++i];
50
- break;
51
-
52
- default:
53
- // If it doesn't start with '-', treat it as an inline task
54
- if (!a.startsWith('-')) {
55
- opts.task = args.slice(i).join(' ');
56
- i = args.length; // consume the rest
57
- }
58
- }
59
- i++;
17
+ if (a === '-i' || a === '--interactive') opts.interactive = true;
18
+ else if (a === '--debug') opts.debug = true;
19
+ else if (a === '--headless') opts.headless = true;
20
+ else if (a === '--calibrate') opts.calibrate = true;
21
+ else if (a === '-h' || a === '--help') opts.help = true;
22
+ else if (a === '-d' || a === '--dir') opts.workingDir = args[++i];
23
+ else if (a === '-t' || a === '--task') opts.task = args[++i];
24
+ else if (!a.startsWith('-')) { opts.task = args.slice(i).join(' '); break; }
60
25
  }
61
-
62
26
  return opts;
63
27
  }
64
28
 
65
- // ─────────────────────────────────────────────
66
- // Help text
67
- // ─────────────────────────────────────────────
68
-
69
29
  function printHelp() {
70
30
  console.log(`
71
- \x1b[1mYIYAN AGENT (文心一言)\x1b[0m — AI Coding Agent via Browser Automation
31
+ \x1b[1mYIYAN AGENT\x1b[0m — AI Coding Agent via Browser
72
32
 
73
33
  \x1b[33mUSAGE\x1b[0m
74
- node src/index.js [OPTIONS] [TASK]
34
+ yiyan-agent [TASK]
35
+ yiyan-agent --interactive
75
36
 
76
37
  \x1b[33mOPTIONS\x1b[0m
77
- -t, --task <task> Task to run (can also be the last argument without a flag)
78
- -i, --interactive Interactive REPL mode — keep browser open, run multiple tasks
79
- -d, --dir <path> Set working directory (default: current directory)
80
- --debug Verbose debug output
81
- --headless Run browser in headless mode (must be logged in already)
82
- --save-log Save conversation log to ~/.yiyan-agent/logs/
83
- --calibrate Open browser and print DOM info to help fix selectors
84
- -h, --help Show this help
38
+ -i, --interactive Interactive mode
39
+ --headless Run without visible browser
40
+ --debug Show debug info
41
+ --calibrate Debug DOM selectors
42
+ -h, --help Show help
85
43
 
86
44
  \x1b[33mEXAMPLES\x1b[0m
87
- # Run a single task
88
- node src/index.js "Create a REST API in Express with CRUD for users"
89
-
90
- # Interactive mode (recommended)
91
- node src/index.js --interactive
92
-
93
- # Run on a specific project directory
94
- node src/index.js --dir ~/projects/myapp "Add TypeScript to this project"
95
-
96
- # Debug mode (shows raw responses)
97
- node src/index.js --debug "Write a binary search in Python"
98
-
99
- # Headless (faster, requires prior login)
100
- node src/index.js --headless "Refactor index.js to use async/await"
101
-
102
- \x1b[33mFIRST-TIME SETUP\x1b[0m
103
- 1. npm run setup (installs deps + Playwright browser)
104
- 2. node src/index.js -i (opens browser, log in to Yiyan, then use normally)
105
- Session is saved — you only log in once.
106
-
107
- \x1b[33mCONFIG FILE\x1b[0m
108
- Create \x1b[36myiyan-agent.config.json\x1b[0m in your working directory to override settings:
109
- {
110
- "HEADLESS": true,
111
- "MAX_ITERATIONS": 50,
112
- "STABLE_DELAY": 3000
113
- }
45
+ yiyan-agent "济宁天气"
46
+ yiyan-agent -i
114
47
  `);
115
48
  }
116
49
 
117
- // ─────────────────────────────────────────────
118
- // Main
119
- // ─────────────────────────────────────────────
120
-
121
50
  async function main() {
122
51
  const opts = parseArgs(process.argv);
123
52
 
124
- // ── Help ───────────────────────────────────────────────────────────────────
125
- if (opts.help) {
126
- printHelp();
127
- process.exit(0);
128
- }
53
+ if (opts.help) { printHelp(); process.exit(0); }
129
54
 
130
- // ── Apply options to config ────────────────────────────────────────────────
131
- if (opts.debug) config.DEBUG = true;
132
- if (opts.headless) config.HEADLESS = true;
55
+ if (opts.debug) config.DEBUG = true;
56
+ if (opts.headless) config.HEADLESS = true;
133
57
  if (opts.workingDir) {
134
58
  const resolved = path.resolve(opts.workingDir);
135
- if (!fs.existsSync(resolved)) {
136
- logger.error(`Working directory not found: ${resolved}`);
137
- process.exit(1);
138
- }
59
+ if (!fs.existsSync(resolved)) { logger.error(`Dir not found: ${resolved}`); process.exit(1); }
139
60
  config.WORKING_DIR = resolved;
140
61
  }
141
62
 
142
- // ── Banner ─────────────────────────────────────────────────────────────────
143
63
  logger.banner();
144
- logger.info(`Working directory : \x1b[36m${config.WORKING_DIR}\x1b[0m`);
145
- logger.info(`Session directory : \x1b[36m${config.SESSION_DIR}\x1b[0m`);
146
- logger.info(`Headless mode : \x1b[36m${config.HEADLESS}\x1b[0m`);
147
- logger.info(`Debug mode : \x1b[36m${config.DEBUG}\x1b[0m`);
148
- console.log('');
149
-
150
- // ── Create agent ───────────────────────────────────────────────────────────
151
- const agent = new YiyanAgent({ saveLog: opts.saveLog });
152
-
153
- // ── Graceful shutdown handler ──────────────────────────────────────────────
154
- // Only delete endpoint file on Ctrl+C (explicit exit)
155
- const shutdown = async (code = 0, keepBrowser = true) => {
156
- if (!keepBrowser) {
157
- try { BrowserManager.forceClose(); } catch {}
158
- }
64
+ logger.info(`Working dir: ${config.WORKING_DIR}`);
65
+
66
+ const agent = new YiyanAgent();
67
+
68
+ const shutdown = async (code = 0) => {
69
+ logger.info('Shutting down...');
70
+ try { await agent.shutdown(); } catch {}
159
71
  process.exit(code);
160
72
  };
161
73
 
162
- process.on('SIGINT', () => shutdown(0, false)); // Ctrl+C: close browser
163
- process.on('SIGTERM', () => shutdown(0, false)); // Kill: close browser
164
- process.on('uncaughtException', async err => {
165
- logger.error(`Uncaught error: ${err.message}`);
166
- if (config.DEBUG) console.error(err.stack);
167
- await shutdown(1, false); // Error: close browser
168
- });
169
- process.on('unhandledRejection', async reason => {
170
- logger.error(`Unhandled rejection: ${reason}`);
171
- if (config.DEBUG) console.error(reason);
172
- await shutdown(1, false); // Error: close browser
173
- });
174
-
175
- // ── Calibrate mode ─────────────────────────────────────────────────────────
74
+ process.on('SIGINT', () => shutdown(0));
75
+ process.on('SIGTERM', () => shutdown(0));
76
+
176
77
  if (opts.calibrate) {
177
- logger.header('Calibration Mode — Reading DOM selectors');
178
78
  await agent.init();
179
79
  await agent.browser.dumpDebugInfo();
180
80
  await agent.browser.screenshot();
181
- logger.info('Done. Check the output above to update selectors in src/browser.js if needed.');
182
81
  await shutdown(0);
183
82
  }
184
83
 
185
- // ── Validate we have a task or interactive mode ────────────────────────────
186
- if (!opts.interactive && !opts.task) {
187
- logger.warn('No task provided. Switching to interactive mode...\n');
188
- opts.interactive = true;
189
- }
84
+ if (!opts.interactive && !opts.task) opts.interactive = true;
190
85
 
191
- // ── Launch browser ─────────────────────────────────────────────────────────
192
86
  try {
193
87
  await agent.init();
194
88
  } catch (err) {
195
- logger.error(`Failed to launch browser: ${err.message}`);
196
- if (config.DEBUG) console.error(err.stack);
89
+ logger.error(`Failed: ${err.message}`);
197
90
  process.exit(1);
198
91
  }
199
92
 
200
- // ── Run ────────────────────────────────────────────────────────────────────
201
93
  try {
202
94
  if (opts.interactive) {
203
95
  await agent.runInteractive();
204
- await shutdown(0, true);
205
96
  } else {
206
97
  const result = await agent.run(opts.task);
207
98
  console.log(JSON.stringify(result, null, 2));
208
- // Success: don't close browser, next process can connect to it
209
- // Error: close browser
210
- process.exit(result.status === 'error' ? 1 : 0);
211
99
  }
212
100
  } catch (err) {
213
- console.log(JSON.stringify({
214
- question: opts.task || '',
215
- answer: `Error: ${err.message}`,
216
- duration: 0,
217
- status: 'error'
218
- }, null, 2));
219
- await shutdown(1, true); // Error: close browser
101
+ console.log(JSON.stringify({ question: opts.task || '', answer: `Error: ${err.message}`, duration: 0, status: 'error' }, null, 2));
220
102
  }
103
+
104
+ await shutdown(0);
221
105
  }
222
106
 
223
- main();
107
+ main();
package/src/browser-manager.js DELETED
@@ -1,123 +0,0 @@
1
- // src/browser-manager.js — Cross-process browser reuse via CDP
2
- 'use strict';
3
-
4
- const { chromium } = require('playwright');
5
- const path = require('path');
6
- const os = require('os');
7
- const fs = require('fs');
8
- const logger = require('./logger');
9
- const config = require('./config');
10
-
11
- const CDP_PORT_FILE = path.join(os.homedir(), '.yiyan-agent', 'cdp-port.json');
12
- const CDP_PORT = 9222;
13
- const SESSION_DIR = path.join(os.homedir(), '.yiyan-agent', 'session');
14
-
15
- class BrowserManager {
16
- /**
17
- * Get browser - connect existing or launch new
18
- */
19
- static async getInstance() {
20
- // Ensure directories exist
21
- fs.mkdirSync(SESSION_DIR, { recursive: true });
22
-
23
- // Try connect existing browser (with retries for concurrent launches)
24
- for (let retry = 0; retry < 3; retry++) {
25
- try {
26
- if (fs.existsSync(CDP_PORT_FILE)) {
27
- const endpointInfo = JSON.parse(fs.readFileSync(CDP_PORT_FILE, 'utf8'));
28
- const wsEndpoint = endpointInfo.wsEndpoint;
29
-
30
- if (!wsEndpoint) {
31
- throw new Error('No wsEndpoint in file');
32
- }
33
-
34
- logger.info('Connecting to existing browser...');
35
- const browser = await chromium.connect({ wsEndpoint, timeout: 10000 });
36
-
37
- // Create NEW context and page (tab) for this task
38
- const context = await browser.newContext({
39
- viewport: { width: 1280, height: 900 },
40
- userAgent: 'Mozilla/5.0 AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36',
41
- });
42
- const page = await context.newPage();
43
- await page.goto(config.YIYAN_URL, { waitUntil: 'networkidle', timeout: 20000 });
44
- await page.waitForTimeout(1500);
45
-
46
- logger.success('Connected! New tab opened.');
47
- return { browser, context, page, isNew: false };
48
- }
49
- } catch (err) {
50
- // If connection failed, wait and retry (another process might be starting)
51
- if (retry < 2) {
52
- logger.warn(`Connection attempt ${retry + 1} failed, waiting 3 seconds...`);
53
- await new Promise(r => setTimeout(r, 3000));
54
- } else {
55
- logger.warn('Connection failed after retries, launching new browser...');
56
- try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
57
- }
58
- }
59
- }
60
-
61
- // Launch new browser
62
- return await this._launchNew();
63
- }
64
-
65
- /**
66
- * Launch new browser with CDP port
67
- */
68
- static async _launchNew() {
69
- logger.info('Launching new browser server...');
70
-
71
- // Use launchServer to create independent browser process
72
- const browserServer = await chromium.launchServer({
73
- headless: false,
74
- args: [
75
- '--disable-blink-features=AutomationControlled',
76
- '--no-first-run',
77
- '--no-sandbox',
78
- ],
79
- });
80
-
81
- // Get the WebSocket endpoint
82
- const wsEndpoint = browserServer.wsEndpoint();
83
- logger.dim('WebSocket endpoint: ' + wsEndpoint);
84
-
85
- // Save endpoint info IMMEDIATELY before any other operations
86
- fs.writeFileSync(CDP_PORT_FILE, JSON.stringify({
87
- wsEndpoint: wsEndpoint,
88
- launchedAt: Date.now()
89
- }));
90
- logger.dim('Endpoint saved to: ' + CDP_PORT_FILE);
91
-
92
- logger.success('Browser server started!');
93
-
94
- // Connect to our own server
95
- const browser = await chromium.connect({ wsEndpoint });
96
-
97
- // Create context and page
98
- const context = await browser.newContext({
99
- viewport: { width: 1280, height: 900 },
100
- userAgent: 'Mozilla/5.0 AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36',
101
- });
102
- const page = await context.newPage();
103
-
104
- await page.goto(config.YIYAN_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
105
- await page.waitForTimeout(800);
106
-
107
- logger.success('Tab ready.');
108
-
109
- // Return without browserServer - let it run independently
110
- // Do NOT close browserServer on process exit
111
- return { browser, context, page, isNew: true };
112
- }
113
-
114
- /**
115
- * Force close everything (for cleanup)
116
- */
117
- static async forceClose() {
118
- try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
119
- // Browser continues running independently
120
- }
121
- }
122
-
123
- module.exports = BrowserManager;
package/src/page-agent.js DELETED
@@ -1,157 +0,0 @@
1
- // src/page-agent.js — Agent for a single page/tab
2
- 'use strict';
3
-
4
- const logger = require('./logger');
5
- const config = require('./config');
6
-
7
- // Selectors for Yiyan
8
- const SEL = {
9
- chatInput: [
10
- '.editable__T7WAW4uW',
11
- '[role="textbox"]',
12
- '[contenteditable="true"]',
13
- 'textarea',
14
- ],
15
- sendButton: [
16
- 'button[aria-label*="发送"]',
17
- 'button[type="submit"]',
18
- ],
19
- };
20
-
21
- class PageAgent {
22
- constructor(page) {
23
- this.page = page;
24
- }
25
-
26
- /**
27
- * Send message to Yiyan
28
- */
29
- async sendMessage(text) {
30
- // Wait for page to be fully loaded
31
- await this.page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {});
32
- await this.page.waitForTimeout(1000);
33
-
34
- // Find input
35
- let inputEl = null;
36
- for (const sel of SEL.chatInput) {
37
- try {
38
- inputEl = await this.page.waitForSelector(sel, { timeout: 8000, state: 'visible' });
39
- if (inputEl) {
40
- logger.dim('Found input: ' + sel);
41
- break;
42
- }
43
- } catch {}
44
- }
45
-
46
- if (!inputEl) {
47
- throw new Error('Cannot find input box. Make sure Yiyan page is loaded.');
48
- }
49
-
50
- // Focus and clear
51
- await inputEl.click({ clickCount: 3, force: true });
52
- await this.page.waitForTimeout(100);
53
- await this.page.keyboard.press('Delete');
54
- await this.page.waitForTimeout(50);
55
-
56
- // Type message
57
- await this.page.keyboard.type(text, { delay: 10 });
58
- await this.page.keyboard.press('Enter');
59
- }
60
-
61
- /**
62
- * Wait for response
63
- */
64
- async waitForResponse() {
65
- const timeout = config.RESPONSE_TIMEOUT;
66
- const stableDelay = config.STABLE_DELAY;
67
- const start = Date.now();
68
-
69
- // Wait for new content
70
- await this.page.waitForTimeout(1000);
71
-
72
- // Poll for stable response
73
- let lastText = '';
74
- let stableStart = null;
75
-
76
- while (Date.now() - start < timeout) {
77
- const text = await this._extractAnswer();
78
-
79
- if (text !== lastText && text.length > 0) {
80
- lastText = text;
81
- stableStart = Date.now();
82
- } else if (stableStart && Date.now() - stableStart >= stableDelay) {
83
- if (!await this._isGenerating()) {
84
- break;
85
- }
86
- stableStart = null;
87
- }
88
-
89
- await this.page.waitForTimeout(200);
90
- }
91
-
92
- return this._cleanText(lastText);
93
- }
94
-
95
- /**
96
- * Extract answer from page
97
- */
98
- async _extractAnswer() {
99
- return await this.page.evaluate(() => {
100
- // Try specific selector first
101
- const answerEl = document.querySelector('#answer_text_id');
102
- if (answerEl) return answerEl.textContent || '';
103
-
104
- // Fallback
105
- const candidates = document.querySelectorAll('[class*="answer"], [class*="response"], [class*="markdown"]');
106
- for (const el of candidates) {
107
- const text = el.textContent || '';
108
- if (text.length > 20) return text;
109
- }
110
-
111
- return '';
112
- });
113
- }
114
-
115
- /**
116
- * Check if still generating
117
- */
118
- async _isGenerating() {
119
- return await this.page.evaluate(() => {
120
- const stopBtn = document.querySelector('button[aria-label*="停止"]');
121
- if (stopBtn && stopBtn.offsetParent !== null) return true;
122
-
123
- const loading = document.querySelector('[class*="loading"], [class*="typing"]');
124
- if (loading) return true;
125
-
126
- return false;
127
- });
128
- }
129
-
130
- /**
131
- * Clean response text
132
- */
133
- _cleanText(text) {
134
- if (!text) return '';
135
-
136
- // Remove before "准备输出结果"
137
- const marker = '准备输出结果';
138
- const idx = text.indexOf(marker);
139
- if (idx !== -1) {
140
- text = text.slice(idx + marker.length).trim();
141
- }
142
-
143
- // Remove after markers
144
- const cutMarkers = ['重新生成', '换个回答', '输出更详细的', '再多提供'];
145
- for (const m of cutMarkers) {
146
- const cutIdx = text.indexOf(m);
147
- if (cutIdx !== -1) {
148
- text = text.slice(0, cutIdx).trim();
149
- break;
150
- }
151
- }
152
-
153
- return text.trim();
154
- }
155
- }
156
-
157
- module.exports = PageAgent;