yiyan-browser-agent 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yiyan-browser-agent",
3
- "version": "1.0.21",
3
+ "version": "1.0.22",
4
4
  "description": "AI coding agent powered by Yiyan (文心一言) via browser automation — no API key needed",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/agent.js CHANGED
@@ -1,16 +1,10 @@
1
- // src/agent.js — The core agent loop that ties everything together
1
+ // src/agent.js — The core agent loop
2
2
  'use strict';
3
3
 
4
- const fs = require('fs');
5
- const path = require('path');
6
- const { execSync } = require('child_process');
7
4
  const config = require('./config');
8
5
  const logger = require('./logger');
9
6
  const BrowserManager = require('./browser-manager');
10
- const { executeTool } = require('./tools');
11
- const { parseResponse,
12
- formatToolResult } = require('./parser');
13
- const { ConversationManager } = require('./prompt');
7
+ const PageAgent = require('./page-agent');
14
8
 
15
9
  // ─────────────────────────────────────────────
16
10
  // Agent class
@@ -18,161 +12,68 @@ const { ConversationManager } = require('./prompt');
18
12
 
19
13
  class YiyanAgent {
20
14
  constructor(options = {}) {
21
- this.browser = null; // Will be obtained from BrowserManager
22
- this.conversation = new ConversationManager();
23
- this.options = options;
24
- this._running = false;
15
+ this.pageAgent = null;
16
+ this.options = options;
17
+ this._running = false;
25
18
  }
26
19
 
27
20
  // ── Public API ──────────────────────────────────────────────────────────────
28
21
 
29
- /** Boot the browser (reuse if available, restart if error) */
22
+ /** Get browser and create page agent */
30
23
  async init() {
31
- this.browser = await BrowserManager.getInstance();
32
- await BrowserManager.newChat();
24
+ const { browser, page } = await BrowserManager.getInstance();
25
+ this.pageAgent = new PageAgent(page);
33
26
  }
34
27
 
35
- /** Shut down cleanly (only on explicit exit, not after each task) */
28
+ /** Shutdown */
36
29
  async shutdown() {
37
- await BrowserManager.close();
30
+ await BrowserManager.forceClose();
38
31
  }
39
32
 
40
33
  /**
41
- * Run a task to completion.
42
- * Returns JSON object with question, answer, duration.
34
+ * Run a task
43
35
  */
44
36
  async run(task) {
45
- this._running = true;
46
- const maxIter = config.MAX_ITERATIONS;
37
+ this._running = true;
47
38
  const startTime = Date.now();
48
39
 
49
- // ── Send task directly ───────────────────────────────────
50
40
  logger.info('Sending task to Yiyan...');
51
- try {
52
- await this.browser.sendMessage(task);
53
- } catch (err) {
54
- throw err;
55
- }
56
-
57
- // ── Agent loop ──────────────────────────────────────────────────────
58
- let finalAnswer = '';
59
- for (let iter = 1; iter <= maxIter; iter++) {
60
- logger.iteration(iter, maxIter);
61
-
62
- // Wait for response from Yiyan
63
- const rawResponse = await this.browser.waitForResponse();
64
-
65
- if (!rawResponse || rawResponse.trim().length === 0) {
66
- logger.warn('Empty response received — retrying...');
67
- await this.browser.sendMessage('Please continue.');
68
- continue;
69
- }
70
-
71
- if (config.DEBUG) {
72
- logger.dim(`--- Raw response (${rawResponse.length} chars) ---`);
73
- logger.dim(rawResponse.slice(0, 400));
74
- }
75
-
76
- this.conversation.addAssistantMessage(rawResponse);
77
- const parsed = parseResponse(rawResponse);
78
-
79
- // ── Tool call ────────────────────────────────────────────────────
80
- if (parsed.type === 'tool_call') {
81
- logger.toolCall(parsed.name, parsed.args);
82
- let result, isError = false;
83
- try {
84
- result = await executeTool(parsed.name, parsed.args);
85
- logger.toolResult(result);
86
- } catch (err) {
87
- result = `Error: ${err.message}`;
88
- isError = true;
89
- logger.toolResult(result, true);
90
- }
91
- const feedbackMsg = this.conversation.addToolResult(parsed.name, result, isError);
92
- await this.browser.sendMessage(feedbackMsg);
93
- continue;
94
- }
95
-
96
- // ── Parse error ──────────────────────────────────────────────────
97
- if (parsed.type === 'error') {
98
- logger.warn(`Parse error: ${parsed.message}`);
99
- const recovery = this.conversation.addToolResult('SYSTEM', `Parse error: ${parsed.message}`, true);
100
- await this.browser.sendMessage(recovery);
101
- continue;
102
- }
41
+ await this.pageAgent.sendMessage(task);
103
42
 
104
- // ── Final response ───────────────────────────────────────────────
105
- if (parsed.type === 'final') {
106
- finalAnswer = parsed.content;
107
- if (this.options.saveLog) {
108
- await this._saveConversationLog(task, parsed.content);
109
- }
110
- this._running = false;
111
- break;
112
- }
113
- }
43
+ logger.info('Waiting for response...');
44
+ const answer = await this.pageAgent.waitForResponse();
114
45
 
115
46
  const duration = Date.now() - startTime;
116
- let status = 'success';
117
- if (!finalAnswer) {
118
- finalAnswer = `Reached maximum iterations (${maxIter}).`;
119
- status = 'incomplete';
120
- }
121
-
122
47
  this._running = false;
123
48
 
124
- // Return JSON
125
49
  return {
126
50
  question: task,
127
- answer: finalAnswer,
128
- duration: duration,
129
- status: status
51
+ answer: answer || 'No response received',
52
+ duration,
53
+ status: answer ? 'success' : 'error'
130
54
  };
131
55
  }
132
56
 
133
- // ── Interactive (REPL) Mode ────────────────────────────────────────────────
57
+ // ── Interactive Mode ────────────────────────────────────────────────────────
134
58
 
135
- /**
136
- * Run the agent in interactive mode — keeps the browser open
137
- * and outputs JSON format.
138
- */
139
59
  async runInteractive() {
140
60
  const readline = require('readline');
141
-
142
61
  const rl = readline.createInterface({
143
- input : process.stdin,
144
- output : process.stdout,
145
- terminal : true,
62
+ input: process.stdin,
63
+ output: process.stdout,
146
64
  });
147
65
 
148
66
  const ask = () => new Promise(resolve => rl.question('', resolve));
149
67
 
150
68
  while (true) {
151
- let task;
152
- try {
153
- task = (await ask()).trim();
154
- } catch {
155
- break;
156
- }
157
-
69
+ const task = (await ask()).trim();
158
70
  if (!task) continue;
159
-
160
- if (['exit', 'quit', 'q'].includes(task.toLowerCase())) {
161
- break;
162
- }
163
-
164
- if (task.toLowerCase() === 'new') {
165
- await this.browser.newChat();
166
- this.conversation = new ConversationManager();
167
- continue;
168
- }
169
-
170
- this.conversation = new ConversationManager();
71
+ if (['exit', 'quit', 'q'].includes(task.toLowerCase())) break;
171
72
 
172
73
  try {
173
- await this.browser.newChat();
74
+ // Re-init for new tab
75
+ await this.init();
174
76
  const result = await this.run(task);
175
- // Output JSON to stdout
176
77
  console.log(JSON.stringify(result, null, 2));
177
78
  } catch (err) {
178
79
  console.log(JSON.stringify({
@@ -186,77 +87,6 @@ class YiyanAgent {
186
87
 
187
88
  rl.close();
188
89
  }
189
-
190
- // ── Helpers ────────────────────────────────────────────────────────────────
191
-
192
- _getWorkingDirListing() {
193
- try {
194
- // Use Node.js fs for cross-platform compatibility (Windows compatible)
195
- const fs = require('fs');
196
- const pathModule = require('path');
197
- const cwd = config.WORKING_DIR;
198
-
199
- const excludeDirs = ['node_modules', '.git', 'dist', '.next', 'build', '__pycache__', '.idea', '.vscode'];
200
- const excludeFiles = ['.lock', 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml'];
201
-
202
- let results = [];
203
-
204
- function walk(dir, depth) {
205
- if (depth > 3) return;
206
- try {
207
- const entries = fs.readdirSync(dir, { withFileTypes: true });
208
- for (const entry of entries) {
209
- const fullPath = pathModule.join(dir, entry.name);
210
- const relativePath = pathModule.relative(cwd, fullPath);
211
-
212
- if (entry.isDirectory()) {
213
- if (excludeDirs.includes(entry.name)) continue;
214
- results.push(relativePath);
215
- walk(fullPath, depth + 1);
216
- } else if (entry.isFile()) {
217
- if (excludeFiles.some(ext => entry.name.endsWith(ext))) continue;
218
- results.push(relativePath);
219
- }
220
- }
221
- } catch {}
222
- }
223
-
224
- walk(cwd, 1);
225
- return results.slice(0, 80).join('\n') || '(empty directory)';
226
- } catch {
227
- return '(could not read directory)';
228
- }
229
- }
230
-
231
- async _saveConversationLog(task, finalResponse) {
232
- try {
233
- const logsDir = path.join(os.homedir(), '.yiyan-agent', 'logs');
234
- fs.mkdirSync(logsDir, { recursive: true });
235
-
236
- const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
237
- const logFile = path.join(logsDir, `session-${ts}.txt`);
238
- const content = [
239
- `Yiyan Agent — Session Log`,
240
- `Date: ${new Date().toISOString()}`,
241
- `Task: ${task}`,
242
- `Working Dir: ${config.WORKING_DIR}`,
243
- '═'.repeat(60),
244
- this.conversation.exportLog(),
245
- '',
246
- '═'.repeat(60),
247
- 'FINAL RESPONSE:',
248
- finalResponse,
249
- ].join('\n');
250
-
251
- fs.writeFileSync(logFile, content, 'utf8');
252
- logger.dim(`Conversation saved: ${logFile}`);
253
- } catch (err) {
254
- logger.warn(`Could not save log: ${err.message}`);
255
- }
256
- }
257
90
  }
258
91
 
259
- // Pull os into scope for the log save helper
260
- const os = require('os');
261
-
262
92
  module.exports = YiyanAgent;
@@ -1,78 +1,58 @@
1
- // src/browser-manager.js — Singleton browser instance manager for reuse
1
+ // src/browser-manager.js — Cross-process browser reuse via CDP
2
2
  'use strict';
3
3
 
4
4
  const { chromium } = require('playwright');
5
- const YiyanBrowser = require('./browser');
6
- const logger = require('./logger');
7
- const fs = require('fs');
8
5
  const path = require('path');
9
6
  const os = require('os');
7
+ const fs = require('fs');
8
+ const logger = require('./logger');
9
+ const config = require('./config');
10
10
 
11
- // CDP port file path
12
11
  const CDP_PORT_FILE = path.join(os.homedir(), '.yiyan-agent', 'cdp-port.json');
13
12
  const CDP_PORT = 9222;
14
13
 
15
- // Global instance for current process
16
- let _instance = null;
17
-
18
14
  class BrowserManager {
19
15
  /**
20
- * Get browser instance - connect to existing or launch new
16
+ * Get browser - connect existing or launch new
21
17
  */
22
18
  static async getInstance() {
23
- // Try to connect to existing browser first
24
- const existing = await this._tryConnectExisting();
25
- if (existing) {
26
- logger.success('Connected to existing browser (reused)');
27
- _instance = existing;
28
- return _instance;
29
- }
30
-
31
- // Launch new browser with CDP port
32
- logger.info('Launching new browser...');
33
- _instance = await this._launchWithCDP();
34
- return _instance;
35
- }
36
-
37
- /**
38
- * Try to connect to existing browser via CDP
39
- */
40
- static async _tryConnectExisting() {
19
+ // Try connect existing browser
41
20
  try {
42
- // Check if CDP port file exists
43
- if (!fs.existsSync(CDP_PORT_FILE)) return null;
21
+ if (fs.existsSync(CDP_PORT_FILE)) {
22
+ const portInfo = JSON.parse(fs.readFileSync(CDP_PORT_FILE, 'utf8'));
23
+ const browserURL = `http://localhost:${portInfo.port || CDP_PORT}`;
44
24
 
45
- const portInfo = JSON.parse(fs.readFileSync(CDP_PORT_FILE, 'utf8'));
46
- const browserURL = `http://localhost:${portInfo.port || CDP_PORT}`;
25
+ logger.info('Connecting to existing browser...');
26
+ const browser = await chromium.connectOverCDP(browserURL, { timeout: 5000 });
47
27
 
48
- // Try to connect
49
- const context = await chromium.connectOverCDP(browserURL, {
50
- timeout: 3000
51
- });
28
+ // Create NEW page (tab) for this task
29
+ const page = await browser.newPage();
30
+ await page.goto(config.YIYAN_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
31
+ await page.waitForTimeout(500);
52
32
 
53
- // Create wrapper instance
54
- const wrapper = new YiyanBrowser();
55
- wrapper.context = context;
56
- wrapper.page = context.pages()[0] || await context.newPage();
57
- wrapper._closed = false;
58
- wrapper._connected = true; // Mark as connected, not owned
59
-
60
- return wrapper;
33
+ logger.success('Connected! Using new tab.');
34
+ return { browser, page, isNew: false };
35
+ }
61
36
  } catch (err) {
62
- // Connection failed, remove stale port file
37
+ logger.warn('Connection failed, launching new browser...');
63
38
  try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
64
- return null;
65
39
  }
40
+
41
+ // Launch new browser with CDP
42
+ return await this._launchNew();
66
43
  }
67
44
 
68
45
  /**
69
- * Launch new browser with CDP port for future connections
46
+ * Launch new browser with CDP port
70
47
  */
71
- static async _launchWithCDP() {
72
- const wrapper = new YiyanBrowser();
48
+ static async _launchNew() {
49
+ logger.info('Launching new browser...');
73
50
 
74
- // Launch browser with CDP port
51
+ // Ensure session directory
75
52
  const sessionDir = path.join(os.homedir(), '.yiyan-agent', 'session');
53
+ fs.mkdirSync(sessionDir, { recursive: true });
54
+
55
+ // Launch with persistent context + CDP port
76
56
  const context = await chromium.launchPersistentContext(sessionDir, {
77
57
  headless: false,
78
58
  viewport: { width: 1280, height: 900 },
@@ -82,59 +62,45 @@ class BrowserManager {
82
62
  '--no-first-run',
83
63
  '--no-sandbox',
84
64
  ],
85
- userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36',
65
+ userAgent: 'Mozilla/5.0 AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36',
86
66
  });
87
67
 
88
- wrapper.context = context;
89
- wrapper.page = context.pages()[0] || await context.newPage();
90
- wrapper._closed = false;
91
- wrapper._connected = false; // Owned by this process
68
+ // Get the browser from context
69
+ const browser = context.browser();
92
70
 
93
- // Save CDP port info
71
+ // Save CDP port
94
72
  fs.writeFileSync(CDP_PORT_FILE, JSON.stringify({
95
73
  port: CDP_PORT,
74
+ pid: process.pid,
96
75
  launchedAt: Date.now()
97
76
  }));
98
77
 
99
- // Navigate to Yiyan
100
- await wrapper._navigate('https://yiyan.baidu.com/');
78
+ // Use existing page or create new
79
+ const pages = context.pages();
80
+ const page = pages.length > 0 ? pages[0] : await context.newPage();
101
81
 
102
- return wrapper;
103
- }
82
+ await page.goto(config.YIYAN_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
83
+ await page.waitForTimeout(800);
104
84
 
105
- /**
106
- * New chat session
107
- */
108
- static async newChat() {
109
- if (_instance) {
110
- await _instance.newChat();
111
- }
85
+ logger.success('Browser launched! Tab ready.');
86
+ return { browser, context, page, isNew: true };
112
87
  }
113
88
 
114
89
  /**
115
- * Close browser (only if we own it)
90
+ * Close current page (tab) but keep browser running
116
91
  */
117
- static async close() {
118
- if (_instance && !_instance._connected) {
119
- // We own the browser, close it
120
- try {
121
- await _instance.close();
122
- fs.unlinkSync(CDP_PORT_FILE);
123
- } catch {}
124
- }
125
- // If connected to existing browser, don't close it
126
- _instance = null;
92
+ static async closePage(page) {
93
+ try {
94
+ await page.close();
95
+ } catch {}
127
96
  }
128
97
 
129
98
  /**
130
- * Force close (for cleanup on error)
99
+ * Force close everything (for cleanup)
131
100
  */
132
101
  static async forceClose() {
133
- if (_instance) {
134
- try { await _instance.close(); } catch {}
135
- try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
136
- _instance = null;
137
- }
102
+ try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
103
+ // Browser will close when user does Ctrl+C or manually
138
104
  }
139
105
  }
140
106
 
@@ -0,0 +1,150 @@
1
+ // src/page-agent.js — Agent for a single page/tab
2
+ 'use strict';
3
+
4
+ const logger = require('./logger');
5
+ const config = require('./config');
6
+
// Candidate selectors for the Yiyan chat UI. Each list is ordered by
// preference; the first selector that matches wins.
// NOTE(review): '.editable__T7WAW4uW' looks like a build-generated class name
// and may change between site releases — the generic fallbacks cover that.
const SEL = {
  chatInput: [
    '.editable__T7WAW4uW',
    '[role="textbox"]',
    '[contenteditable="true"]',
    'textarea',
  ],
  sendButton: [
    'button[aria-label*="发送"]',
    'button[type="submit"]',
  ],
};

/**
 * Drives a single Yiyan chat tab: types a prompt into the page, waits for
 * the streamed answer to settle, and returns the cleaned answer text.
 */
class PageAgent {
  /**
   * @param {object} page - Page handle used for all interaction. It must
   *   provide waitForSelector, waitForTimeout, evaluate and keyboard
   *   (presumably a Playwright Page — confirm against the caller).
   */
  constructor(page) {
    this.page = page;
  }

  /**
   * Type `text` into the chat input and submit it with Enter.
   *
   * @param {string} text - Message to send.
   * @returns {Promise<void>}
   * @throws {Error} 'Cannot find input box' when none of SEL.chatInput
   *   becomes visible.
   */
  async sendMessage(text) {
    // Probe the selectors in preference order; each probe waits up to 5s.
    let inputEl = null;
    for (const sel of SEL.chatInput) {
      try {
        inputEl = await this.page.waitForSelector(sel, { timeout: 5000, state: 'visible' });
        if (inputEl) break;
      } catch {
        // Deliberate best-effort: this selector did not match, try the next.
      }
    }

    if (!inputEl) {
      throw new Error('Cannot find input box');
    }

    // Triple-click selects any existing draft so Delete clears the box.
    await inputEl.click({ clickCount: 3, force: true });
    await this.page.waitForTimeout(100);
    await this.page.keyboard.press('Delete');
    await this.page.waitForTimeout(50);

    // NOTE(review): keyboard.type emits key presses per character, so a
    // newline inside `text` is presumably delivered as Enter and may submit
    // the message early — confirm before sending multi-line prompts.
    await this.page.keyboard.type(text, { delay: 10 });
    await this.page.keyboard.press('Enter');
  }

  /**
   * Poll the page until the answer text has stopped changing for
   * config.STABLE_DELAY ms and the page no longer looks busy, or until
   * config.RESPONSE_TIMEOUT ms have elapsed.
   *
   * @returns {Promise<string>} Cleaned answer text; '' if nothing was
   *   captured before the timeout.
   */
  async waitForResponse() {
    const timeout = config.RESPONSE_TIMEOUT;
    const stableDelay = config.STABLE_DELAY;
    const start = Date.now();

    // Give the page a moment to start rendering the new answer.
    await this.page.waitForTimeout(1000);

    let lastText = '';
    let stableStart = null; // when the current text was first seen unchanged

    while (Date.now() - start < timeout) {
      const text = await this._extractAnswer();

      if (text !== lastText && text.length > 0) {
        // New content arrived: remember it and restart the stability window.
        lastText = text;
        stableStart = Date.now();
      } else if (stableStart !== null && Date.now() - stableStart >= stableDelay) {
        if (!(await this._isGenerating())) {
          break;
        }
        // Still busy: re-arm the window instead of clearing it. Clearing it
        // would stall until the full timeout whenever generation finishes
        // without producing any further text.
        stableStart = Date.now();
      }

      await this.page.waitForTimeout(200);
    }

    return this._cleanText(lastText);
  }

  /**
   * Read the current answer text from the DOM.
   *
   * @returns {Promise<string>} textContent of #answer_text_id when present,
   *   otherwise the first answer/response/markdown-classed element whose
   *   text is longer than 20 characters, otherwise ''.
   */
  async _extractAnswer() {
    return await this.page.evaluate(() => {
      // Preferred: the dedicated answer container.
      const answerEl = document.querySelector('#answer_text_id');
      if (answerEl) return answerEl.textContent || '';

      // Fallback: first sufficiently long candidate element.
      const candidates = document.querySelectorAll('[class*="answer"], [class*="response"], [class*="markdown"]');
      for (const el of candidates) {
        const text = el.textContent || '';
        if (text.length > 20) return text;
      }

      return '';
    });
  }

  /**
   * Report whether the page still shows signs of an in-progress answer.
   *
   * @returns {Promise<boolean>} true when a rendered stop button or any
   *   loading/typing-classed element is present.
   */
  async _isGenerating() {
    return await this.page.evaluate(() => {
      // offsetParent is null for elements that are not rendered.
      const stopBtn = document.querySelector('button[aria-label*="停止"]');
      if (stopBtn && stopBtn.offsetParent !== null) return true;

      const loading = document.querySelector('[class*="loading"], [class*="typing"]');
      if (loading) return true;

      return false;
    });
  }

  /**
   * Strip UI chrome from the raw answer text.
   *
   * Drops everything up to and including the first "准备输出结果" marker,
   * then truncates at whichever trailing marker occurs EARLIEST in the text
   * (not merely the first one in the marker list).
   *
   * @param {string} text - Raw text scraped from the page.
   * @returns {string} Trimmed answer; '' for empty or missing input.
   */
  _cleanText(text) {
    if (!text) return '';

    let cleaned = text;

    // Remove everything before (and including) "准备输出结果".
    const marker = '准备输出结果';
    const idx = cleaned.indexOf(marker);
    if (idx !== -1) {
      cleaned = cleaned.slice(idx + marker.length).trim();
    }

    // Cut at the earliest trailing marker so no later-listed label leaks in.
    const cutMarkers = ['重新生成', '换个回答', '输出更详细的', '再多提供'];
    let cutAt = -1;
    for (const m of cutMarkers) {
      const pos = cleaned.indexOf(m);
      if (pos !== -1 && (cutAt === -1 || pos < cutAt)) {
        cutAt = pos;
      }
    }
    if (cutAt !== -1) {
      cleaned = cleaned.slice(0, cutAt);
    }

    return cleaned.trim();
  }
}
149
+
150
+ module.exports = PageAgent;