yiyan-browser-agent 1.0.20 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yiyan-browser-agent",
3
- "version": "1.0.20",
3
+ "version": "1.0.22",
4
4
  "description": "AI coding agent powered by Yiyan (文心一言) via browser automation — no API key needed",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/agent.js CHANGED
@@ -1,16 +1,10 @@
1
- // src/agent.js — The core agent loop that ties everything together
1
+ // src/agent.js — The core agent loop
2
2
  'use strict';
3
3
 
4
- const fs = require('fs');
5
- const path = require('path');
6
- const { execSync } = require('child_process');
7
4
  const config = require('./config');
8
5
  const logger = require('./logger');
9
6
  const BrowserManager = require('./browser-manager');
10
- const { executeTool } = require('./tools');
11
- const { parseResponse,
12
- formatToolResult } = require('./parser');
13
- const { ConversationManager } = require('./prompt');
7
+ const PageAgent = require('./page-agent');
14
8
 
15
9
  // ─────────────────────────────────────────────
16
10
  // Agent class
@@ -18,170 +12,68 @@ const { ConversationManager } = require('./prompt');
18
12
 
19
13
  class YiyanAgent {
20
14
  constructor(options = {}) {
21
- this.browser = null; // Will be obtained from BrowserManager
22
- this.conversation = new ConversationManager();
23
- this.options = options;
24
- this._running = false;
15
+ this.pageAgent = null;
16
+ this.options = options;
17
+ this._running = false;
25
18
  }
26
19
 
27
20
  // ── Public API ──────────────────────────────────────────────────────────────
28
21
 
29
- /** Boot the browser (reuse if available, restart if error) */
22
+ /** Get browser and create page agent */
30
23
  async init() {
31
- this.browser = await BrowserManager.getInstance();
32
- await BrowserManager.newChat();
24
+ const { browser, page } = await BrowserManager.getInstance();
25
+ this.pageAgent = new PageAgent(page);
33
26
  }
34
27
 
35
- /** Shut down cleanly (only on explicit exit, not after each task) */
28
+ /** Shutdown */
36
29
  async shutdown() {
37
- await BrowserManager.close();
30
+ await BrowserManager.forceClose();
38
31
  }
39
32
 
40
33
  /**
41
- * Run a task to completion.
42
- * Returns JSON object with question, answer, duration.
34
+ * Run a task
43
35
  */
44
36
  async run(task) {
45
- this._running = true;
46
- const maxIter = config.MAX_ITERATIONS;
37
+ this._running = true;
47
38
  const startTime = Date.now();
48
39
 
49
- // ── Send task directly ───────────────────────────────────
50
40
  logger.info('Sending task to Yiyan...');
51
- try {
52
- await this.browser.sendMessage(task);
53
- } catch (err) {
54
- // Input error - mark for restart
55
- BrowserManager.markError();
56
- throw err;
57
- }
58
-
59
- // ── Agent loop ──────────────────────────────────────────────────────
60
- let finalAnswer = '';
61
- for (let iter = 1; iter <= maxIter; iter++) {
62
- logger.iteration(iter, maxIter);
63
-
64
- // Wait for response from Yiyan
65
- const rawResponse = await this.browser.waitForResponse();
66
-
67
- if (!rawResponse || rawResponse.trim().length === 0) {
68
- logger.warn('Empty response received — retrying...');
69
- await this.browser.sendMessage('Please continue.');
70
- continue;
71
- }
72
-
73
- if (config.DEBUG) {
74
- logger.dim(`--- Raw response (${rawResponse.length} chars) ---`);
75
- logger.dim(rawResponse.slice(0, 400));
76
- }
77
-
78
- this.conversation.addAssistantMessage(rawResponse);
79
- const parsed = parseResponse(rawResponse);
80
-
81
- // ── Tool call ────────────────────────────────────────────────────
82
- if (parsed.type === 'tool_call') {
83
- logger.toolCall(parsed.name, parsed.args);
84
- let result, isError = false;
85
- try {
86
- result = await executeTool(parsed.name, parsed.args);
87
- logger.toolResult(result);
88
- } catch (err) {
89
- result = `Error: ${err.message}`;
90
- isError = true;
91
- logger.toolResult(result, true);
92
- }
93
- const feedbackMsg = this.conversation.addToolResult(parsed.name, result, isError);
94
- await this.browser.sendMessage(feedbackMsg);
95
- continue;
96
- }
41
+ await this.pageAgent.sendMessage(task);
97
42
 
98
- // ── Parse error ──────────────────────────────────────────────────
99
- if (parsed.type === 'error') {
100
- logger.warn(`Parse error: ${parsed.message}`);
101
- const recovery = this.conversation.addToolResult('SYSTEM', `Parse error: ${parsed.message}`, true);
102
- await this.browser.sendMessage(recovery);
103
- continue;
104
- }
105
-
106
- // ── Final response ───────────────────────────────────────────────
107
- if (parsed.type === 'final') {
108
- finalAnswer = parsed.content;
109
- if (this.options.saveLog) {
110
- await this._saveConversationLog(task, parsed.content);
111
- }
112
- this._running = false;
113
- break;
114
- }
115
- }
43
+ logger.info('Waiting for response...');
44
+ const answer = await this.pageAgent.waitForResponse();
116
45
 
117
46
  const duration = Date.now() - startTime;
118
- let status = 'success';
119
- if (!finalAnswer) {
120
- finalAnswer = `Reached maximum iterations (${maxIter}).`;
121
- status = 'incomplete';
122
- }
123
-
124
- // Mark browser status for reuse
125
- if (status === 'success') {
126
- BrowserManager.markSuccess(); // Keep browser open for next task
127
- } else {
128
- BrowserManager.markError(); // Restart browser next time
129
- }
130
-
131
47
  this._running = false;
132
48
 
133
- // Return JSON
134
49
  return {
135
50
  question: task,
136
- answer: finalAnswer,
137
- duration: duration,
138
- status: status
51
+ answer: answer || 'No response received',
52
+ duration,
53
+ status: answer ? 'success' : 'error'
139
54
  };
140
55
  }
141
56
 
142
- // ── Interactive (REPL) Mode ────────────────────────────────────────────────
57
+ // ── Interactive Mode ────────────────────────────────────────────────────────
143
58
 
144
- /**
145
- * Run the agent in interactive mode — keeps the browser open
146
- * and outputs JSON format.
147
- */
148
59
  async runInteractive() {
149
60
  const readline = require('readline');
150
-
151
61
  const rl = readline.createInterface({
152
- input : process.stdin,
153
- output : process.stdout,
154
- terminal : true,
62
+ input: process.stdin,
63
+ output: process.stdout,
155
64
  });
156
65
 
157
66
  const ask = () => new Promise(resolve => rl.question('', resolve));
158
67
 
159
68
  while (true) {
160
- let task;
161
- try {
162
- task = (await ask()).trim();
163
- } catch {
164
- break;
165
- }
166
-
69
+ const task = (await ask()).trim();
167
70
  if (!task) continue;
168
-
169
- if (['exit', 'quit', 'q'].includes(task.toLowerCase())) {
170
- break;
171
- }
172
-
173
- if (task.toLowerCase() === 'new') {
174
- await this.browser.newChat();
175
- this.conversation = new ConversationManager();
176
- continue;
177
- }
178
-
179
- this.conversation = new ConversationManager();
71
+ if (['exit', 'quit', 'q'].includes(task.toLowerCase())) break;
180
72
 
181
73
  try {
182
- await this.browser.newChat();
74
+ // Re-init for new tab
75
+ await this.init();
183
76
  const result = await this.run(task);
184
- // Output JSON to stdout
185
77
  console.log(JSON.stringify(result, null, 2));
186
78
  } catch (err) {
187
79
  console.log(JSON.stringify({
@@ -195,77 +87,6 @@ class YiyanAgent {
195
87
 
196
88
  rl.close();
197
89
  }
198
-
199
- // ── Helpers ────────────────────────────────────────────────────────────────
200
-
201
- _getWorkingDirListing() {
202
- try {
203
- // Use Node.js fs for cross-platform compatibility (Windows compatible)
204
- const fs = require('fs');
205
- const pathModule = require('path');
206
- const cwd = config.WORKING_DIR;
207
-
208
- const excludeDirs = ['node_modules', '.git', 'dist', '.next', 'build', '__pycache__', '.idea', '.vscode'];
209
- const excludeFiles = ['.lock', 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml'];
210
-
211
- let results = [];
212
-
213
- function walk(dir, depth) {
214
- if (depth > 3) return;
215
- try {
216
- const entries = fs.readdirSync(dir, { withFileTypes: true });
217
- for (const entry of entries) {
218
- const fullPath = pathModule.join(dir, entry.name);
219
- const relativePath = pathModule.relative(cwd, fullPath);
220
-
221
- if (entry.isDirectory()) {
222
- if (excludeDirs.includes(entry.name)) continue;
223
- results.push(relativePath);
224
- walk(fullPath, depth + 1);
225
- } else if (entry.isFile()) {
226
- if (excludeFiles.some(ext => entry.name.endsWith(ext))) continue;
227
- results.push(relativePath);
228
- }
229
- }
230
- } catch {}
231
- }
232
-
233
- walk(cwd, 1);
234
- return results.slice(0, 80).join('\n') || '(empty directory)';
235
- } catch {
236
- return '(could not read directory)';
237
- }
238
- }
239
-
240
- async _saveConversationLog(task, finalResponse) {
241
- try {
242
- const logsDir = path.join(os.homedir(), '.yiyan-agent', 'logs');
243
- fs.mkdirSync(logsDir, { recursive: true });
244
-
245
- const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
246
- const logFile = path.join(logsDir, `session-${ts}.txt`);
247
- const content = [
248
- `Yiyan Agent — Session Log`,
249
- `Date: ${new Date().toISOString()}`,
250
- `Task: ${task}`,
251
- `Working Dir: ${config.WORKING_DIR}`,
252
- '═'.repeat(60),
253
- this.conversation.exportLog(),
254
- '',
255
- '═'.repeat(60),
256
- 'FINAL RESPONSE:',
257
- finalResponse,
258
- ].join('\n');
259
-
260
- fs.writeFileSync(logFile, content, 'utf8');
261
- logger.dim(`Conversation saved: ${logFile}`);
262
- } catch (err) {
263
- logger.warn(`Could not save log: ${err.message}`);
264
- }
265
- }
266
90
  }
267
91
 
268
- // Pull os into scope for the log save helper
269
- const os = require('os');
270
-
271
92
  module.exports = YiyanAgent;
package/src/browser-manager.js CHANGED
@@ -1,78 +1,58 @@
1
- // src/browser-manager.js — Singleton browser instance manager for reuse
1
+ // src/browser-manager.js — Cross-process browser reuse via CDP
2
2
  'use strict';
3
3
 
4
4
  const { chromium } = require('playwright');
5
- const YiyanBrowser = require('./browser');
6
- const logger = require('./logger');
7
- const fs = require('fs');
8
5
  const path = require('path');
9
6
  const os = require('os');
7
+ const fs = require('fs');
8
+ const logger = require('./logger');
9
+ const config = require('./config');
10
10
 
11
- // CDP port file path
12
11
  const CDP_PORT_FILE = path.join(os.homedir(), '.yiyan-agent', 'cdp-port.json');
13
12
  const CDP_PORT = 9222;
14
13
 
15
- // Global instance for current process
16
- let _instance = null;
17
-
18
14
  class BrowserManager {
19
15
  /**
20
- * Get browser instance - connect to existing or launch new
16
+ * Get browser - connect existing or launch new
21
17
  */
22
18
  static async getInstance() {
23
- // Try to connect to existing browser first
24
- const existing = await this._tryConnectExisting();
25
- if (existing) {
26
- logger.success('Connected to existing browser (reused)');
27
- _instance = existing;
28
- return _instance;
29
- }
30
-
31
- // Launch new browser with CDP port
32
- logger.info('Launching new browser...');
33
- _instance = await this._launchWithCDP();
34
- return _instance;
35
- }
36
-
37
- /**
38
- * Try to connect to existing browser via CDP
39
- */
40
- static async _tryConnectExisting() {
19
+ // Try connect existing browser
41
20
  try {
42
- // Check if CDP port file exists
43
- if (!fs.existsSync(CDP_PORT_FILE)) return null;
21
+ if (fs.existsSync(CDP_PORT_FILE)) {
22
+ const portInfo = JSON.parse(fs.readFileSync(CDP_PORT_FILE, 'utf8'));
23
+ const browserURL = `http://localhost:${portInfo.port || CDP_PORT}`;
44
24
 
45
- const portInfo = JSON.parse(fs.readFileSync(CDP_PORT_FILE, 'utf8'));
46
- const browserURL = `http://localhost:${portInfo.port || CDP_PORT}`;
25
+ logger.info('Connecting to existing browser...');
26
+ const browser = await chromium.connectOverCDP(browserURL, { timeout: 5000 });
47
27
 
48
- // Try to connect
49
- const context = await chromium.connectOverCDP(browserURL, {
50
- timeout: 3000
51
- });
28
+ // Create NEW page (tab) for this task
29
+ const page = await browser.newPage();
30
+ await page.goto(config.YIYAN_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
31
+ await page.waitForTimeout(500);
52
32
 
53
- // Create wrapper instance
54
- const wrapper = new YiyanBrowser();
55
- wrapper.context = context;
56
- wrapper.page = context.pages()[0] || await context.newPage();
57
- wrapper._closed = false;
58
- wrapper._connected = true; // Mark as connected, not owned
59
-
60
- return wrapper;
33
+ logger.success('Connected! Using new tab.');
34
+ return { browser, page, isNew: false };
35
+ }
61
36
  } catch (err) {
62
- // Connection failed, remove stale port file
37
+ logger.warn('Connection failed, launching new browser...');
63
38
  try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
64
- return null;
65
39
  }
40
+
41
+ // Launch new browser with CDP
42
+ return await this._launchNew();
66
43
  }
67
44
 
68
45
  /**
69
- * Launch new browser with CDP port for future connections
46
+ * Launch new browser with CDP port
70
47
  */
71
- static async _launchWithCDP() {
72
- const wrapper = new YiyanBrowser();
48
+ static async _launchNew() {
49
+ logger.info('Launching new browser...');
73
50
 
74
- // Launch browser with CDP port
51
+ // Ensure session directory
75
52
  const sessionDir = path.join(os.homedir(), '.yiyan-agent', 'session');
53
+ fs.mkdirSync(sessionDir, { recursive: true });
54
+
55
+ // Launch with persistent context + CDP port
76
56
  const context = await chromium.launchPersistentContext(sessionDir, {
77
57
  headless: false,
78
58
  viewport: { width: 1280, height: 900 },
@@ -82,59 +62,45 @@ class BrowserManager {
82
62
  '--no-first-run',
83
63
  '--no-sandbox',
84
64
  ],
85
- userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36',
65
+ userAgent: 'Mozilla/5.0 AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36',
86
66
  });
87
67
 
88
- wrapper.context = context;
89
- wrapper.page = context.pages()[0] || await context.newPage();
90
- wrapper._closed = false;
91
- wrapper._connected = false; // Owned by this process
68
+ // Get the browser from context
69
+ const browser = context.browser();
92
70
 
93
- // Save CDP port info
71
+ // Save CDP port
94
72
  fs.writeFileSync(CDP_PORT_FILE, JSON.stringify({
95
73
  port: CDP_PORT,
74
+ pid: process.pid,
96
75
  launchedAt: Date.now()
97
76
  }));
98
77
 
99
- // Navigate to Yiyan
100
- await wrapper._navigate('https://yiyan.baidu.com/');
78
+ // Use existing page or create new
79
+ const pages = context.pages();
80
+ const page = pages.length > 0 ? pages[0] : await context.newPage();
101
81
 
102
- return wrapper;
103
- }
82
+ await page.goto(config.YIYAN_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
83
+ await page.waitForTimeout(800);
104
84
 
105
- /**
106
- * New chat session
107
- */
108
- static async newChat() {
109
- if (_instance) {
110
- await _instance.newChat();
111
- }
85
+ logger.success('Browser launched! Tab ready.');
86
+ return { browser, context, page, isNew: true };
112
87
  }
113
88
 
114
89
  /**
115
- * Close browser (only if we own it)
90
+ * Close current page (tab) but keep browser running
116
91
  */
117
- static async close() {
118
- if (_instance && !_instance._connected) {
119
- // We own the browser, close it
120
- try {
121
- await _instance.close();
122
- fs.unlinkSync(CDP_PORT_FILE);
123
- } catch {}
124
- }
125
- // If connected to existing browser, don't close it
126
- _instance = null;
92
+ static async closePage(page) {
93
+ try {
94
+ await page.close();
95
+ } catch {}
127
96
  }
128
97
 
129
98
  /**
130
- * Force close (for cleanup on error)
99
+ * Force close everything (for cleanup)
131
100
  */
132
101
  static async forceClose() {
133
- if (_instance) {
134
- try { await _instance.close(); } catch {}
135
- try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
136
- _instance = null;
137
- }
102
+ try { fs.unlinkSync(CDP_PORT_FILE); } catch {}
103
+ // Browser will close when user does Ctrl+C or manually
138
104
  }
139
105
  }
140
106
 
package/src/page-agent.js ADDED
@@ -0,0 +1,150 @@
1
+ // src/page-agent.js — Agent for a single page/tab
2
+ 'use strict';
3
+
4
+ const logger = require('./logger');
5
+ const config = require('./config');
6
+
7
// Candidate selectors for the Yiyan chat UI. Each list is tried in order,
// so the most specific selector comes first and generic fallbacks last.
const SEL = {
  chatInput: [
    '.editable__T7WAW4uW',
    '[role="textbox"]',
    '[contenteditable="true"]',
    'textarea',
  ],
  sendButton: [
    'button[aria-label*="发送"]',
    'button[type="submit"]',
  ],
};

// Everything up to and including this marker is dropped from the scraped text.
const ANSWER_START_MARKER = '准备输出结果';

// Text starting at the earliest of these markers is dropped from the scraped
// text. NOTE(review): presumably these are action labels the page renders
// after the answer — confirm against the live UI.
const ANSWER_END_MARKERS = ['重新生成', '换个回答', '输出更详细的', '再多提供'];

/**
 * Drives one chat page (tab): types a message into the input box, waits for
 * the answer text to stop changing, and returns the cleaned answer.
 */
class PageAgent {
  /**
   * @param {object} page - Page object exposing `waitForSelector`,
   *   `waitForTimeout`, `evaluate` and `keyboard` (a Playwright Page).
   */
  constructor(page) {
    this.page = page;
  }

  /**
   * Type `text` into the chat input and submit it with Enter.
   *
   * @param {string} text - Message to send; may contain line breaks.
   * @returns {Promise<void>}
   * @throws {Error} 'Cannot find input box' when none of SEL.chatInput
   *   becomes visible.
   */
  async sendMessage(text) {
    // Try each candidate selector in turn; the first visible match wins.
    let inputEl = null;
    for (const sel of SEL.chatInput) {
      try {
        inputEl = await this.page.waitForSelector(sel, { timeout: 5000, state: 'visible' });
        if (inputEl) break;
      } catch {
        // This selector did not show up in time — fall through to the next one.
      }
    }

    if (!inputEl) {
      throw new Error('Cannot find input box');
    }

    // Triple-click selects whatever is already in the box so Delete clears it.
    await inputEl.click({ clickCount: 3, force: true });
    await this.page.waitForTimeout(100);
    await this.page.keyboard.press('Delete');
    await this.page.waitForTimeout(50);

    // keyboard.type() presses Enter for every newline character, which would
    // submit the message after its first line. Type line by line instead.
    // NOTE(review): assumes the chat input treats Shift+Enter as a line
    // break — confirm against the live page.
    const lines = text.split(/\r\n|\r|\n/);
    for (let i = 0; i < lines.length; i++) {
      if (i > 0) {
        await this.page.keyboard.press('Shift+Enter');
      }
      if (lines[i].length > 0) {
        await this.page.keyboard.type(lines[i], { delay: 10 });
      }
    }
    await this.page.keyboard.press('Enter');
  }

  /**
   * Poll the page until the answer text has been unchanged for
   * `config.STABLE_DELAY` ms and the page no longer looks busy, or until
   * `config.RESPONSE_TIMEOUT` ms have elapsed.
   *
   * @returns {Promise<string>} Cleaned text of the last non-empty answer
   *   seen ('' if none was seen).
   */
  async waitForResponse() {
    const timeout = config.RESPONSE_TIMEOUT;
    const stableDelay = config.STABLE_DELAY;
    const start = Date.now();

    // Give the page a moment to start rendering the new answer.
    await this.page.waitForTimeout(1000);

    let lastText = '';
    let stableStart = null;

    while (Date.now() - start < timeout) {
      const text = await this._extractAnswer();

      if (text !== lastText && text.length > 0) {
        // New content arrived: remember it and restart the stability window.
        lastText = text;
        stableStart = Date.now();
      } else if (stableStart !== null && Date.now() - stableStart >= stableDelay) {
        if (!(await this._isGenerating())) {
          break;
        }
        // Still busy: restart the window rather than disabling it, otherwise
        // an answer whose text never changes again would only be returned
        // once the overall timeout expires.
        stableStart = Date.now();
      }

      await this.page.waitForTimeout(200);
    }

    return this._cleanText(lastText);
  }

  /**
   * Read the current answer text from the page.
   *
   * @returns {Promise<string>} Text of `#answer_text_id` when that element
   *   exists; otherwise the text of the first answer/response/markdown-classed
   *   element longer than 20 characters; otherwise ''.
   */
  async _extractAnswer() {
    return await this.page.evaluate(() => {
      // Dedicated answer container takes priority.
      const answerEl = document.querySelector('#answer_text_id');
      if (answerEl) return answerEl.textContent || '';

      // Fallback: first plausible container with more than 20 characters.
      const candidates = document.querySelectorAll('[class*="answer"], [class*="response"], [class*="markdown"]');
      for (const el of candidates) {
        const text = el.textContent || '';
        if (text.length > 20) return text;
      }

      return '';
    });
  }

  /**
   * Report whether the page still looks busy producing an answer.
   *
   * @returns {Promise<boolean>} true when a rendered "停止" button or any
   *   loading/typing-classed element is present.
   */
  async _isGenerating() {
    return await this.page.evaluate(() => {
      // offsetParent is null for elements that are not rendered.
      const stopBtn = document.querySelector('button[aria-label*="停止"]');
      if (stopBtn && stopBtn.offsetParent !== null) return true;

      const loading = document.querySelector('[class*="loading"], [class*="typing"]');
      if (loading) return true;

      return false;
    });
  }

  /**
   * Strip surrounding non-answer text from a scraped answer.
   *
   * Drops everything up to and including the first ANSWER_START_MARKER,
   * then everything from the earliest ANSWER_END_MARKERS occurrence onward.
   *
   * @param {string} text - Raw scraped text (may be empty or null).
   * @returns {string} Trimmed answer text, '' for empty input.
   */
  _cleanText(text) {
    if (!text) return '';

    let cleaned = text;

    const startIdx = cleaned.indexOf(ANSWER_START_MARKER);
    if (startIdx !== -1) {
      cleaned = cleaned.slice(startIdx + ANSWER_START_MARKER.length).trim();
    }

    // Cut at whichever end marker appears first in the text, not whichever
    // is listed first, so no trailing label is left in the answer.
    let cutAt = -1;
    for (const m of ANSWER_END_MARKERS) {
      const markerIdx = cleaned.indexOf(m);
      if (markerIdx !== -1 && (cutAt === -1 || markerIdx < cutAt)) {
        cutAt = markerIdx;
      }
    }
    if (cutAt !== -1) {
      cleaned = cleaned.slice(0, cutAt);
    }

    return cleaned.trim();
  }
}
149
+
150
+ module.exports = PageAgent;