yiyan-browser-agent 1.0.4 → 1.0.6

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yiyan-browser-agent",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "AI coding agent powered by Yiyan (文心一言) via browser automation — no API key needed",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/browser.js CHANGED
@@ -1,5 +1,5 @@
1
1
  // src/browser.js — Playwright controller for Yiyan (yiyan.baidu.com)
2
- // Originally designed for DeepSeek, adapted for Yiyan
2
+ // Performance optimized: smart waits, paste input, MutationObserver
3
3
  'use strict';
4
4
 
5
5
  const { chromium } = require('playwright');
@@ -9,12 +9,9 @@ const logger = require('./logger');
9
9
 
10
10
  // ─────────────────────────────────────────────────────────────────────────────
11
11
  // Selector banks — ordered by likelihood, with fallbacks
12
- // Supports both Yiyan (文心一言) and generic chat UI patterns
13
12
  // ─────────────────────────────────────────────────────────────────────────────
14
13
 
15
14
  const SEL = {
16
- // Text input where the user types
17
- // Yiyan uses contenteditable div with specific class names
18
15
  chatInput: [
19
16
  '.editable__T7WAW4uW',
20
17
  '[role="textbox"]',
@@ -30,8 +27,6 @@ const SEL = {
30
27
  '.input-area textarea',
31
28
  ],
32
29
 
33
- // Button that submits the message
34
- // Yiyan: 发送 button, often with icon
35
30
  sendButton: [
36
31
  'button[aria-label*="Send" i]',
37
32
  'button[aria-label*="发送"]',
@@ -45,8 +40,6 @@ const SEL = {
45
40
  '.send-btn',
46
41
  ],
47
42
 
48
- // "Stop generating" button — visible while streaming
49
- // Yiyan: 停止生成
50
43
  stopButton: [
51
44
  'button[aria-label*="Stop" i]',
52
45
  'button[aria-label*="停止"]',
@@ -57,8 +50,6 @@ const SEL = {
57
50
  '[class*="abort"]',
58
51
  ],
59
52
 
60
- // "New chat" / "New conversation" button in sidebar
61
- // Yiyan: 新对话
62
53
  newChat: [
63
54
  'button[aria-label*="New chat" i]',
64
55
  'button[aria-label*="新对话"]',
@@ -69,7 +60,14 @@ const SEL = {
69
60
  '[class*="newChat"]',
70
61
  ],
71
62
 
72
- // The main chat messages container
63
+ // Response ready indicators
64
+ responseReady: [
65
+ '[class*="answer"]',
66
+ '[class*="response"]',
67
+ '[class*="markdown"]',
68
+ '.ds-markdown',
69
+ ],
70
+
73
71
  messageContainer: [
74
72
  '[class*="chat-content"]',
75
73
  '[class*="message-list"]',
@@ -79,7 +77,7 @@ const SEL = {
79
77
  };
80
78
 
81
79
  // ─────────────────────────────────────────────────────────────────────────────
82
- // YiyanBrowser class
80
+ // YiyanBrowser class (optimized)
83
81
  // ─────────────────────────────────────────────────────────────────────────────
84
82
 
85
83
  class YiyanBrowser {
@@ -114,11 +112,9 @@ class YiyanBrowser {
114
112
  ignoreDefaultArgs: ['--enable-automation'],
115
113
  });
116
114
 
117
- // Grab existing page or open a new one
118
115
  const pages = this.context.pages();
119
116
  this.page = pages.length > 0 ? pages[0] : await this.context.newPage();
120
117
 
121
- // Mask automation signals
122
118
  await this.page.addInitScript(() => {
123
119
  Object.defineProperty(navigator, 'webdriver', { get: () => false });
124
120
  });
@@ -135,26 +131,29 @@ class YiyanBrowser {
135
131
  try { await this.context?.close(); } catch {}
136
132
  }
137
133
 
138
- // ── Navigation ─────────────────────────────────────────────────────────────
134
+ // ── Navigation (optimized: smart wait) ─────────────────────────────────────
139
135
 
140
136
  async _navigate(url) {
141
137
  try {
142
- await this.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
143
- await this.page.waitForTimeout(1_500);
138
+ // Wait for network idle, then extra wait for dynamic content
139
+ await this.page.goto(url, { waitUntil: 'networkidle', timeout: 30_000 });
140
+ // Give React/Vue SPA time to render
141
+ await this.page.waitForTimeout(800);
144
142
  } catch (err) {
145
143
  logger.warn(`Navigation warning: ${err.message}`);
144
+ // Fallback: longer wait if networkidle fails
145
+ await this.page.waitForTimeout(2000);
146
146
  }
147
147
  }
148
148
 
149
149
  async newChat() {
150
150
  try {
151
- // Try clicking the "New Chat" button in the sidebar
152
151
  for (const sel of SEL.newChat) {
153
152
  try {
154
- const el = await this.page.$(sel);
155
- if (el && await el.isVisible()) {
153
+ const el = await this.page.waitForSelector(sel, { timeout: 2_000, state: 'visible' });
154
+ if (el) {
156
155
  await el.click();
157
- await this.page.waitForTimeout(1_000);
156
+ await this.page.waitForTimeout(500);
158
157
  logger.dim('Started new chat session');
159
158
  return;
160
159
  }
@@ -162,15 +161,16 @@ class YiyanBrowser {
162
161
  }
163
162
  } catch {}
164
163
 
165
- // Fallback: navigate to home which usually opens a fresh chat
166
164
  await this._navigate(config.YIYAN_URL);
167
165
  logger.dim('Navigated to Yiyan home (new chat)');
168
166
  }
169
167
 
170
- // ── Login handling ─────────────────────────────────────────────────────────
168
+ // ── Login handling (optimized) ─────────────────────────────────────────────
171
169
 
172
170
  async _ensureLoggedIn() {
173
- await this.page.waitForTimeout(2_000);
171
+ // Wait for page to be fully interactive
172
+ await this.page.waitForLoadState('domcontentloaded', { timeout: 5_000 }).catch(() => {});
173
+ await this.page.waitForTimeout(500);
174
174
 
175
175
  const needsLogin = await this.page.evaluate(() => {
176
176
  const url = window.location.href;
@@ -190,7 +190,7 @@ class YiyanBrowser {
190
190
  if (needsLogin) {
191
191
  this._printLoginBanner();
192
192
  await this._waitForEnter();
193
- await this.page.waitForTimeout(2_000);
193
+ await this.page.waitForNavigation({ waitUntil: 'networkidle', timeout: 30_000 }).catch(() => {});
194
194
  }
195
195
  }
196
196
 
@@ -228,22 +228,51 @@ class YiyanBrowser {
228
228
  });
229
229
  }
230
230
 
231
- // ── Sending Messages ───────────────────────────────────────────────────────
231
+ // ── Sending Messages (optimized: paste instead of type) ────────────────────
232
232
 
233
233
  async sendMessage(text) {
234
- // Find input element
235
234
  const { el } = await this._findInput();
236
235
 
237
- // Triple click to focus and select all
238
- await el.click({ clickCount: 3, force: true });
239
- await this.page.waitForTimeout(200);
236
+ // Focus the element
237
+ await el.focus();
238
+
239
+ // Clear existing content
240
+ await el.evaluate(e => {
241
+ e.textContent = '';
242
+ if (e.innerText) e.innerText = '';
243
+ });
244
+
245
+ // Use clipboard paste for instant input (much faster than typing)
246
+ await this.page.evaluate(async (text) => {
247
+ // Try modern clipboard API first
248
+ try {
249
+ await navigator.clipboard.writeText(text);
250
+ document.execCommand('paste');
251
+ return;
252
+ } catch {}
253
+
254
+ // Fallback: direct DOM manipulation with proper event dispatch
255
+ const activeEl = document.activeElement;
256
+ if (activeEl && activeEl.isContentEditable) {
257
+ activeEl.textContent = text;
258
+
259
+ // Dispatch input event so Yiyan recognizes the content
260
+ const inputEvent = new InputEvent('input', {
261
+ bubbles: true,
262
+ cancelable: true,
263
+ inputType: 'insertText',
264
+ data: text,
265
+ });
266
+ activeEl.dispatchEvent(inputEvent);
240
267
 
241
- // Clear by pressing Delete
242
- await this.page.keyboard.press('Delete');
243
- await this.page.waitForTimeout(100);
268
+ // Also dispatch change event for good measure
269
+ const changeEvent = new Event('change', { bubbles: true });
270
+ activeEl.dispatchEvent(changeEvent);
271
+ }
272
+ }, text);
244
273
 
245
- // Type text character by character (simulates real user input)
246
- await this.page.keyboard.type(text, { delay: 50 });
274
+ // Small delay to let UI process the input
275
+ await this.page.waitForTimeout(50);
247
276
 
248
277
  // Press Enter to send
249
278
  await this.page.keyboard.press('Enter');
@@ -252,7 +281,8 @@ class YiyanBrowser {
252
281
  async _findInput() {
253
282
  for (const sel of SEL.chatInput) {
254
283
  try {
255
- const el = await this.page.waitForSelector(sel, { timeout: 4_000, state: 'visible' });
284
+ // Increased timeout for initial page load
285
+ const el = await this.page.waitForSelector(sel, { timeout: 8_000, state: 'visible' });
256
286
  if (!el) continue;
257
287
  const tagName = await el.evaluate(e => e.tagName.toLowerCase());
258
288
  const isContentEditable = await el.evaluate(e => e.isContentEditable);
@@ -267,74 +297,73 @@ class YiyanBrowser {
267
297
  );
268
298
  }
269
299
 
270
- async _clickSendButton() {
271
- for (const sel of SEL.sendButton) {
272
- try {
273
- const el = await this.page.$(sel);
274
- if (el && await el.isVisible() && await el.isEnabled()) {
275
- await el.click();
276
- return true;
277
- }
278
- } catch {}
279
- }
280
- return false;
281
- }
300
+ // ── Waiting for Response (optimized: MutationObserver) ────────────────────
282
301
 
283
- // ── Waiting for Response ───────────────────────────────────────────────────
284
-
285
- /**
286
- * Wait until Yiyan finishes generating and return the response text.
287
- *
288
- * Algorithm:
289
- * 1. Record how many assistant messages are on the page right now.
290
- * 2. Wait until a new message appears (count goes up).
291
- * 3. Poll the last message text every 500 ms.
292
- * 4. When the text has not changed for STABLE_DELAY ms AND
293
- * no stop/loading indicator is visible → done.
294
- */
295
302
  async waitForResponse() {
296
303
  const timeout = config.RESPONSE_TIMEOUT;
297
304
  const stableDelay = config.STABLE_DELAY;
298
305
  const start = Date.now();
299
306
 
300
- // ── Phase 1: wait for a new message to appear ──────────────────────────
301
- const initialCount = await this._getMessageCount();
302
- let appeared = false;
307
+ // Use MutationObserver for efficient change detection
308
+ const responseId = await this.page.evaluate(() => {
309
+ let lastText = '';
310
+ let stableStart = null;
311
+ let done = false;
312
+
313
+ // Find response container
314
+ const container = document.querySelector('[class*="answer"], [class*="response"], [class*="markdown"], .ds-markdown, [class*="content"]');
315
+ if (!container) return null;
316
+
317
+ const observer = new MutationObserver(() => {
318
+ const currentText = container.textContent || '';
319
+
320
+ if (currentText !== lastText) {
321
+ lastText = currentText;
322
+ stableStart = Date.now();
323
+ } else if (stableStart && Date.now() - stableStart >= 2500) {
324
+ // Text stable for 2.5s = done
325
+ done = true;
326
+ }
327
+ });
303
328
 
304
- while (Date.now() - start < 12_000) {
305
- const count = await this._getMessageCount();
306
- if (count > initialCount) { appeared = true; break; }
307
- await this.page.waitForTimeout(400);
308
- }
329
+ observer.observe(container, { childList: true, subtree: true, characterData: true });
309
330
 
310
- if (!appeared) logger.warn('Response may have been delayed — continuing to wait...');
331
+ // Store for cleanup
332
+ window.__yiyanObserver = observer;
333
+ window.__yiyanDone = () => done;
334
+ window.__yiyanText = () => lastText;
311
335
 
312
- // ── Phase 2: wait for text to stabilise ───────────────────────────────
313
- let lastText = '';
314
- let stableStart = null;
315
- let dotCount = 0;
336
+ return container.className || 'response-container';
337
+ });
316
338
 
339
+ // Wait for completion signal
340
+ let dotCount = 0;
317
341
  while (Date.now() - start < timeout) {
318
- const text = await this._extractLastMessage();
319
-
320
- if (text !== lastText) {
321
- lastText = text;
322
- stableStart = null;
323
- } else if (text.length > 0) {
324
- if (!stableStart) stableStart = Date.now();
325
- else if (Date.now() - stableStart >= stableDelay) {
326
- if (!await this._isGenerating()) break; // confirmed done
327
- stableStart = null; // still generating, reset
328
- }
342
+ const state = await this.page.evaluate(() => ({
343
+ done : window.__yiyanDone ? window.__yiyanDone() : false,
344
+ text : window.__yiyanText ? window.__yiyanText() : '',
345
+ length: (window.__yiyanText ? window.__yiyanText() : '').length,
346
+ }));
347
+
348
+ if (state.done && state.length > 10 && !await this._isGenerating()) {
349
+ break;
329
350
  }
330
351
 
331
352
  // Progress indicator
332
353
  dotCount = (dotCount + 1) % 4;
333
- logger.thinking(`Receiving response${'.'.repeat(dotCount)} (${text.length} chars)`);
334
-
335
- await this.page.waitForTimeout(500);
354
+ logger.thinking(`Receiving response${'.'.repeat(dotCount)} (${state.length} chars)`);
336
355
  }
337
356
 
357
+ // Cleanup observer
358
+ await this.page.evaluate(() => {
359
+ if (window.__yiyanObserver) {
360
+ window.__yiyanObserver.disconnect();
361
+ delete window.__yiyanObserver;
362
+ delete window.__yiyanDone;
363
+ delete window.__yiyanText;
364
+ }
365
+ });
366
+
338
367
  logger.clearLine();
339
368
 
340
369
  const final = await this._extractLastMessage();
@@ -343,16 +372,13 @@ class YiyanBrowser {
343
372
 
344
373
  // ── DOM Extraction ─────────────────────────────────────────────────────────
345
374
 
346
- /** Count how many "response" blocks are visible */
347
375
  async _getMessageCount() {
348
376
  return await this.page.evaluate(() => {
349
377
  const candidates = [
350
- // Yiyan specific selectors
351
378
  '[class*="answer"]',
352
379
  '[class*="response"]',
353
380
  '[class*="content"]',
354
381
  '[class*="markdown"]',
355
- // Generic selectors
356
382
  '[class*="assistant"][class*="message"]',
357
383
  '[data-role="assistant"]',
358
384
  '[class*="markdown-content"]',
@@ -364,21 +390,14 @@ class YiyanBrowser {
364
390
  const els = document.querySelectorAll(sel);
365
391
  if (els.length > 0) return els.length;
366
392
  }
367
- // Broad fallback - look for any text block that might contain response
368
393
  const textBlocks = Array.from(document.querySelectorAll('div, section, article'))
369
394
  .filter(el => el.innerText && el.innerText.length > 50);
370
395
  return textBlocks.length;
371
396
  });
372
397
  }
373
398
 
374
- /** Extract the text of the last assistant message — including code blocks */
375
399
  async _extractLastMessage() {
376
400
  return await this.page.evaluate(() => {
377
-
378
- // ── Helper: get all text including code blocks ────────────────────────
379
- // Walks the DOM and reconstructs text, re-adding fence markers for code
380
- // blocks so the parser can recognise tool_call fences even after the
381
- // browser markdown renderer has converted them to <pre><code> elements.
382
401
  function getFullText(el) {
383
402
  if (!el) return '';
384
403
  let result = '';
@@ -391,8 +410,6 @@ class YiyanBrowser {
391
410
  if (node.nodeType !== Node.ELEMENT_NODE) return;
392
411
  const tag = node.tagName.toLowerCase();
393
412
 
394
- // <pre> wraps a fenced code block — reconstruct the backtick fence
395
- // so the parser can match the ```tool_call regex.
396
413
  if (tag === 'pre') {
397
414
  const codeEl = node.querySelector('code');
398
415
  if (codeEl) {
@@ -406,7 +423,6 @@ class YiyanBrowser {
406
423
  return;
407
424
  }
408
425
 
409
- // Inline <code> — skip if inside a <pre> (already handled)
410
426
  if (tag === 'code') {
411
427
  const parentTag = node.parentElement && node.parentElement.tagName
412
428
  ? node.parentElement.tagName.toLowerCase() : '';
@@ -427,13 +443,10 @@ class YiyanBrowser {
427
443
  return result.trim();
428
444
  }
429
445
 
430
- // ── Attempt 1: Specific assistant-message selectors ──────────────────
431
446
  const directSelectors = [
432
- // Yiyan specific selectors (百度文心一言)
433
447
  '[class*="answer"]',
434
448
  '[class*="response"]',
435
449
  '[class*="message"][class*="content"]',
436
- // Generic selectors
437
450
  '.ds-markdown',
438
451
  '[class*="assistant"] [class*="markdown"]',
439
452
  '[class*="assistant"] [class*="content"]',
@@ -454,7 +467,6 @@ class YiyanBrowser {
454
467
  } catch {}
455
468
  }
456
469
 
457
- // ── Attempt 2: Any markdown/prose container ───────────────────────────
458
470
  try {
459
471
  const markdownEls = document.querySelectorAll(
460
472
  '[class*="markdown"], [class*="prose"], [class*="rendered"], [class*="content"]'
@@ -465,7 +477,6 @@ class YiyanBrowser {
465
477
  }
466
478
  } catch {}
467
479
 
468
- // ── Attempt 3: Heuristic — large non-user text blocks ────────────────
469
480
  try {
470
481
  const allBlocks = Array.from(
471
482
  document.querySelectorAll('[class*="message"], [class*="chat-item"], [class*="turn"], [class*="answer"], [class*="content"]')
@@ -488,19 +499,15 @@ class YiyanBrowser {
488
499
  }
489
500
  } catch {}
490
501
 
491
- // ── Attempt 4: Any large text block on page (last resort) ─────────────
492
502
  try {
493
503
  const allDivs = Array.from(document.querySelectorAll('div, section'));
494
504
  const textBlocks = allDivs.filter(el => {
495
505
  const text = el.innerText || '';
496
- // Exclude input areas and user messages
497
506
  const cls = el.className || '';
498
507
  if (cls.includes('input') || cls.includes('editable') || cls.includes('user')) return false;
499
- // Look for blocks with substantial text that might be AI response
500
508
  return text.length > 50 && !el.querySelector('textarea, [contenteditable]');
501
509
  });
502
510
  if (textBlocks.length > 0) {
503
- // Sort by text length, prefer longer blocks (likely complete responses)
504
511
  textBlocks.sort((a, b) => (b.innerText || '').length - (a.innerText || '').length);
505
512
  return getFullText(textBlocks[0]);
506
513
  }
@@ -510,10 +517,8 @@ class YiyanBrowser {
510
517
  });
511
518
  }
512
519
 
513
- /** True if Yiyan is still streaming / generating */
514
520
  async _isGenerating() {
515
521
  return await this.page.evaluate(() => {
516
- // Check for stop button
517
522
  const stopSelectors = [
518
523
  'button[aria-label*="Stop" i]',
519
524
  '[class*="stop-gen"]',
@@ -528,7 +533,6 @@ class YiyanBrowser {
528
533
  }
529
534
  }
530
535
 
531
- // Check for animated loading/typing indicators
532
536
  const loaderSelectors = [
533
537
  '[class*="typing"]',
534
538
  '[class*="loading"]',
@@ -557,23 +561,15 @@ class YiyanBrowser {
557
561
  if (!text) return '';
558
562
 
559
563
  return text
560
- // Strip AI thinking blocks (DeepSeek R1 / Yiyan patterns)
561
564
  .replace(/<think>[\s\S]*?<\/think>\n?/gi, '')
562
- // Strip "Thinking..." headers that sometimes prefix responses
563
565
  .replace(/^Thinking\.{0,3}\n[\s\S]*?\n\n/m, '')
564
- // Strip copy-code button artifacts like "1CopyRunInsert"
565
566
  .replace(/^\d+(?:Copy|Run|Insert|Edit)\b.*$/gm, '')
566
- // Collapse 3+ blank lines into 2
567
567
  .replace(/\n{3,}/g, '\n\n')
568
568
  .trim();
569
569
  }
570
570
 
571
571
  // ── Debug / Calibration Utilities ─────────────────────────────────────────
572
572
 
573
- /**
574
- * Dump useful DOM information to stdout.
575
- * Called by `node src/calibrate.js` or `--debug` flag.
576
- */
577
573
  async dumpDebugInfo() {
578
574
  const info = await this.page.evaluate(() => {
579
575
  const classFreq = {};
@@ -614,11 +610,10 @@ class YiyanBrowser {
614
610
  console.log('═'.repeat(60) + '\n');
615
611
  }
616
612
 
617
- /** Take a screenshot (for debugging) */
618
613
  async screenshot(filePath = '/tmp/yiyan-agent-debug.png') {
619
614
  await this.page.screenshot({ path: filePath, fullPage: false });
620
615
  logger.info(`Screenshot saved: ${filePath}`);
621
616
  }
622
617
  }
623
618
 
624
- module.exports = YiyanBrowser;
619
+ module.exports = YiyanBrowser;
package/src/config.js CHANGED
@@ -12,10 +12,10 @@ const defaults = {
12
12
  SESSION_DIR : path.join(os.homedir(), '.yiyan-agent', 'session'),
13
13
  HEADLESS : false,
14
14
 
15
- // Timing
15
+ // Timing (optimized for faster response detection)
16
16
  RESPONSE_TIMEOUT : 180_000,
17
- STABLE_DELAY : 2_500,
18
- SEND_DELAY : 400,
17
+ STABLE_DELAY : 1_500, // Reduced from 2500ms (MutationObserver is faster)
18
+ SEND_DELAY : 100, // Reduced from 400ms (paste is instant)
19
19
 
20
20
  // Agent
21
21
  MAX_ITERATIONS : 40,