yiyan-browser-agent 1.0.4 → 1.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yiyan-browser-agent",
3
- "version": "1.0.4",
3
+ "version": "1.0.5",
4
4
  "description": "AI coding agent powered by Yiyan (文心一言) via browser automation — no API key needed",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/browser.js CHANGED
@@ -1,5 +1,5 @@
1
1
  // src/browser.js — Playwright controller for Yiyan (yiyan.baidu.com)
2
- // Originally designed for DeepSeek, adapted for Yiyan
2
+ // Performance optimized: smart waits, paste input, MutationObserver
3
3
  'use strict';
4
4
 
5
5
  const { chromium } = require('playwright');
@@ -9,12 +9,9 @@ const logger = require('./logger');
9
9
 
10
10
  // ─────────────────────────────────────────────────────────────────────────────
11
11
  // Selector banks — ordered by likelihood, with fallbacks
12
- // Supports both Yiyan (文心一言) and generic chat UI patterns
13
12
  // ─────────────────────────────────────────────────────────────────────────────
14
13
 
15
14
  const SEL = {
16
- // Text input where the user types
17
- // Yiyan uses contenteditable div with specific class names
18
15
  chatInput: [
19
16
  '.editable__T7WAW4uW',
20
17
  '[role="textbox"]',
@@ -30,8 +27,6 @@ const SEL = {
30
27
  '.input-area textarea',
31
28
  ],
32
29
 
33
- // Button that submits the message
34
- // Yiyan: 发送 button, often with icon
35
30
  sendButton: [
36
31
  'button[aria-label*="Send" i]',
37
32
  'button[aria-label*="发送"]',
@@ -45,8 +40,6 @@ const SEL = {
45
40
  '.send-btn',
46
41
  ],
47
42
 
48
- // "Stop generating" button — visible while streaming
49
- // Yiyan: 停止生成
50
43
  stopButton: [
51
44
  'button[aria-label*="Stop" i]',
52
45
  'button[aria-label*="停止"]',
@@ -57,8 +50,6 @@ const SEL = {
57
50
  '[class*="abort"]',
58
51
  ],
59
52
 
60
- // "New chat" / "New conversation" button in sidebar
61
- // Yiyan: 新对话
62
53
  newChat: [
63
54
  'button[aria-label*="New chat" i]',
64
55
  'button[aria-label*="新对话"]',
@@ -69,7 +60,14 @@ const SEL = {
69
60
  '[class*="newChat"]',
70
61
  ],
71
62
 
72
- // The main chat messages container
63
+ // Response ready indicators
64
+ responseReady: [
65
+ '[class*="answer"]',
66
+ '[class*="response"]',
67
+ '[class*="markdown"]',
68
+ '.ds-markdown',
69
+ ],
70
+
73
71
  messageContainer: [
74
72
  '[class*="chat-content"]',
75
73
  '[class*="message-list"]',
@@ -79,7 +77,7 @@ const SEL = {
79
77
  };
80
78
 
81
79
  // ─────────────────────────────────────────────────────────────────────────────
82
- // YiyanBrowser class
80
+ // YiyanBrowser class (optimized)
83
81
  // ─────────────────────────────────────────────────────────────────────────────
84
82
 
85
83
  class YiyanBrowser {
@@ -114,11 +112,9 @@ class YiyanBrowser {
114
112
  ignoreDefaultArgs: ['--enable-automation'],
115
113
  });
116
114
 
117
- // Grab existing page or open a new one
118
115
  const pages = this.context.pages();
119
116
  this.page = pages.length > 0 ? pages[0] : await this.context.newPage();
120
117
 
121
- // Mask automation signals
122
118
  await this.page.addInitScript(() => {
123
119
  Object.defineProperty(navigator, 'webdriver', { get: () => false });
124
120
  });
@@ -135,12 +131,12 @@ class YiyanBrowser {
135
131
  try { await this.context?.close(); } catch {}
136
132
  }
137
133
 
138
- // ── Navigation ─────────────────────────────────────────────────────────────
134
+ // ── Navigation (optimized: smart wait) ─────────────────────────────────────
139
135
 
140
136
  async _navigate(url) {
141
137
  try {
142
- await this.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
143
- await this.page.waitForTimeout(1_500);
138
+ // Wait for network idle instead of fixed timeout
139
+ await this.page.goto(url, { waitUntil: 'networkidle', timeout: 30_000 });
144
140
  } catch (err) {
145
141
  logger.warn(`Navigation warning: ${err.message}`);
146
142
  }
@@ -148,13 +144,13 @@ class YiyanBrowser {
148
144
 
149
145
  async newChat() {
150
146
  try {
151
- // Try clicking the "New Chat" button in the sidebar
152
147
  for (const sel of SEL.newChat) {
153
148
  try {
154
- const el = await this.page.$(sel);
155
- if (el && await el.isVisible()) {
149
+ const el = await this.page.waitForSelector(sel, { timeout: 2_000, state: 'visible' });
150
+ if (el) {
156
151
  await el.click();
157
- await this.page.waitForTimeout(1_000);
152
+ // Smart wait for URL change or page update
153
+ await this.page.waitForLoadState('domcontentloaded', { timeout: 3_000 }).catch(() => {});
158
154
  logger.dim('Started new chat session');
159
155
  return;
160
156
  }
@@ -162,15 +158,15 @@ class YiyanBrowser {
162
158
  }
163
159
  } catch {}
164
160
 
165
- // Fallback: navigate to home which usually opens a fresh chat
166
161
  await this._navigate(config.YIYAN_URL);
167
162
  logger.dim('Navigated to Yiyan home (new chat)');
168
163
  }
169
164
 
170
- // ── Login handling ─────────────────────────────────────────────────────────
165
+ // ── Login handling (optimized) ─────────────────────────────────────────────
171
166
 
172
167
  async _ensureLoggedIn() {
173
- await this.page.waitForTimeout(2_000);
168
+ // Smart wait for page to be interactive
169
+ await this.page.waitForLoadState('domcontentloaded', { timeout: 5_000 }).catch(() => {});
174
170
 
175
171
  const needsLogin = await this.page.evaluate(() => {
176
172
  const url = window.location.href;
@@ -190,7 +186,8 @@ class YiyanBrowser {
190
186
  if (needsLogin) {
191
187
  this._printLoginBanner();
192
188
  await this._waitForEnter();
193
- await this.page.waitForTimeout(2_000);
189
+ // Wait for navigation after login
190
+ await this.page.waitForNavigation({ waitUntil: 'networkidle', timeout: 30_000 }).catch(() => {});
194
191
  }
195
192
  }
196
193
 
@@ -228,22 +225,51 @@ class YiyanBrowser {
228
225
  });
229
226
  }
230
227
 
231
- // ── Sending Messages ───────────────────────────────────────────────────────
228
+ // ── Sending Messages (optimized: paste instead of type) ────────────────────
232
229
 
233
230
  async sendMessage(text) {
234
- // Find input element
235
231
  const { el } = await this._findInput();
236
232
 
237
- // Triple click to focus and select all
238
- await el.click({ clickCount: 3, force: true });
239
- await this.page.waitForTimeout(200);
233
+ // Focus the element
234
+ await el.focus();
235
+
236
+ // Clear existing content
237
+ await el.evaluate(e => {
238
+ e.textContent = '';
239
+ if (e.innerText) e.innerText = '';
240
+ });
241
+
242
+ // Use clipboard paste for instant input (much faster than typing)
243
+ await this.page.evaluate(async (text) => {
244
+ // Try modern clipboard API first
245
+ try {
246
+ await navigator.clipboard.writeText(text);
247
+ document.execCommand('paste');
248
+ return;
249
+ } catch {}
250
+
251
+ // Fallback: direct DOM manipulation with proper event dispatch
252
+ const activeEl = document.activeElement;
253
+ if (activeEl && activeEl.isContentEditable) {
254
+ activeEl.textContent = text;
255
+
256
+ // Dispatch input event so Yiyan recognizes the content
257
+ const inputEvent = new InputEvent('input', {
258
+ bubbles: true,
259
+ cancelable: true,
260
+ inputType: 'insertText',
261
+ data: text,
262
+ });
263
+ activeEl.dispatchEvent(inputEvent);
240
264
 
241
- // Clear by pressing Delete
242
- await this.page.keyboard.press('Delete');
243
- await this.page.waitForTimeout(100);
265
+ // Also dispatch change event for good measure
266
+ const changeEvent = new Event('change', { bubbles: true });
267
+ activeEl.dispatchEvent(changeEvent);
268
+ }
269
+ }, text);
244
270
 
245
- // Type text character by character (simulates real user input)
246
- await this.page.keyboard.type(text, { delay: 50 });
271
+ // Small delay to let UI process the input
272
+ await this.page.waitForTimeout(50);
247
273
 
248
274
  // Press Enter to send
249
275
  await this.page.keyboard.press('Enter');
@@ -252,7 +278,7 @@ class YiyanBrowser {
252
278
  async _findInput() {
253
279
  for (const sel of SEL.chatInput) {
254
280
  try {
255
- const el = await this.page.waitForSelector(sel, { timeout: 4_000, state: 'visible' });
281
+ const el = await this.page.waitForSelector(sel, { timeout: 3_000, state: 'visible' });
256
282
  if (!el) continue;
257
283
  const tagName = await el.evaluate(e => e.tagName.toLowerCase());
258
284
  const isContentEditable = await el.evaluate(e => e.isContentEditable);
@@ -267,74 +293,73 @@ class YiyanBrowser {
267
293
  );
268
294
  }
269
295
 
270
- async _clickSendButton() {
271
- for (const sel of SEL.sendButton) {
272
- try {
273
- const el = await this.page.$(sel);
274
- if (el && await el.isVisible() && await el.isEnabled()) {
275
- await el.click();
276
- return true;
277
- }
278
- } catch {}
279
- }
280
- return false;
281
- }
296
+ // ── Waiting for Response (optimized: MutationObserver) ────────────────────
282
297
 
283
- // ── Waiting for Response ───────────────────────────────────────────────────
284
-
285
- /**
286
- * Wait until Yiyan finishes generating and return the response text.
287
- *
288
- * Algorithm:
289
- * 1. Record how many assistant messages are on the page right now.
290
- * 2. Wait until a new message appears (count goes up).
291
- * 3. Poll the last message text every 500 ms.
292
- * 4. When the text has not changed for STABLE_DELAY ms AND
293
- * no stop/loading indicator is visible → done.
294
- */
295
298
  async waitForResponse() {
296
299
  const timeout = config.RESPONSE_TIMEOUT;
297
300
  const stableDelay = config.STABLE_DELAY;
298
301
  const start = Date.now();
299
302
 
300
- // ── Phase 1: wait for a new message to appear ──────────────────────────
301
- const initialCount = await this._getMessageCount();
302
- let appeared = false;
303
+ // Use MutationObserver for efficient change detection
304
+ const responseId = await this.page.evaluate(() => {
305
+ let lastText = '';
306
+ let stableStart = null;
307
+ let done = false;
308
+
309
+ // Find response container
310
+ const container = document.querySelector('[class*="answer"], [class*="response"], [class*="markdown"], .ds-markdown, [class*="content"]');
311
+ if (!container) return null;
312
+
313
+ const observer = new MutationObserver(() => {
314
+ const currentText = container.textContent || '';
315
+
316
+ if (currentText !== lastText) {
317
+ lastText = currentText;
318
+ stableStart = Date.now();
319
+ } else if (stableStart && Date.now() - stableStart >= 2500) {
320
+ // Text stable for 2.5s = done
321
+ done = true;
322
+ }
323
+ });
303
324
 
304
- while (Date.now() - start < 12_000) {
305
- const count = await this._getMessageCount();
306
- if (count > initialCount) { appeared = true; break; }
307
- await this.page.waitForTimeout(400);
308
- }
325
+ observer.observe(container, { childList: true, subtree: true, characterData: true });
309
326
 
310
- if (!appeared) logger.warn('Response may have been delayed — continuing to wait...');
327
+ // Store for cleanup
328
+ window.__yiyanObserver = observer;
329
+ window.__yiyanDone = () => done;
330
+ window.__yiyanText = () => lastText;
311
331
 
312
- // ── Phase 2: wait for text to stabilise ───────────────────────────────
313
- let lastText = '';
314
- let stableStart = null;
315
- let dotCount = 0;
332
+ return container.className || 'response-container';
333
+ });
316
334
 
335
+ // Wait for completion signal
336
+ let dotCount = 0;
317
337
  while (Date.now() - start < timeout) {
318
- const text = await this._extractLastMessage();
319
-
320
- if (text !== lastText) {
321
- lastText = text;
322
- stableStart = null;
323
- } else if (text.length > 0) {
324
- if (!stableStart) stableStart = Date.now();
325
- else if (Date.now() - stableStart >= stableDelay) {
326
- if (!await this._isGenerating()) break; // confirmed done
327
- stableStart = null; // still generating, reset
328
- }
338
+ const state = await this.page.evaluate(() => ({
339
+ done : window.__yiyanDone ? window.__yiyanDone() : false,
340
+ text : window.__yiyanText ? window.__yiyanText() : '',
341
+ length: (window.__yiyanText ? window.__yiyanText() : '').length,
342
+ }));
343
+
344
+ if (state.done && state.length > 10 && !await this._isGenerating()) {
345
+ break;
329
346
  }
330
347
 
331
348
  // Progress indicator
332
349
  dotCount = (dotCount + 1) % 4;
333
- logger.thinking(`Receiving response${'.'.repeat(dotCount)} (${text.length} chars)`);
334
-
335
- await this.page.waitForTimeout(500);
350
+ logger.thinking(`Receiving response${'.'.repeat(dotCount)} (${state.length} chars)`);
336
351
  }
337
352
 
353
+ // Cleanup observer
354
+ await this.page.evaluate(() => {
355
+ if (window.__yiyanObserver) {
356
+ window.__yiyanObserver.disconnect();
357
+ delete window.__yiyanObserver;
358
+ delete window.__yiyanDone;
359
+ delete window.__yiyanText;
360
+ }
361
+ });
362
+
338
363
  logger.clearLine();
339
364
 
340
365
  const final = await this._extractLastMessage();
@@ -343,16 +368,13 @@ class YiyanBrowser {
343
368
 
344
369
  // ── DOM Extraction ─────────────────────────────────────────────────────────
345
370
 
346
- /** Count how many "response" blocks are visible */
347
371
  async _getMessageCount() {
348
372
  return await this.page.evaluate(() => {
349
373
  const candidates = [
350
- // Yiyan specific selectors
351
374
  '[class*="answer"]',
352
375
  '[class*="response"]',
353
376
  '[class*="content"]',
354
377
  '[class*="markdown"]',
355
- // Generic selectors
356
378
  '[class*="assistant"][class*="message"]',
357
379
  '[data-role="assistant"]',
358
380
  '[class*="markdown-content"]',
@@ -364,21 +386,14 @@ class YiyanBrowser {
364
386
  const els = document.querySelectorAll(sel);
365
387
  if (els.length > 0) return els.length;
366
388
  }
367
- // Broad fallback - look for any text block that might contain response
368
389
  const textBlocks = Array.from(document.querySelectorAll('div, section, article'))
369
390
  .filter(el => el.innerText && el.innerText.length > 50);
370
391
  return textBlocks.length;
371
392
  });
372
393
  }
373
394
 
374
- /** Extract the text of the last assistant message — including code blocks */
375
395
  async _extractLastMessage() {
376
396
  return await this.page.evaluate(() => {
377
-
378
- // ── Helper: get all text including code blocks ────────────────────────
379
- // Walks the DOM and reconstructs text, re-adding fence markers for code
380
- // blocks so the parser can recognise tool_call fences even after the
381
- // browser markdown renderer has converted them to <pre><code> elements.
382
397
  function getFullText(el) {
383
398
  if (!el) return '';
384
399
  let result = '';
@@ -391,8 +406,6 @@ class YiyanBrowser {
391
406
  if (node.nodeType !== Node.ELEMENT_NODE) return;
392
407
  const tag = node.tagName.toLowerCase();
393
408
 
394
- // <pre> wraps a fenced code block — reconstruct the backtick fence
395
- // so the parser can match the ```tool_call regex.
396
409
  if (tag === 'pre') {
397
410
  const codeEl = node.querySelector('code');
398
411
  if (codeEl) {
@@ -406,7 +419,6 @@ class YiyanBrowser {
406
419
  return;
407
420
  }
408
421
 
409
- // Inline <code> — skip if inside a <pre> (already handled)
410
422
  if (tag === 'code') {
411
423
  const parentTag = node.parentElement && node.parentElement.tagName
412
424
  ? node.parentElement.tagName.toLowerCase() : '';
@@ -427,13 +439,10 @@ class YiyanBrowser {
427
439
  return result.trim();
428
440
  }
429
441
 
430
- // ── Attempt 1: Specific assistant-message selectors ──────────────────
431
442
  const directSelectors = [
432
- // Yiyan specific selectors (百度文心一言)
433
443
  '[class*="answer"]',
434
444
  '[class*="response"]',
435
445
  '[class*="message"][class*="content"]',
436
- // Generic selectors
437
446
  '.ds-markdown',
438
447
  '[class*="assistant"] [class*="markdown"]',
439
448
  '[class*="assistant"] [class*="content"]',
@@ -454,7 +463,6 @@ class YiyanBrowser {
454
463
  } catch {}
455
464
  }
456
465
 
457
- // ── Attempt 2: Any markdown/prose container ───────────────────────────
458
466
  try {
459
467
  const markdownEls = document.querySelectorAll(
460
468
  '[class*="markdown"], [class*="prose"], [class*="rendered"], [class*="content"]'
@@ -465,7 +473,6 @@ class YiyanBrowser {
465
473
  }
466
474
  } catch {}
467
475
 
468
- // ── Attempt 3: Heuristic — large non-user text blocks ────────────────
469
476
  try {
470
477
  const allBlocks = Array.from(
471
478
  document.querySelectorAll('[class*="message"], [class*="chat-item"], [class*="turn"], [class*="answer"], [class*="content"]')
@@ -488,19 +495,15 @@ class YiyanBrowser {
488
495
  }
489
496
  } catch {}
490
497
 
491
- // ── Attempt 4: Any large text block on page (last resort) ─────────────
492
498
  try {
493
499
  const allDivs = Array.from(document.querySelectorAll('div, section'));
494
500
  const textBlocks = allDivs.filter(el => {
495
501
  const text = el.innerText || '';
496
- // Exclude input areas and user messages
497
502
  const cls = el.className || '';
498
503
  if (cls.includes('input') || cls.includes('editable') || cls.includes('user')) return false;
499
- // Look for blocks with substantial text that might be AI response
500
504
  return text.length > 50 && !el.querySelector('textarea, [contenteditable]');
501
505
  });
502
506
  if (textBlocks.length > 0) {
503
- // Sort by text length, prefer longer blocks (likely complete responses)
504
507
  textBlocks.sort((a, b) => (b.innerText || '').length - (a.innerText || '').length);
505
508
  return getFullText(textBlocks[0]);
506
509
  }
@@ -510,10 +513,8 @@ class YiyanBrowser {
510
513
  });
511
514
  }
512
515
 
513
- /** True if Yiyan is still streaming / generating */
514
516
  async _isGenerating() {
515
517
  return await this.page.evaluate(() => {
516
- // Check for stop button
517
518
  const stopSelectors = [
518
519
  'button[aria-label*="Stop" i]',
519
520
  '[class*="stop-gen"]',
@@ -528,7 +529,6 @@ class YiyanBrowser {
528
529
  }
529
530
  }
530
531
 
531
- // Check for animated loading/typing indicators
532
532
  const loaderSelectors = [
533
533
  '[class*="typing"]',
534
534
  '[class*="loading"]',
@@ -557,23 +557,15 @@ class YiyanBrowser {
557
557
  if (!text) return '';
558
558
 
559
559
  return text
560
- // Strip AI thinking blocks (DeepSeek R1 / Yiyan patterns)
561
560
  .replace(/<think>[\s\S]*?<\/think>\n?/gi, '')
562
- // Strip "Thinking..." headers that sometimes prefix responses
563
561
  .replace(/^Thinking\.{0,3}\n[\s\S]*?\n\n/m, '')
564
- // Strip copy-code button artifacts like "1CopyRunInsert"
565
562
  .replace(/^\d+(?:Copy|Run|Insert|Edit)\b.*$/gm, '')
566
- // Collapse 3+ blank lines into 2
567
563
  .replace(/\n{3,}/g, '\n\n')
568
564
  .trim();
569
565
  }
570
566
 
571
567
  // ── Debug / Calibration Utilities ─────────────────────────────────────────
572
568
 
573
- /**
574
- * Dump useful DOM information to stdout.
575
- * Called by `node src/calibrate.js` or `--debug` flag.
576
- */
577
569
  async dumpDebugInfo() {
578
570
  const info = await this.page.evaluate(() => {
579
571
  const classFreq = {};
@@ -614,11 +606,10 @@ class YiyanBrowser {
614
606
  console.log('═'.repeat(60) + '\n');
615
607
  }
616
608
 
617
- /** Take a screenshot (for debugging) */
618
609
  async screenshot(filePath = '/tmp/yiyan-agent-debug.png') {
619
610
  await this.page.screenshot({ path: filePath, fullPage: false });
620
611
  logger.info(`Screenshot saved: ${filePath}`);
621
612
  }
622
613
  }
623
614
 
624
- module.exports = YiyanBrowser;
615
+ module.exports = YiyanBrowser;
package/src/config.js CHANGED
@@ -12,10 +12,10 @@ const defaults = {
12
12
  SESSION_DIR : path.join(os.homedir(), '.yiyan-agent', 'session'),
13
13
  HEADLESS : false,
14
14
 
15
- // Timing
15
+ // Timing (optimized for faster response detection)
16
16
  RESPONSE_TIMEOUT : 180_000,
17
- STABLE_DELAY : 2_500,
18
- SEND_DELAY : 400,
17
+ STABLE_DELAY : 1_500, // Reduced from 2500ms (MutationObserver is faster)
18
+ SEND_DELAY : 100, // Reduced from 400ms (paste is instant)
19
19
 
20
20
  // Agent
21
21
  MAX_ITERATIONS : 40,