yiyan-browser-agent 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/browser.js ADDED
@@ -0,0 +1,624 @@
1
+ // src/browser.js — Playwright controller for Yiyan (yiyan.baidu.com)
2
+ // Originally designed for DeepSeek, adapted for Yiyan
3
+ 'use strict';
4
+
5
+ const { chromium } = require('playwright');
6
+ const path = require('path');
7
+ const config = require('./config');
8
+ const logger = require('./logger');
9
+
10
+ // ─────────────────────────────────────────────────────────────────────────────
11
+ // Selector banks — ordered by likelihood, with fallbacks
12
+ // Supports both Yiyan (文心一言) and generic chat UI patterns
13
+ // ─────────────────────────────────────────────────────────────────────────────
14
+
/**
 * Selector banks used to locate UI elements on the chat page.
 *
 * Each key maps to a list of CSS selectors that callers try in array order,
 * so earlier entries take precedence over later, more generic fallbacks.
 * The lists mix Yiyan-specific class names with generic chat-UI patterns.
 */
const SEL = {
  // Text input where the user types.
  // Yiyan uses a contenteditable div with a hashed class name.
  chatInput: [
    '.editable__T7WAW4uW',
    '[role="textbox"]',
    '.editable',
    '#chat-input',
    'textarea[placeholder]',
    'textarea',
    '[contenteditable="true"][role="textbox"]',
    '[contenteditable="true"]',
    '[class*="input-box"]',
    '[class*="chatInput"]',
    '[class*="editor"]',
    '.input-area textarea',
  ],

  // Button that submits the message (Yiyan label: 发送).
  // The `i` flag makes the aria-label match case-insensitive, so a single
  // "Send" entry covers every capitalisation.
  sendButton: [
    'button[aria-label*="Send" i]',
    'button[aria-label*="发送"]',
    '[data-testid="send-button"]',
    'button[type="submit"]',
    '[class*="send-btn"]',
    '[class*="sendBtn"]',
    '[class*="send-button"]',
    '[class*="submit"]',
    '.send-btn',
  ],

  // "Stop generating" button — visible while a response is streaming
  // (Yiyan label: 停止生成).
  stopButton: [
    'button[aria-label*="Stop" i]',
    'button[aria-label*="停止"]',
    '[aria-label*="stop generating" i]',
    '[data-testid="stop-button"]',
    '[class*="stop-btn"]',
    '[class*="stopBtn"]',
    '[class*="abort"]',
  ],

  // "New chat" / "New conversation" button in the sidebar (Yiyan label: 新对话).
  newChat: [
    'button[aria-label*="New chat" i]',
    'button[aria-label*="新对话"]',
    'button[aria-label*="New conversation" i]',
    'a[href="/"][aria-label]',
    '[data-testid="new-chat"]',
    '[class*="new-chat"]',
    '[class*="newChat"]',
  ],

  // The main chat messages container.
  messageContainer: [
    '[class*="chat-content"]',
    '[class*="message-list"]',
    '[class*="conversation"]',
    'main',
  ],
};
80
+
81
+ // ─────────────────────────────────────────────────────────────────────────────
82
+ // YiyanBrowser class
83
+ // ─────────────────────────────────────────────────────────────────────────────
84
+
/**
 * Playwright controller for the Yiyan web chat UI.
 *
 * Owns one persistent Chromium context and one page. Public surface:
 * `launch`, `close`, `newChat`, `sendMessage`, `waitForResponse`,
 * `dumpDebugInfo` and `screenshot`. Underscore-prefixed methods are helpers.
 */
class YiyanBrowser {
  constructor() {
    this.context = null;
    this.page = null;
    this._closed = false;
  }

  // ── Lifecycle ──────────────────────────────────────────────────────────────

  /**
   * Start Chromium with a persistent profile stored in `config.SESSION_DIR`,
   * open `config.YIYAN_URL`, and pause for a manual login if the page looks
   * like a sign-in screen.
   */
  async launch() {
    logger.info('Launching browser with persistent session...');

    const sessionDir = path.resolve(config.SESSION_DIR);

    this.context = await chromium.launchPersistentContext(sessionDir, {
      headless: config.HEADLESS,
      viewport: { width: 1280, height: 900 },
      userAgent: [
        'Mozilla/5.0 (X11; Linux x86_64)',
        'AppleWebKit/537.36 (KHTML, like Gecko)',
        'Chrome/124.0.0.0 Safari/537.36',
      ].join(' '),
      // NOTE(review): --no-sandbox / --disable-setuid-sandbox turn off the
      // Chromium sandbox — presumably for container or root environments;
      // confirm this is intended for every deployment.
      args: [
        '--disable-blink-features=AutomationControlled',
        '--no-first-run',
        '--disable-default-apps',
        '--no-sandbox',
        '--disable-setuid-sandbox',
      ],
      ignoreDefaultArgs: ['--enable-automation'],
    });

    // Grab the existing page or open a new one.
    const pages = this.context.pages();
    this.page = pages.length > 0 ? pages[0] : await this.context.newPage();

    // Mask automation signals: make navigator.webdriver report false.
    await this.page.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
    });

    await this._navigate(config.YIYAN_URL);
    await this._ensureLoggedIn();

    logger.success('Browser ready!');
  }

  /** Close the browser context. Safe to call more than once. */
  async close() {
    if (this._closed) return;
    this._closed = true;
    // Best-effort shutdown: a failure to close is deliberately ignored.
    try { await this.context?.close(); } catch {}
  }

  // ── Navigation ─────────────────────────────────────────────────────────────

  /**
   * Load `url` and give the page a short moment to settle.
   * Navigation errors are logged as warnings and not rethrown.
   *
   * @param {string} url
   */
  async _navigate(url) {
    try {
      await this.page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30_000 });
      await this.page.waitForTimeout(1_500);
    } catch (err) {
      logger.warn(`Navigation warning: ${err.message}`);
    }
  }

  /**
   * Start a fresh conversation: click the first visible "new chat" control
   * from `SEL.newChat`, or fall back to reloading the home URL.
   */
  async newChat() {
    for (const sel of SEL.newChat) {
      try {
        const el = await this.page.$(sel);
        if (el && await el.isVisible()) {
          await el.click();
          await this.page.waitForTimeout(1_000);
          logger.dim('Started new chat session');
          return;
        }
      } catch {
        // This selector failed (query or click) — try the next one.
      }
    }

    // Fallback: navigate to home, which usually opens a fresh chat.
    await this._navigate(config.YIYAN_URL);
    logger.dim('Navigated to Yiyan home (new chat)');
  }

  // ── Login handling ─────────────────────────────────────────────────────────

  /**
   * Detect a login wall (by URL fragment, visible sign-in text, or a password
   * field) and, if found, block until the user confirms on stdin.
   */
  async _ensureLoggedIn() {
    await this.page.waitForTimeout(2_000);

    const needsLogin = await this.page.evaluate(() => {
      const url = window.location.href;
      const bodyText = document.body?.innerText || '';
      return (
        url.includes('/auth') ||
        url.includes('/login') ||
        url.includes('/sign') ||
        bodyText.includes('Sign in') ||
        bodyText.includes('Log in') ||
        bodyText.includes('登录') ||
        bodyText.includes('登 录') ||
        !!document.querySelector('input[type="password"]')
      );
    });

    if (needsLogin) {
      this._printLoginBanner();
      await this._waitForEnter();
      await this.page.waitForTimeout(2_000);
    }
  }

  /** Print the "login required" instructions box. */
  _printLoginBanner() {
    console.log('');
    logger.warn('╔══════════════════════════════════════════════╗');
    logger.warn('║ 🔐 LOGIN REQUIRED ║');
    logger.warn('║ ║');
    logger.warn('║ 1. Log in to Yiyan (文心一言) in browser ║');
    logger.warn('║ 2. Return here and press ENTER to continue║');
    logger.warn('╚══════════════════════════════════════════════╝');
    console.log('');
  }

  /**
   * Resolve once a line terminator arrives on stdin.
   *
   * Raw mode is switched off while waiting and restored afterwards, and stdin
   * is re-paused if it was explicitly paused on entry. If stdin is closed
   * (or ends while waiting) the promise resolves instead of hanging forever.
   *
   * @returns {Promise<void>}
   */
  async _waitForEnter() {
    return new Promise(resolve => {
      const stdin = process.stdin;

      // A closed stdin can never deliver ENTER; do not block on it.
      if (stdin.destroyed || stdin.readableEnded) {
        logger.warn('stdin is closed — continuing without waiting for ENTER.');
        resolve();
        return;
      }

      const wasRaw = stdin.isRaw;
      // isPaused() reports an explicit pause; `readable` only says whether the
      // stream is still open, so it cannot be used to detect the paused state.
      const wasPaused = stdin.isPaused();

      if (stdin.isTTY) stdin.setRawMode(false);
      stdin.resume();

      let settled = false;
      const finish = () => {
        if (settled) return;
        settled = true;
        stdin.removeListener('data', onData);
        stdin.removeListener('end', finish);
        stdin.removeListener('close', finish);
        if (stdin.isTTY && wasRaw) stdin.setRawMode(true);
        if (wasPaused) stdin.pause();
        resolve();
      };

      const onData = chunk => {
        const s = chunk.toString();
        if (s.includes('\n') || s.includes('\r')) finish();
      };

      stdin.on('data', onData);
      stdin.once('end', finish);
      stdin.once('close', finish);
    });
  }

  // ── Sending Messages ───────────────────────────────────────────────────────

  /**
   * Type `text` into the chat input and submit it with Enter.
   *
   * Playwright's `keyboard.type` presses Enter for every newline character,
   * which would submit a multi-line message at its first line break. The
   * text is therefore typed one line at a time with Shift+Enter in between.
   * NOTE(review): assumes Shift+Enter inserts a line break in the Yiyan
   * input, as in most chat UIs — confirm against the live page.
   *
   * @param {string} text message to send
   * @throws {Error} when no chat input can be located
   */
  async sendMessage(text) {
    const { el } = await this._findInput();

    // Triple click focuses the input and selects its current content.
    await el.click({ clickCount: 3, force: true });
    await this.page.waitForTimeout(200);

    // Clear the selection.
    await this.page.keyboard.press('Delete');
    await this.page.waitForTimeout(100);

    // Type character by character (simulates real user input).
    const lines = text.split(/\r\n|\r|\n/);
    for (let i = 0; i < lines.length; i++) {
      if (i > 0) await this.page.keyboard.press('Shift+Enter');
      if (lines[i].length > 0) {
        await this.page.keyboard.type(lines[i], { delay: 50 });
      }
    }

    // Press Enter to send.
    await this.page.keyboard.press('Enter');
  }

  /**
   * Locate the chat input by trying each `SEL.chatInput` selector in order,
   * waiting up to 4 s per selector for a visible match.
   *
   * @returns {Promise<{el: object, isTextarea: boolean}>} the element handle
   *   and whether it is a plain (non-contenteditable) textarea
   * @throws {Error} when no selector yields a visible element
   */
  async _findInput() {
    for (const sel of SEL.chatInput) {
      try {
        const el = await this.page.waitForSelector(sel, { timeout: 4_000, state: 'visible' });
        if (!el) continue;
        const tagName = await el.evaluate(e => e.tagName.toLowerCase());
        const isContentEditable = await el.evaluate(e => e.isContentEditable);
        return { el, isTextarea: tagName === 'textarea' && !isContentEditable };
      } catch {
        // Timed out or the handle went stale — try the next selector.
      }
    }
    throw new Error(
      'Cannot find the Yiyan chat input box.\n' +
      ' → Make sure the page is fully loaded and you are logged in.\n' +
      ' → Run with --debug to inspect DOM selectors.\n' +
      ' → Run: node src/calibrate.js to auto-detect selectors.'
    );
  }

  /**
   * Click the first visible, enabled send button from `SEL.sendButton`.
   *
   * @returns {Promise<boolean>} true if a button was clicked
   */
  async _clickSendButton() {
    for (const sel of SEL.sendButton) {
      try {
        const el = await this.page.$(sel);
        if (el && await el.isVisible() && await el.isEnabled()) {
          await el.click();
          return true;
        }
      } catch {
        // Try the next selector.
      }
    }
    return false;
  }

  // ── Waiting for Response ───────────────────────────────────────────────────

  /**
   * Wait until Yiyan finishes generating and return the response text.
   *
   * Algorithm:
   *   1. Record how many candidate message blocks are on the page right now.
   *   2. Wait (up to 12 s) until that count goes up.
   *   3. Poll the last message text every 500 ms.
   *   4. When the text has not changed for `config.STABLE_DELAY` ms AND no
   *      stop/loading indicator is visible → done.
   * The whole wait is bounded by `config.RESPONSE_TIMEOUT`; on timeout the
   * text extracted at that point is returned.
   *
   * @returns {Promise<string>} cleaned response text (may be empty)
   */
  async waitForResponse() {
    const timeout = config.RESPONSE_TIMEOUT;
    const stableDelay = config.STABLE_DELAY;
    const start = Date.now();

    // ── Phase 1: wait for a new message to appear ──────────────────────────
    const initialCount = await this._getMessageCount();
    let appeared = false;

    while (Date.now() - start < 12_000) {
      const count = await this._getMessageCount();
      if (count > initialCount) { appeared = true; break; }
      await this.page.waitForTimeout(400);
    }

    if (!appeared) logger.warn('Response may have been delayed — continuing to wait...');

    // ── Phase 2: wait for text to stabilise ───────────────────────────────
    let lastText = '';
    let stableStart = null;
    let dotCount = 0;

    while (Date.now() - start < timeout) {
      const text = await this._extractLastMessage();

      if (text !== lastText) {
        lastText = text;
        stableStart = null;
      } else if (text.length > 0) {
        if (!stableStart) {
          stableStart = Date.now();
        } else if (Date.now() - stableStart >= stableDelay) {
          if (!await this._isGenerating()) break; // confirmed done
          stableStart = null;                     // still generating, reset
        }
      }

      // Progress indicator
      dotCount = (dotCount + 1) % 4;
      logger.thinking(`Receiving response${'.'.repeat(dotCount)} (${text.length} chars)`);

      await this.page.waitForTimeout(500);
    }

    logger.clearLine();

    const final = await this._extractLastMessage();
    return this._cleanText(final);
  }

  // ── DOM Extraction ─────────────────────────────────────────────────────────

  /**
   * Count candidate "response" blocks: the match count of the first selector
   * that matches anything, else the number of div/section/article elements
   * whose text is longer than 50 characters.
   *
   * @returns {Promise<number>}
   */
  async _getMessageCount() {
    return await this.page.evaluate(() => {
      const candidates = [
        // Yiyan specific selectors
        '[class*="answer"]',
        '[class*="response"]',
        '[class*="content"]',
        '[class*="markdown"]',
        // Generic selectors
        '[class*="assistant"][class*="message"]',
        '[data-role="assistant"]',
        '[class*="markdown-content"]',
        '.ds-markdown',
        '[class*="chat-message"]',
        '[class*="message-bubble"]',
      ];
      for (const sel of candidates) {
        const els = document.querySelectorAll(sel);
        if (els.length > 0) return els.length;
      }
      // Broad fallback — any text block that might contain a response.
      const textBlocks = Array.from(document.querySelectorAll('div, section, article'))
        .filter(el => el.innerText && el.innerText.length > 50);
      return textBlocks.length;
    });
  }

  /**
   * Extract the text of the last assistant message, including code blocks.
   *
   * Four strategies run in order, from specific selectors to a page-wide
   * heuristic; the first one that yields text wins.
   *
   * @returns {Promise<string>} extracted text, or '' when nothing matched
   */
  async _extractLastMessage() {
    return await this.page.evaluate(() => {

      // className is an SVGAnimatedString on SVG elements, so string methods
      // on it throw; the class attribute is always a plain string or null.
      const classOf = el => (el.getAttribute && el.getAttribute('class')) || '';

      // ── Helper: get all text including code blocks ────────────────────────
      // Walks the DOM and reconstructs text, re-adding fence markers for code
      // blocks so the parser can recognise tool_call fences even after the
      // browser markdown renderer has converted them to <pre><code> elements.
      function getFullText(el) {
        if (!el) return '';
        let result = '';

        function walk(node) {
          if (node.nodeType === Node.TEXT_NODE) {
            result += node.textContent;
            return;
          }
          if (node.nodeType !== Node.ELEMENT_NODE) return;
          const tag = node.tagName.toLowerCase();

          // <pre> wraps a fenced code block — reconstruct the backtick fence
          // so the parser can match the ```tool_call regex.
          if (tag === 'pre') {
            const codeEl = node.querySelector('code');
            if (codeEl) {
              const cls = codeEl.className || '';
              const lang = (cls.match(/language-(\S+)/) || [])[1] || '';
              const body = codeEl.textContent || '';
              result += '\n```' + lang + '\n' + body + '\n```\n';
            } else {
              result += '\n```\n' + node.textContent + '\n```\n';
            }
            return;
          }

          // Inline <code> — skip if inside a <pre> (already handled)
          if (tag === 'code') {
            const parentTag = node.parentElement && node.parentElement.tagName
              ? node.parentElement.tagName.toLowerCase() : '';
            if (parentTag !== 'pre') {
              result += '`' + node.textContent + '`';
            }
            return;
          }

          for (const child of node.childNodes) walk(child);

          // Block-level elements end with a line break.
          if (['p','div','li','br','h1','h2','h3','h4','h5','h6'].includes(tag)) {
            result += '\n';
          }
        }

        walk(el);
        return result.trim();
      }

      // ── Attempt 1: Specific assistant-message selectors ──────────────────
      const directSelectors = [
        // Yiyan specific selectors
        '[class*="answer"]',
        '[class*="response"]',
        '[class*="message"][class*="content"]',
        // Generic selectors
        '.ds-markdown',
        '[class*="assistant"] [class*="markdown"]',
        '[class*="assistant"] [class*="content"]',
        '[data-role="assistant"] [class*="content"]',
        '[class*="ai-message"] [class*="content"]',
        '[class*="bot-message"] [class*="content"]',
        '[class*="response-content"]',
        '[class*="message-content"]:last-child',
      ];

      for (const sel of directSelectors) {
        try {
          const els = document.querySelectorAll(sel);
          if (els.length > 0) {
            const t = getFullText(els[els.length - 1]);
            if (t.length > 10) return t;
          }
        } catch {}
      }

      // ── Attempt 2: Any markdown/prose container ───────────────────────────
      try {
        const markdownEls = document.querySelectorAll(
          '[class*="markdown"], [class*="prose"], [class*="rendered"], [class*="content"]'
        );
        if (markdownEls.length > 0) {
          const t = getFullText(markdownEls[markdownEls.length - 1]);
          if (t.length > 10) return t;
        }
      } catch {}

      // ── Attempt 3: Heuristic — large non-user text blocks ────────────────
      try {
        const allBlocks = Array.from(
          document.querySelectorAll('[class*="message"], [class*="chat-item"], [class*="turn"], [class*="answer"], [class*="content"]')
        );
        const candidates = allBlocks.filter(el => {
          const cls = classOf(el).toLowerCase();
          const id = (el.id || '').toLowerCase();
          return (
            !cls.includes('input') &&
            !cls.includes('user') &&
            !cls.includes('editable') &&
            !id.includes('input') &&
            !el.querySelector('textarea, input[type="text"], [contenteditable="true"]') &&
            (el.innerText || '').length > 20
          );
        });

        if (candidates.length > 0) {
          return getFullText(candidates[candidates.length - 1]);
        }
      } catch {}

      // ── Attempt 4: Any large text block on page (last resort) ─────────────
      try {
        const allDivs = Array.from(document.querySelectorAll('div, section'));
        const textBlocks = allDivs.filter(el => {
          const text = el.innerText || '';
          // Exclude input areas and user messages
          const cls = classOf(el);
          if (cls.includes('input') || cls.includes('editable') || cls.includes('user')) return false;
          // Look for blocks with substantial text that might be the response
          return text.length > 50 && !el.querySelector('textarea, [contenteditable]');
        });
        if (textBlocks.length > 0) {
          // Sort by text length, prefer longer blocks (likely complete responses)
          textBlocks.sort((a, b) => (b.innerText || '').length - (a.innerText || '').length);
          return getFullText(textBlocks[0]);
        }
      } catch {}

      return '';
    });
  }

  /**
   * True if the page still shows a stop button or a loading indicator.
   *
   * Stop-button selectors come from the shared `SEL.stopButton` bank (which
   * includes the Chinese 停止 label) plus a few "generating" class patterns.
   *
   * @returns {Promise<boolean>}
   */
  async _isGenerating() {
    const stopSelectors = [...new Set([
      ...SEL.stopButton,
      '[class*="stop-gen"]',
      '[class*="stopGen"]',
      '[class*="generating"]',
    ])];

    return await this.page.evaluate(stopSels => {
      // Check for stop button
      for (const sel of stopSels) {
        const el = document.querySelector(sel);
        if (el) {
          const s = window.getComputedStyle(el);
          if (s.display !== 'none' && s.visibility !== 'hidden' && s.opacity !== '0') return true;
        }
      }

      // Check for animated loading/typing indicators
      const loaderSelectors = [
        '[class*="typing"]',
        '[class*="loading"]',
        '[class*="spinner"]',
        '[class*="blink"]',
        '[class*="cursor"]',
        '[class*="pulsing"]',
        'svg[class*="loading"]',
        'svg[class*="spinner"]',
      ];
      for (const sel of loaderSelectors) {
        const el = document.querySelector(sel);
        if (el) {
          const s = window.getComputedStyle(el);
          if (s.display !== 'none' && s.visibility !== 'hidden') return true;
        }
      }

      return false;
    }, stopSelectors);
  }

  // ── Text Cleanup ───────────────────────────────────────────────────────────

  /**
   * Remove UI and reasoning artifacts from extracted response text.
   *
   * @param {string} text raw extracted text
   * @returns {string} cleaned text; '' for empty or missing input
   */
  _cleanText(text) {
    if (!text) return '';

    return text
      // Strip <think>…</think> reasoning blocks
      .replace(/<think>[\s\S]*?<\/think>\n?/gi, '')
      // Strip a "Thinking..." header that prefixes the response. Anchored to
      // the start of the text so a "Thinking" line inside the answer is kept.
      .replace(/^\s*Thinking\.{0,3}\n[\s\S]*?\n\n/, '')
      // Strip copy-code button artifacts like "1CopyRunInsert"
      .replace(/^\d+(?:Copy|Run|Insert|Edit)\b.*$/gm, '')
      // Collapse 3+ consecutive newlines into 2
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }

  // ── Debug / Calibration Utilities ─────────────────────────────────────────

  /**
   * Dump useful DOM information to stdout: page URL and title, candidate
   * input elements, and the 40 most frequent chat-related CSS class names.
   */
  async dumpDebugInfo() {
    const info = await this.page.evaluate(() => {
      const classFreq = {};
      document.querySelectorAll('*').forEach(el => {
        el.classList.forEach(c => {
          if (c.match(/message|chat|input|send|stop|markdown|content|assistant|user|bot/i)) {
            classFreq[c] = (classFreq[c] || 0) + 1;
          }
        });
      });

      const inputs = Array.from(document.querySelectorAll('textarea, [contenteditable]')).map(e => ({
        tag         : e.tagName,
        id          : e.id || null,
        // Read the attribute: className is not a string on SVG elements.
        class       : (e.getAttribute('class') || '').slice(0, 80) || null,
        placeholder : e.placeholder || null,
        editable    : e.isContentEditable,
        visible     : e.offsetParent !== null,
      }));

      return {
        url    : window.location.href,
        title  : document.title,
        classes: Object.entries(classFreq).sort((a, b) => b[1] - a[1]).slice(0, 40),
        inputs,
      };
    });

    console.log('\n' + '═'.repeat(60));
    console.log(' DOM DEBUG INFO');
    console.log('═'.repeat(60));
    console.log('URL :', info.url);
    console.log('Title :', info.title);
    console.log('\nInput elements:');
    info.inputs.forEach(i => console.log(' ', JSON.stringify(i)));
    console.log('\nMatching CSS classes (by frequency):');
    info.classes.forEach(([cls, count]) => console.log(` ${String(count).padStart(3)}x .${cls}`));
    console.log('═'.repeat(60) + '\n');
  }

  /**
   * Save a viewport screenshot (for debugging).
   *
   * @param {string} [filePath='/tmp/yiyan-agent-debug.png'] output path
   */
  async screenshot(filePath = '/tmp/yiyan-agent-debug.png') {
    await this.page.screenshot({ path: filePath, fullPage: false });
    logger.info(`Screenshot saved: ${filePath}`);
  }
}
623
+
// Public entry point of this module: the browser controller class.
module.exports = YiyanBrowser;