yiyan-browser-agent 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -30,9 +30,48 @@ var YIYAN_CHAT_URL = "https://yiyan.baidu.com/";
30
30
  // src/browser.ts
31
31
  import { chromium } from "playwright";
32
32
  import fs from "fs";
33
+ var SELECTORS = {
34
+ // 输入框选择器(按优先级排序)
35
+ chatInput: [
36
+ "#chat-input",
37
+ 'textarea[placeholder*="\u8F93\u5165"]',
38
+ "textarea[placeholder]",
39
+ "textarea",
40
+ '[contenteditable="true"][role="textbox"]',
41
+ '[contenteditable="true"]'
42
+ ],
43
+ // 发送按钮选择器
44
+ sendButton: [
45
+ 'button[aria-label*="\u53D1\u9001"]',
46
+ 'button[aria-label*="Send"]',
47
+ '[data-testid="send-button"]',
48
+ 'button[type="submit"]',
49
+ '[class*="send-btn"]',
50
+ '[class*="sendBtn"]',
51
+ '[class*="send-button"]'
52
+ ],
53
+ // 响应容器选择器
54
+ responseContainer: [
55
+ '[class*="answerBox"]',
56
+ '[class*="response"]',
57
+ '[class*="ai-message"]',
58
+ '[class*="assistant"]',
59
+ '[class*="markdown"]',
60
+ '[class*="message-content"]',
61
+ '[class*="chat-message"]'
62
+ ],
63
+ // 停止生成按钮
64
+ stopButton: [
65
+ 'button[aria-label*="\u505C\u6B62"]',
66
+ 'button[aria-label*="Stop"]',
67
+ '[class*="stop-btn"]',
68
+ '[class*="stopBtn"]',
69
+ '[class*="stop-gen"]'
70
+ ]
71
+ };
33
72
  function log(verbose, msg) {
34
73
  if (verbose) {
35
- process.stderr.write(`[yiyan-agent] ${msg}
74
+ process.stderr.write(`[yiyan-browser-agent] ${msg}
36
75
  `);
37
76
  }
38
77
  }
@@ -85,326 +124,310 @@ async function closeBrowser(context, verbose) {
85
124
  }
86
125
  }
87
126
  async function navigateToYiyan(page, verbose = false) {
88
- log(verbose, "\u5BFC\u822A\u5230\u6587\u5FC3\u4E00\u8A00\u804A\u5929\u9875\u9762...");
127
+ log(verbose, "\u5BFC\u822A\u5230\u6587\u5FC3\u4E00\u8A00...");
89
128
  try {
90
- await page.goto(YIYAN_CHAT_URL, {
91
- waitUntil: "domcontentloaded",
92
- timeout: 3e4
93
- });
129
+ await page.goto(YIYAN_CHAT_URL, { waitUntil: "domcontentloaded", timeout: 3e4 });
94
130
  await page.waitForTimeout(1500);
95
131
  log(verbose, "\u9875\u9762\u52A0\u8F7D\u5B8C\u6210");
96
132
  } catch (err) {
97
133
  log(verbose, `\u5BFC\u822A\u8B66\u544A: ${err instanceof Error ? err.message : String(err)}`);
98
134
  }
99
135
  }
100
- async function checkCaptcha(page, verbose = false) {
101
- const captchaIndicators = [
102
- "\u8BF7\u5B8C\u6210\u4E0B\u5217\u9A8C\u8BC1\u540E\u7EE7\u7EED",
103
- "\u6309\u4F4F\u5DE6\u8FB9\u6309\u94AE\u62D6\u52A8",
104
- "\u6ED1\u52A8\u9A8C\u8BC1",
105
- "\u70B9\u51FB\u9A8C\u8BC1",
106
- "\u5B89\u5168\u9A8C\u8BC1",
107
- "captcha",
108
- "\u8BF7\u5B8C\u6210\u9A8C\u8BC1"
109
- ];
110
- const hasCaptcha = await page.evaluate((indicators) => {
111
- const bodyText = document.body.innerText || "";
112
- for (const indicator of indicators) {
113
- if (bodyText.includes(indicator)) return true;
114
- }
115
- const dialogs = document.querySelectorAll('[role="dialog"], [class*="captcha"], [class*="verify"]');
116
- if (dialogs.length > 0) return true;
117
- return false;
118
- }, captchaIndicators);
119
- if (hasCaptcha) {
120
- log(verbose, "\u26A0 \u68C0\u6D4B\u5230\u9A8C\u8BC1\u7801\u5F39\u7A97\uFF01");
121
- }
122
- return hasCaptcha;
123
- }
124
136
  async function checkLoggedIn(page, verbose = false) {
137
+ await page.waitForTimeout(2e3);
125
138
  const isLoggedIn = await page.evaluate(() => {
126
- const bodyText = document.body.innerText || "";
127
- if (bodyText.includes("\u672A\u767B\u5F55")) return false;
128
- const loginButtons = document.querySelectorAll("button");
129
- for (const btn of loginButtons) {
130
- const text = btn.textContent?.trim() || "";
131
- if (text === "\u767B\u5F55") {
132
- return false;
133
- }
134
- }
139
+ const url = window.location.href;
140
+ const bodyText = document.body?.innerText || "";
141
+ if (url.includes("/auth") || url.includes("/login") || url.includes("/sign")) return false;
142
+ if (bodyText.includes("\u672A\u767B\u5F55") || bodyText.includes("\u8BF7\u767B\u5F55")) return false;
143
+ if (document.querySelector('input[type="password"]')) return false;
144
+ const loginBtn = document.querySelector("button");
145
+ if (loginBtn && loginBtn.textContent?.trim() === "\u767B\u5F55") return false;
135
146
  return true;
136
147
  });
137
- if (isLoggedIn) {
138
- log(verbose, "\u2713 \u5DF2\u68C0\u6D4B\u5230\u767B\u5F55\u72B6\u6001");
139
- } else {
140
- log(verbose, "\u26A0 \u672A\u68C0\u6D4B\u5230\u767B\u5F55\u72B6\u6001");
141
- }
148
+ log(verbose, isLoggedIn ? "\u2713 \u5DF2\u767B\u5F55" : "\u26A0 \u672A\u767B\u5F55");
142
149
  return isLoggedIn;
143
150
  }
151
+ async function checkCaptcha(page, verbose = false) {
152
+ const hasCaptcha = await page.evaluate(() => {
153
+ const bodyText = document.body.innerText || "";
154
+ const indicators = ["\u9A8C\u8BC1\u7801", "\u8BF7\u5B8C\u6210\u9A8C\u8BC1", "\u5B89\u5168\u9A8C\u8BC1", "\u6ED1\u52A8\u9A8C\u8BC1", "captcha"];
155
+ for (const kw of indicators) {
156
+ if (bodyText.includes(kw)) return true;
157
+ }
158
+ const dialogs = document.querySelectorAll('[role="dialog"], [class*="captcha"], [class*="verify"]');
159
+ return dialogs.length > 0;
160
+ });
161
+ if (hasCaptcha) log(verbose, "\u26A0 \u68C0\u6D4B\u5230\u9A8C\u8BC1\u7801\uFF01");
162
+ return hasCaptcha;
163
+ }
144
164
  async function waitForUserAction(page, reason, verbose = false) {
145
- const reasonText = reason === "captcha" ? "\u68C0\u6D4B\u5230\u9A8C\u8BC1\u7801\uFF0C\u8BF7\u5728\u6D4F\u89C8\u5668\u4E2D\u624B\u52A8\u5B8C\u6210\u9A8C\u8BC1" : reason === "login" ? "\u68C0\u6D4B\u5230\u672A\u767B\u5F55\uFF0C\u8BF7\u5728\u6D4F\u89C8\u5668\u4E2D\u624B\u52A8\u767B\u5F55" : "AI \u672A\u56DE\u590D\uFF0C\u8BF7\u68C0\u67E5\u6D4F\u89C8\u5668\u662F\u5426\u9700\u8981\u624B\u52A8\u64CD\u4F5C\uFF08\u9A8C\u8BC1\u7801/\u767B\u5F55\uFF09";
165
+ const messages = {
166
+ captcha: "\u68C0\u6D4B\u5230\u9A8C\u8BC1\u7801\uFF0C\u8BF7\u5728\u6D4F\u89C8\u5668\u4E2D\u624B\u52A8\u5B8C\u6210\u9A8C\u8BC1",
167
+ login: "\u68C0\u6D4B\u5230\u672A\u767B\u5F55\uFF0C\u8BF7\u5728\u6D4F\u89C8\u5668\u4E2D\u624B\u52A8\u767B\u5F55",
168
+ "no-reply": "AI \u672A\u56DE\u590D\uFF0C\u8BF7\u68C0\u67E5\u6D4F\u89C8\u5668\u662F\u5426\u9700\u8981\u624B\u52A8\u64CD\u4F5C"
169
+ };
146
170
  process.stderr.write(`
147
- [yiyan-agent] \u26A0 ${reasonText}
171
+ [yiyan-browser-agent] \u26A0 ${messages[reason]}
148
172
  `);
149
- process.stderr.write("[yiyan-agent] \u7B49\u5F85\u60A8\u64CD\u4F5C\u5B8C\u6210...\uFF08\u64CD\u4F5C\u5B8C\u6210\u540E\u4F1A\u81EA\u52A8\u7EE7\u7EED\uFF09\n\n");
173
+ process.stderr.write("[yiyan-browser-agent] \u7B49\u5F85\u60A8\u64CD\u4F5C\u5B8C\u6210...\uFF08\u64CD\u4F5C\u5B8C\u6210\u540E\u4F1A\u81EA\u52A8\u7EE7\u7EED\uFF09\n\n");
150
174
  const maxWait = 18e4;
151
- const startTime = Date.now();
152
- while (Date.now() - startTime < maxWait) {
175
+ const start = Date.now();
176
+ while (Date.now() - start < maxWait) {
153
177
  await page.waitForTimeout(2e3);
154
- if (reason === "captcha") {
155
- const stillHasCaptcha = await checkCaptcha(page, false);
156
- if (!stillHasCaptcha) {
157
- log(verbose, "\u2713 \u9A8C\u8BC1\u7801\u5DF2\u901A\u8FC7\uFF01");
158
- return;
159
- }
160
- } else if (reason === "login") {
161
- const loggedIn = await checkLoggedIn(page, false);
162
- if (loggedIn) {
163
- log(verbose, "\u2713 \u767B\u5F55\u6210\u529F\uFF01");
164
- return;
165
- }
166
- } else {
167
- const hasReply = await page.evaluate(() => {
168
- const answerBox = document.querySelector('[class*="answerBox"]');
169
- if (!answerBox) return false;
170
- const text = answerBox.innerText?.trim() || "";
171
- return text.length > 0;
172
- });
173
- if (hasReply) {
174
- log(verbose, "\u2713 AI \u5DF2\u5F00\u59CB\u56DE\u590D\uFF01");
175
- return;
176
- }
178
+ if (reason === "captcha" && !await checkCaptcha(page, false)) {
179
+ log(verbose, "\u2713 \u9A8C\u8BC1\u7801\u5DF2\u901A\u8FC7\uFF01");
180
+ return;
181
+ }
182
+ if (reason === "login" && await checkLoggedIn(page, false)) {
183
+ log(verbose, "\u2713 \u767B\u5F55\u6210\u529F\uFF01");
184
+ return;
185
+ }
186
+ if (reason === "no-reply" && await getMessageCount(page) > 0) {
187
+ log(verbose, "\u2713 AI \u5DF2\u5F00\u59CB\u56DE\u590D\uFF01");
188
+ return;
177
189
  }
178
190
  }
179
191
  log(verbose, "\u26A0 \u7B49\u5F85\u7528\u6237\u64CD\u4F5C\u8D85\u65F6\uFF083\u5206\u949F\uFF09");
180
192
  }
181
193
  async function sendMessage(page, message, verbose = false, headful = false) {
182
- const inputSelectors = [
183
- '[contenteditable="true"][role="textbox"]',
184
- '[contenteditable="true"]',
185
- "textarea[placeholder]",
186
- "textarea"
187
- ];
188
- log(verbose, "\u7B49\u5F85\u8F93\u5165\u6846\u51FA\u73B0...");
189
- let inputElement = null;
190
- for (const sel of inputSelectors) {
191
- try {
192
- inputElement = await page.waitForSelector(sel, { timeout: 4e3, state: "visible" });
193
- if (inputElement) {
194
- log(verbose, `\u2713 \u8F93\u5165\u6846\u5DF2\u627E\u5230: ${sel}`);
195
- break;
196
- }
197
- } catch {
198
- }
199
- }
200
- if (!inputElement) {
194
+ const input = await findInput(page, verbose);
195
+ if (!input) {
201
196
  throw new YiyanAgentError("NETWORK", "Cannot find the Yiyan chat input box");
202
197
  }
203
- log(verbose, "\u70B9\u51FB\u8F93\u5165\u6846\u83B7\u53D6\u7126\u70B9...");
204
- await inputElement.click({ force: true });
205
- await page.waitForTimeout(500);
206
- log(verbose, `\u8F93\u5165\u95EE\u9898: "${message}"`);
207
- await inputElement.fill(message);
208
- await page.waitForTimeout(500);
209
- const filledValue = await inputElement.innerText();
210
- if (!filledValue.includes(message)) {
211
- log(verbose, "fill \u5931\u8D25\uFF0C\u5C1D\u8BD5 keyboard.type...");
212
- await inputElement.click({ force: true });
213
- await page.keyboard.press("Control+a");
214
- await page.keyboard.type(message, { delay: 50 });
215
- await page.waitForTimeout(500);
198
+ await input.click({ force: true });
199
+ await page.waitForTimeout(200);
200
+ await page.keyboard.press("Control+a");
201
+ await page.waitForTimeout(100);
202
+ const isTextarea = await input.evaluate((el) => el.tagName.toLowerCase() === "textarea");
203
+ if (isTextarea) {
204
+ await input.fill(message);
205
+ } else {
206
+ await input.evaluate((el, content) => {
207
+ el.focus();
208
+ document.execCommand("selectAll", false, null);
209
+ document.execCommand("delete", false, null);
210
+ document.execCommand("insertText", false, content);
211
+ el.dispatchEvent(new InputEvent("input", { bubbles: true, data: content }));
212
+ }, message);
213
+ }
214
+ await page.waitForTimeout(400);
215
+ log(verbose, `\u5DF2\u8F93\u5165\u95EE\u9898: "${message.substring(0, 50)}..."`);
216
+ const sent = await clickSendButton(page);
217
+ if (!sent) {
218
+ await page.keyboard.press("Enter");
216
219
  }
217
- log(verbose, "\u6309 Enter \u53D1\u9001\u6D88\u606F...");
218
- await page.keyboard.press("Enter");
219
- await page.waitForTimeout(3e3);
220
+ await page.waitForTimeout(500);
220
221
  if (await checkCaptcha(page, verbose)) {
221
222
  if (headful) {
222
223
  await waitForUserAction(page, "captcha", verbose);
223
224
  } else {
224
- throw new YiyanAgentError(
225
- "CAPTCHA",
226
- "Yiyan detected automation and triggered a captcha. Use --headful to manually solve it."
227
- );
225
+ throw new YiyanAgentError("CAPTCHA", "Captcha detected. Use --headful to solve it.");
228
226
  }
229
227
  }
230
228
  }
231
- async function waitForReply(page, timeout, verbose = false, headful = false) {
232
- const maxWait = Math.min(timeout, 6e4);
233
- log(verbose, `\u7B49\u5F85 AI \u56DE\u590D\uFF08\u6700\u591A ${maxWait / 1e3} \u79D2\uFF09...`);
234
- const startTime = Date.now();
235
- let lastLen = 0;
236
- let stableCount = 0;
237
- let replyStarted = false;
238
- while (Date.now() - startTime < maxWait) {
239
- const state = await page.evaluate(() => {
240
- const answerBox = document.querySelector('[class*="answerBox"]');
241
- if (answerBox) {
242
- const text = answerBox.innerText?.trim() || "";
243
- return {
244
- hasAiReply: text.length > 0,
245
- aiTextLen: text.length,
246
- aiPreview: text.substring(0, 100)
247
- };
248
- }
249
- const cardList = document.querySelector('[class*="dialogueCardList"]');
250
- if (cardList) {
251
- const lastCard = cardList.lastElementChild;
252
- if (lastCard) {
253
- const text = lastCard.innerText?.trim() || "";
254
- return {
255
- hasAiReply: text.length > 0,
256
- aiTextLen: text.length,
257
- aiPreview: text.substring(0, 100)
258
- };
259
- }
229
+ async function findInput(page, verbose = false) {
230
+ for (const sel of SELECTORS.chatInput) {
231
+ try {
232
+ const el = await page.waitForSelector(sel, { timeout: 4e3, state: "visible" });
233
+ if (el) {
234
+ log(verbose, `\u2713 \u8F93\u5165\u6846: ${sel}`);
235
+ return el;
260
236
  }
261
- return { hasAiReply: false, aiTextLen: 0, aiPreview: "" };
262
- });
263
- if (state.hasAiReply && state.aiTextLen > 0) {
264
- if (!replyStarted) {
265
- log(verbose, `\u2713 AI \u56DE\u590D\u5DF2\u5F00\u59CB\u751F\u6210\uFF08${state.aiTextLen}\u5B57\uFF09`);
266
- replyStarted = true;
237
+ } catch {
238
+ }
239
+ }
240
+ return null;
241
+ }
242
+ async function clickSendButton(page) {
243
+ for (const sel of SELECTORS.sendButton) {
244
+ try {
245
+ const el = await page.$(sel);
246
+ if (el && await el.isVisible() && await el.isEnabled()) {
247
+ await el.click();
248
+ return true;
267
249
  }
268
- if (state.aiTextLen === lastLen) {
269
- stableCount++;
270
- if (stableCount >= 3) {
271
- log(verbose, `\u2713 AI \u56DE\u590D\u5DF2\u5B8C\u6210\uFF08${state.aiTextLen}\u5B57\uFF09`);
250
+ } catch {
251
+ }
252
+ }
253
+ return false;
254
+ }
255
+ async function waitForReply(page, timeout, verbose = false, headful = false) {
256
+ const maxWait = Math.min(timeout, 12e4);
257
+ const stableDelay = 2500;
258
+ const start = Date.now();
259
+ const initialCount = await getMessageCount(page);
260
+ let appeared = false;
261
+ while (Date.now() - start < 12e3) {
262
+ const count = await getMessageCount(page);
263
+ if (count > initialCount) {
264
+ appeared = true;
265
+ log(verbose, "\u2713 AI \u56DE\u590D\u5DF2\u5F00\u59CB\u751F\u6210");
266
+ break;
267
+ }
268
+ await page.waitForTimeout(400);
269
+ }
270
+ if (!appeared) {
271
+ log(verbose, "\u56DE\u590D\u53EF\u80FD\u5EF6\u8FDF\uFF0C\u7EE7\u7EED\u7B49\u5F85...");
272
+ }
273
+ let lastText = "";
274
+ let stableStart = null;
275
+ while (Date.now() - start < maxWait) {
276
+ const text = await extractLastMessage(page);
277
+ if (text !== lastText) {
278
+ lastText = text;
279
+ stableStart = null;
280
+ } else if (text.length > 0) {
281
+ if (!stableStart) stableStart = Date.now();
282
+ else if (Date.now() - stableStart >= stableDelay) {
283
+ if (!await isGenerating(page)) {
284
+ log(verbose, `\u2713 AI \u56DE\u590D\u5DF2\u5B8C\u6210\uFF08${text.length}\u5B57\uFF09`);
272
285
  break;
273
286
  }
274
- } else {
275
- stableCount = 0;
276
- lastLen = state.aiTextLen;
287
+ stableStart = null;
277
288
  }
278
289
  }
279
290
  await page.waitForTimeout(500);
280
291
  }
281
- if (!replyStarted) {
292
+ if (lastText.length === 0) {
282
293
  if (await checkCaptcha(page, verbose)) {
283
294
  if (headful) {
284
295
  await waitForUserAction(page, "captcha", verbose);
285
296
  } else {
286
- throw new YiyanAgentError(
287
- "CAPTCHA",
288
- "Yiyan detected automation and triggered a captcha."
289
- );
297
+ throw new YiyanAgentError("CAPTCHA", "Captcha detected.");
290
298
  }
291
299
  } else if (headful) {
292
300
  await waitForUserAction(page, "no-reply", verbose);
293
301
  } else {
294
- throw new YiyanAgentError(
295
- "TIMEOUT",
296
- "AI reply timeout in headless mode. Try running login first, or use headed mode."
297
- );
302
+ throw new YiyanAgentError("TIMEOUT", "AI reply timeout.");
298
303
  }
299
304
  }
300
305
  await page.waitForTimeout(1e3);
301
306
  }
302
- async function extractReply(page, question, verbose = false) {
303
- log(verbose, "\u63D0\u53D6 AI \u56DE\u590D\u5185\u5BB9...");
304
- const reply = await page.evaluate((userQuestion) => {
305
- const debugInfo = [];
306
- const answerBox = document.querySelector('[class*="answerBox"]');
307
- debugInfo.push(`answerBox found: ${!!answerBox}`);
308
- if (answerBox) {
309
- const clone = answerBox.cloneNode(true);
310
- const processItems = clone.querySelectorAll('[class*="processItem"], [class*="processContent"]');
311
- for (const item of processItems) item.remove();
312
- const toolMessages = clone.querySelectorAll('[class*="toolMessage"]');
313
- for (const msg of toolMessages) msg.remove();
314
- const thinkHeaders = clone.querySelectorAll('[class*="headerMask"], [class*="topHeader"]');
315
- for (const h of thinkHeaders) h.remove();
316
- const text = clone.innerText?.trim() || "";
317
- const cleanedText = text.replace(/^参考\d+个网页\s*/, "").replace(/^深度思考已完成\s*/, "").replace(/^思考完成[::]\s*/, "").replace(/^准备输出结果\s*/, "").trim();
318
- if (cleanedText.length > 0) {
319
- debugInfo.push(`method1 success: ${cleanedText.length} chars`);
320
- return JSON.stringify({ text: cleanedText, debug: debugInfo });
307
+ async function getMessageCount(page) {
308
+ return await page.evaluate(() => {
309
+ for (const sel of [
310
+ '[class*="answerBox"]',
311
+ '[class*="assistant"]',
312
+ '[class*="ai-message"]',
313
+ '[class*="response"]'
314
+ ]) {
315
+ const els = document.querySelectorAll(sel);
316
+ if (els.length > 0) return els.length;
317
+ }
318
+ return document.querySelectorAll('[class*="message"]').length;
319
+ });
320
+ }
321
+ async function isGenerating(page) {
322
+ return await page.evaluate(() => {
323
+ for (const sel of ['button[aria-label*="\u505C\u6B62"]', '[class*="stop"]', '[class*="generating"]']) {
324
+ const el = document.querySelector(sel);
325
+ if (el) {
326
+ const s = window.getComputedStyle(el);
327
+ if (s.display !== "none" && s.visibility !== "hidden") return true;
321
328
  }
322
329
  }
323
- const allMdContainers = document.querySelectorAll('[class*="mdRenderContainer"]');
324
- debugInfo.push(`mdRenderContainer count: ${allMdContainers.length}`);
325
- for (const container of allMdContainers) {
326
- const parent = container.parentElement;
327
- const parentClass = parent?.className?.toString() || "";
328
- if (parentClass.includes("toolMessage") || parentClass.includes("process")) continue;
329
- const text = container.innerText?.trim() || "";
330
- if (text.length > 10) {
331
- debugInfo.push(`method2 success: ${text.length} chars`);
332
- return JSON.stringify({ text, debug: debugInfo });
330
+ for (const sel of ['[class*="loading"]', '[class*="typing"]', '[class*="spinner"]']) {
331
+ const el = document.querySelector(sel);
332
+ if (el) {
333
+ const s = window.getComputedStyle(el);
334
+ if (s.display !== "none" && s.visibility !== "hidden") return true;
333
335
  }
334
336
  }
335
- const cardList = document.querySelector('[class*="dialogueCardList"]');
336
- debugInfo.push(`dialogueCardList found: ${!!cardList}`);
337
- if (cardList && cardList.children.length > 0) {
338
- const lastCard = cardList.lastElementChild;
339
- if (lastCard) {
340
- const clone = lastCard.cloneNode(true);
341
- const thinkEls = clone.querySelectorAll('[class*="processItem"], [class*="processContent"], [class*="toolMessage"]');
342
- for (const el of thinkEls) el.remove();
343
- const text = clone.innerText?.trim() || "";
344
- if (text.length > 0) {
345
- debugInfo.push(`method3 success: ${text.length} chars`);
346
- return JSON.stringify({ text, debug: debugInfo });
337
+ return false;
338
+ });
339
+ }
340
+ async function extractLastMessage(page) {
341
+ return await page.evaluate(() => {
342
+ function getFullText(el) {
343
+ if (!el) return "";
344
+ let result = "";
345
+ function walk(node) {
346
+ if (node.nodeType === Node.TEXT_NODE) {
347
+ result += node.textContent || "";
348
+ return;
349
+ }
350
+ if (node.nodeType !== Node.ELEMENT_NODE) return;
351
+ const tag = node.tagName.toLowerCase();
352
+ if (tag === "pre") {
353
+ const codeEl = node.querySelector("code");
354
+ if (codeEl) {
355
+ const cls = codeEl.className || "";
356
+ const lang = (cls.match(/language-(\S+)/) || [])[1] || "";
357
+ result += "\n```" + lang + "\n" + (codeEl.textContent || "") + "\n```\n";
358
+ } else {
359
+ result += "\n```\n" + (node.textContent || "") + "\n```\n";
360
+ }
361
+ return;
362
+ }
363
+ for (const child of node.childNodes) walk(child);
364
+ if (["p", "div", "li", "br", "h1", "h2", "h3", "h4", "h5", "h6"].includes(tag)) {
365
+ result += "\n";
347
366
  }
348
367
  }
368
+ walk(el);
369
+ return result.trim();
349
370
  }
350
- const fullText = document.body.innerText;
351
- const lines = fullText.split("\n").map((l) => l.trim()).filter((l) => l.length > 0);
352
- const uiWords = /* @__PURE__ */ new Set([
353
- "\u6587\u5FC3\u4E00\u8A00",
354
- "\u65B0\u5BF9\u8BDD",
355
- "\u521B\u610F\u5199\u4F5C",
356
- "\u667A\u6167\u7ED8\u56FE",
357
- "\u8D85\u7EA7\u667A\u80FD\u4F53",
358
- "\u66F4\u591A",
359
- "\u6211\u7684\u6536\u85CF",
360
- "\u9879\u76EE",
361
- "\u5BF9\u8BDD",
362
- "\u6682\u65E0\u8BB0\u5F55",
363
- "\u672A\u767B\u5F55",
364
- "\u767B\u5F55",
365
- "\u5185\u5BB9\u7531AI\u751F\u6210\uFF0C\u4EC5\u4F9B\u53C2\u8003\uFF0C\u8BF7\u4ED4\u7EC6\u7504\u522B",
366
- "\u53C2\u8003"
367
- ]);
368
- const thinkingKeywords = [
369
- "\u6DF1\u5EA6\u601D\u8003\u5DF2\u5B8C\u6210",
370
- "\u601D\u8003\u5B8C\u6210",
371
- "\u51C6\u5907\u8F93\u51FA\u7ED3\u679C"
372
- ];
373
- const userQIdx = lines.findIndex((l) => l === userQuestion || l.includes(userQuestion));
374
- debugInfo.push(`userQ idx: ${userQIdx}, lines: ${lines.length}`);
375
- if (userQIdx >= 0) {
376
- const replyLines = [];
377
- for (let i = userQIdx + 1; i < lines.length; i++) {
378
- const line = lines[i];
379
- if (uiWords.has(line)) continue;
380
- if (thinkingKeywords.some((kw) => line.includes(kw))) continue;
381
- if (line.startsWith("\u641C\u7D22") || line.includes("\u7BC7\u8D44\u6599")) continue;
382
- if (line.match(/^参考\d+个网页/)) continue;
383
- if (line.length > 0) replyLines.push(line);
384
- if (line === "\u5FEB\u901F" || line === "\u66F4\u591A" || line.includes("\u5185\u5BB9\u7531AI\u751F\u6210")) break;
385
- }
386
- if (replyLines.length > 0) {
387
- debugInfo.push(`method4 success: ${replyLines.length} lines`);
388
- return JSON.stringify({ text: replyLines.join("\n"), debug: debugInfo });
371
+ for (const sel of SELECTORS.responseContainer) {
372
+ const els = document.querySelectorAll(sel);
373
+ if (els.length > 0) {
374
+ const text = getFullText(els[els.length - 1]);
375
+ if (text.length > 10) return text;
389
376
  }
390
377
  }
391
- debugInfo.push("ALL METHODS FAILED");
392
- return JSON.stringify({ text: "", debug: debugInfo });
393
- }, question);
394
- let parsed;
395
- try {
396
- parsed = JSON.parse(reply);
397
- } catch {
398
- parsed = { text: reply, debug: [] };
399
- }
400
- for (const line of parsed.debug) {
401
- log(verbose, ` [extract] ${line}`);
402
- }
403
- if (parsed.text && parsed.text.length > 0) {
404
- log(verbose, `\u63D0\u53D6\u6210\u529F\uFF0C\u56DE\u590D\u957F\u5EA6: ${parsed.text.length} \u5B57\u7B26`);
405
- return parsed.text;
406
- }
407
- throw new YiyanAgentError("TIMEOUT", "Failed to extract reply content");
378
+ const mdEls = document.querySelectorAll('[class*="markdown"], [class*="prose"]');
379
+ if (mdEls.length > 0) {
380
+ const text = getFullText(mdEls[mdEls.length - 1]);
381
+ if (text.length > 10) return text;
382
+ }
383
+ return "";
384
+ });
385
+ }
386
+ async function extractReply(page, question, verbose = false) {
387
+ log(verbose, "\u63D0\u53D6 AI \u56DE\u590D...");
388
+ const text = await extractLastMessage(page);
389
+ if (text.length > 0) {
390
+ const cleaned = text.replace(/^参考\d+个网页\s*/, "").replace(/^深度思考已完成\s*/, "").replace(/^思考完成[::]\s*/, "").replace(/\n{3,}/g, "\n\n").trim();
391
+ log(verbose, `\u63D0\u53D6\u6210\u529F: ${cleaned.length} \u5B57\u7B26`);
392
+ return cleaned;
393
+ }
394
+ throw new YiyanAgentError("TIMEOUT", "Failed to extract reply");
395
+ }
396
+ async function dumpDebugInfo(page) {
397
+ const info = await page.evaluate(() => {
398
+ const classFreq = {};
399
+ document.querySelectorAll("*").forEach((el) => {
400
+ el.classList.forEach((c) => {
401
+ if (c.match(/message|chat|input|send|stop|markdown|content|assistant|user|bot|answer/i)) {
402
+ classFreq[c] = (classFreq[c] || 0) + 1;
403
+ }
404
+ });
405
+ });
406
+ const inputs = Array.from(document.querySelectorAll("textarea, [contenteditable]")).map((e) => ({
407
+ tag: e.tagName,
408
+ id: e.id || null,
409
+ class: (e.className?.toString() || "").substring(0, 80),
410
+ placeholder: e.placeholder || null,
411
+ editable: e.isContentEditable,
412
+ visible: e.offsetParent !== null
413
+ }));
414
+ return {
415
+ url: window.location.href,
416
+ title: document.title,
417
+ classes: Object.entries(classFreq).sort((a, b) => b[1] - a[1]).slice(0, 40),
418
+ inputs
419
+ };
420
+ });
421
+ console.log("\n" + "\u2550".repeat(60));
422
+ console.log(" DOM DEBUG INFO");
423
+ console.log("\u2550".repeat(60));
424
+ console.log("URL :", info.url);
425
+ console.log("Title :", info.title);
426
+ console.log("\nInput elements:");
427
+ info.inputs.forEach((i) => console.log(" ", JSON.stringify(i)));
428
+ console.log("\nMatching CSS classes (by frequency):");
429
+ info.classes.forEach(([cls, count]) => console.log(` ${String(count).padStart(3)}x .${cls}`));
430
+ console.log("\u2550".repeat(60) + "\n");
408
431
  }
409
432
 
410
433
  // src/agent.ts
@@ -448,7 +471,7 @@ var YiyanAgent = class {
448
471
  } else {
449
472
  throw new YiyanAgentError(
450
473
  "CAPTCHA",
451
- 'Not logged in. Please run "yiyan-agent login" first, or use headed mode.'
474
+ 'Not logged in. Please run "yiyan-browser-agent login" first, or use headed mode.'
452
475
  );
453
476
  }
454
477
  }
@@ -473,14 +496,14 @@ var YiyanAgent = class {
473
496
  });
474
497
  try {
475
498
  await navigateToYiyan(page, this.verbose);
476
- process.stderr.write("\n[yiyan-agent] \u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557\n");
477
- process.stderr.write("[yiyan-agent] \u2551 \u{1F510} LOGIN REQUIRED \u2551\n");
478
- process.stderr.write("[yiyan-agent] \u2551 \u2551\n");
479
- process.stderr.write("[yiyan-agent] \u2551 1. Log in to Yiyan in the browser window \u2551\n");
480
- process.stderr.write("[yiyan-agent] \u2551 2. Return here and press ENTER to continue \u2551\n");
481
- process.stderr.write("[yiyan-agent] \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D\n\n");
499
+ process.stderr.write("\n[yiyan-browser-agent] \u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557\n");
500
+ process.stderr.write("[yiyan-browser-agent] \u2551 \u{1F510} LOGIN REQUIRED \u2551\n");
501
+ process.stderr.write("[yiyan-browser-agent] \u2551 \u2551\n");
502
+ process.stderr.write("[yiyan-browser-agent] \u2551 1. Log in to Yiyan in the browser window \u2551\n");
503
+ process.stderr.write("[yiyan-browser-agent] \u2551 2. Return here and press ENTER to continue \u2551\n");
504
+ process.stderr.write("[yiyan-browser-agent] \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D\n\n");
482
505
  await this.waitForEnter();
483
- process.stderr.write("[yiyan-agent] \u2713 Login complete, saving session...\n");
506
+ process.stderr.write("[yiyan-browser-agent] \u2713 Login complete, saving session...\n");
484
507
  await new Promise((resolve) => setTimeout(resolve, 3e3));
485
508
  } finally {
486
509
  await closeBrowser(context, this.verbose);
@@ -522,24 +545,52 @@ var YiyanAgent = class {
522
545
  sessionPath: this.sessionDir
523
546
  };
524
547
  }
548
+ /**
549
+ * Debug 模式:启动浏览器并输出 DOM 信息
550
+ */
551
+ async debug() {
552
+ const { context, page } = await launchBrowser({
553
+ sessionDir: this.sessionDir,
554
+ headless: false,
555
+ timeout: this.options.timeout,
556
+ verbose: true
557
+ });
558
+ try {
559
+ await navigateToYiyan(page, true);
560
+ await page.waitForTimeout(3e3);
561
+ await dumpDebugInfo(page);
562
+ process.stderr.write("\n[yiyan-browser-agent] \u6D4F\u89C8\u5668\u4FDD\u6301\u6253\u5F00\uFF0C\u53EF\u624B\u52A8\u6D4B\u8BD5\u3002\n");
563
+ process.stderr.write("[yiyan-browser-agent] \u5173\u95ED\u6D4F\u89C8\u5668\u7A97\u53E3\u7ED3\u675F debug \u6A21\u5F0F...\n");
564
+ await new Promise((resolve) => {
565
+ context.on("close", () => resolve());
566
+ });
567
+ } finally {
568
+ try {
569
+ await context.close();
570
+ } catch {
571
+ }
572
+ }
573
+ }
525
574
  };
526
575
 
527
576
  // src/cli.ts
528
577
  function printHelp() {
529
578
  console.log(`
530
- yiyan-agent - \u6587\u5FC3\u4E00\u8A00\u6D4F\u89C8\u5668\u4EE3\u7406 CLI
579
+ yiyan-browser-agent - \u6587\u5FC3\u4E00\u8A00\u6D4F\u89C8\u5668\u4EE3\u7406 CLI
531
580
 
532
581
  \u7528\u6CD5:
533
- yiyan-agent login \u9996\u6B21\u767B\u5F55\u6587\u5FC3\u4E00\u8A00\uFF08\u4F1A\u6253\u5F00\u6D4F\u89C8\u5668\u7A97\u53E3\uFF09
534
- yiyan-agent ask "\u95EE\u9898" [--timeout ms] [--headful] [--verbose]
535
- yiyan-agent status \u68C0\u67E5\u767B\u5F55\u72B6\u6001
536
- yiyan-agent reset \u6E05\u9664\u4FDD\u5B58\u7684 session
582
+ yiyan-browser-agent login \u9996\u6B21\u767B\u5F55\u6587\u5FC3\u4E00\u8A00\uFF08\u4F1A\u6253\u5F00\u6D4F\u89C8\u5668\u7A97\u53E3\uFF09
583
+ yiyan-browser-agent ask "\u95EE\u9898" [--timeout ms] [--headful] [--verbose]
584
+ yiyan-browser-agent status \u68C0\u67E5\u767B\u5F55\u72B6\u6001
585
+ yiyan-browser-agent reset \u6E05\u9664\u4FDD\u5B58\u7684 session
586
+ yiyan-browser-agent debug \u8C03\u8BD5\u6A21\u5F0F\uFF08\u8F93\u51FA DOM \u4FE1\u606F\uFF09
537
587
 
538
588
  \u547D\u4EE4:
539
589
  login \u6253\u5F00\u6D4F\u89C8\u5668\u624B\u52A8\u767B\u5F55\u6587\u5FC3\u4E00\u8A00\uFF08\u9996\u6B21\u4F7F\u7528\u5FC5\u987B\u5148\u767B\u5F55\uFF09
540
590
  ask \u53D1\u9001\u95EE\u9898\u5E76\u83B7\u53D6\u7B54\u6848\uFF08\u9ED8\u8BA4\u65E0\u5934\u6A21\u5F0F\uFF0C\u4E0D\u5F39\u7A97\uFF1B\u767B\u5F55\u540E\u76F4\u63A5\u4F7F\u7528\uFF09
541
591
  status \u68C0\u67E5\u767B\u5F55\u72B6\u6001
542
592
  reset \u6E05\u9664\u4FDD\u5B58\u7684 session
593
+ debug \u8C03\u8BD5\u6A21\u5F0F\uFF1A\u542F\u52A8\u6D4F\u89C8\u5668\u8F93\u51FA DOM \u4FE1\u606F\uFF0C\u7528\u4E8E\u6392\u67E5\u9009\u62E9\u5668\u95EE\u9898
543
594
 
544
595
  \u9009\u9879:
545
596
  --timeout <ms> \u8D85\u65F6\u65F6\u95F4\uFF08\u6BEB\u79D2\uFF09\uFF0C\u9ED8\u8BA4 120000
@@ -548,16 +599,17 @@ yiyan-agent - \u6587\u5FC3\u4E00\u8A00\u6D4F\u89C8\u5668\u4EE3\u7406 CLI
548
599
  --help \u663E\u793A\u5E2E\u52A9\u4FE1\u606F
549
600
 
550
601
  \u793A\u4F8B:
551
- yiyan-agent login # \u9996\u6B21\u4F7F\u7528\uFF1A\u767B\u5F55\u6587\u5FC3\u4E00\u8A00
552
- yiyan-agent ask "\u4EC0\u4E48\u662F TypeScript\uFF1F" # \u63D0\u95EE\uFF08\u9759\u9ED8\u6A21\u5F0F\uFF09
553
- yiyan-agent ask "\u89E3\u91CA Promise" --verbose # \u663E\u793A\u8BE6\u7EC6\u65E5\u5FD7
554
- yiyan-agent ask "30+30=" --headful # \u6709\u5934\u6A21\u5F0F\uFF08\u53EF\u624B\u52A8\u8FC7\u9A8C\u8BC1\u7801\uFF09
555
- yiyan-agent status
556
- yiyan-agent reset
602
+ yiyan-browser-agent login # \u9996\u6B21\u4F7F\u7528\uFF1A\u767B\u5F55\u6587\u5FC3\u4E00\u8A00
603
+ yiyan-browser-agent ask "\u4EC0\u4E48\u662F TypeScript\uFF1F" # \u63D0\u95EE\uFF08\u9759\u9ED8\u6A21\u5F0F\uFF09
604
+ yiyan-browser-agent ask "\u89E3\u91CA Promise" --verbose # \u663E\u793A\u8BE6\u7EC6\u65E5\u5FD7
605
+ yiyan-browser-agent ask "30+30=" --headful # \u6709\u5934\u6A21\u5F0F\uFF08\u53EF\u624B\u52A8\u8FC7\u9A8C\u8BC1\u7801\uFF09
606
+ yiyan-browser-agent debug # \u8C03\u8BD5\u6A21\u5F0F\uFF0C\u8F93\u51FA DOM \u4FE1\u606F
607
+ yiyan-browser-agent status
608
+ yiyan-browser-agent reset
557
609
 
558
610
  \u6D41\u7A0B:
559
- 1. \u5148\u8FD0\u884C yiyan-agent login \u767B\u5F55\uFF08\u53EA\u9700\u4E00\u6B21\uFF0C\u767B\u5F55\u72B6\u6001\u4F1A\u4FDD\u5B58\uFF09
560
- 2. \u4E4B\u540E\u76F4\u63A5 yiyan-agent ask "\u95EE\u9898" \u5373\u53EF\uFF0C\u65E0\u9700\u518D\u767B\u5F55
611
+ 1. \u5148\u8FD0\u884C yiyan-browser-agent login \u767B\u5F55\uFF08\u53EA\u9700\u4E00\u6B21\uFF0C\u767B\u5F55\u72B6\u6001\u4F1A\u4FDD\u5B58\uFF09
612
+ 2. \u4E4B\u540E\u76F4\u63A5 yiyan-browser-agent ask "\u95EE\u9898" \u5373\u53EF\uFF0C\u65E0\u9700\u518D\u767B\u5F55
561
613
  3. \u5982\u679C\u9047\u5230\u9A8C\u8BC1\u7801\uFF0C\u52A0 --headful \u5207\u6362\u6709\u5934\u6A21\u5F0F\u624B\u52A8\u5904\u7406
562
614
 
563
615
  \u63D0\u793A:
@@ -591,6 +643,9 @@ function parseCliArgs(args) {
591
643
  if (arg === "login") {
592
644
  result.command = "login";
593
645
  }
646
+ if (arg === "debug") {
647
+ result.command = "debug";
648
+ }
594
649
  if (arg === "--timeout") {
595
650
  if (i + 1 < args.length) {
596
651
  result.timeout = parseInt(args[i + 1], 10);
@@ -663,6 +718,26 @@ async function runCli(args) {
663
718
  }));
664
719
  return;
665
720
  }
721
+ if (parsed.command === "debug") {
722
+ try {
723
+ await agent.debug();
724
+ console.log(formatCliOutput({
725
+ success: true,
726
+ question: "debug",
727
+ answer: "Debug session completed",
728
+ duration: 0
729
+ }));
730
+ } catch (error) {
731
+ const errorMessage = error instanceof Error ? error.message : String(error);
732
+ console.log(formatCliOutput({
733
+ success: false,
734
+ question: "debug",
735
+ error: errorMessage,
736
+ duration: 0
737
+ }));
738
+ }
739
+ return;
740
+ }
666
741
  if (parsed.command === "ask") {
667
742
  if (!parsed.question) {
668
743
  console.log(formatCliOutput({