yiyan-browser-agent 1.11.0 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -54
- package/package.json +1 -1
- package/src/browser.js +52 -40
package/README.md
CHANGED
|
@@ -82,55 +82,38 @@ npx playwright install-deps chromium # 安装系统依赖
|
|
|
82
82
|
|
|
83
83
|
## 🚀 Quick Start
|
|
84
84
|
|
|
85
|
-
###
|
|
85
|
+
### 第一步:登录文心一言(首次使用)
|
|
86
86
|
|
|
87
|
-
**
|
|
87
|
+
**Windows:**
|
|
88
88
|
```powershell
|
|
89
89
|
yiyan-agent -i
|
|
90
90
|
```
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
**2. 给任务:**
|
|
94
|
-
```powershell
|
|
95
|
-
yiyan-agent "创建一个 Express REST API,带用户认证"
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
**3. 发送任务到已运行的服务器:**
|
|
99
|
-
```powershell
|
|
100
|
-
# 终端 1: 启动交互模式 (作为 HTTP 服务器)
|
|
91
|
+
**Linux / macOS:**
|
|
92
|
+
```bash
|
|
101
93
|
yiyan-agent -i
|
|
102
|
-
|
|
103
|
-
# 终端 2: 发送任务 (转发到服务器,不启动新浏览器)
|
|
104
|
-
yiyan-agent "上海天气,20个字"
|
|
105
94
|
```
|
|
95
|
+
浏览器窗口打开后,登录你的百度账号,然后回到终端按 Enter。会话会保存 — 只需登录一次。
|
|
106
96
|
|
|
107
|
-
###
|
|
97
|
+
### 第二步:通过 HTTP API 发送任务
|
|
108
98
|
|
|
109
|
-
|
|
110
|
-
```bash
|
|
111
|
-
yiyan-agent -i
|
|
112
|
-
```
|
|
99
|
+
登录完成后,交互模式会自动启动 HTTP 服务(端口 9527),通过 API 发送任务:
|
|
113
100
|
|
|
114
|
-
**2. 给任务:**
|
|
115
101
|
```bash
|
|
116
|
-
|
|
102
|
+
curl -X POST http://localhost:9527/task \
|
|
103
|
+
-H "Content-Type: application/json" \
|
|
104
|
+
-d '{"task": "创建一个 Express REST API,带用户认证"}'
|
|
117
105
|
```
|
|
118
106
|
|
|
119
|
-
|
|
120
|
-
```bash
|
|
121
|
-
cd ~/my-project
|
|
122
|
-
ya "add input validation to all my API routes"
|
|
123
|
-
```
|
|
107
|
+
> 💡 更多用法见 [HTTP API](#-http-api) 章节。
|
|
124
108
|
|
|
125
109
|
---
|
|
126
110
|
|
|
127
111
|
## 💻 Usage
|
|
128
112
|
|
|
129
113
|
```
|
|
130
|
-
yiyan-agent [OPTIONS]
|
|
114
|
+
yiyan-agent [OPTIONS]
|
|
131
115
|
|
|
132
|
-
-
|
|
133
|
-
-i, --interactive Keep browser open, run multiple tasks (starts HTTP server)
|
|
116
|
+
-i, --interactive Start interactive mode with HTTP server on port 9527
|
|
134
117
|
-d, --dir <path> Set working directory (default: current directory)
|
|
135
118
|
--debug Print raw AI responses to the terminal
|
|
136
119
|
--show-browser Show browser window (non-interactive mode)
|
|
@@ -141,22 +124,22 @@ Aliases:
|
|
|
141
124
|
ya Short form of yiyan-agent
|
|
142
125
|
```
|
|
143
126
|
|
|
144
|
-
###
|
|
127
|
+
### 典型工作流
|
|
145
128
|
|
|
146
129
|
```bash
|
|
147
|
-
#
|
|
148
|
-
yiyan-agent "create a Python script that scrapes Hacker News"
|
|
149
|
-
|
|
150
|
-
# Interactive mode — keeps browser open, starts HTTP server on port 9527
|
|
130
|
+
# 1. 启动交互模式(登录 + 启动 HTTP 服务)
|
|
151
131
|
yiyan-agent -i
|
|
132
|
+
# → Server listening on port 9527
|
|
152
133
|
|
|
153
|
-
#
|
|
154
|
-
|
|
134
|
+
# 2. 通过 HTTP API 发送任务(另开终端)
|
|
135
|
+
curl -X POST http://localhost:9527/task \
|
|
136
|
+
-H "Content-Type: application/json" \
|
|
137
|
+
-d '{"task": "创建一个 Python 爬虫"}'
|
|
155
138
|
|
|
156
|
-
#
|
|
157
|
-
|
|
139
|
+
# 3. 查看任务状态
|
|
140
|
+
curl http://localhost:9527/status
|
|
158
141
|
|
|
159
|
-
#
|
|
142
|
+
# 在交互模式中,输入 quit 或 q 退出:
|
|
160
143
|
❯ quit
|
|
161
144
|
```
|
|
162
145
|
|
|
@@ -193,9 +176,10 @@ When interactive mode (`-i`) is running, an HTTP server starts on port **9527**,
|
|
|
193
176
|
yiyan-agent -i
|
|
194
177
|
# → Server listening on port 9527
|
|
195
178
|
|
|
196
|
-
# Terminal 2: Send task
|
|
197
|
-
|
|
198
|
-
|
|
179
|
+
# Terminal 2: Send task via HTTP API
|
|
180
|
+
curl -X POST http://localhost:9527/task \
|
|
181
|
+
-H "Content-Type: application/json" \
|
|
182
|
+
-d '{"task": "北京天气,15个字"}'
|
|
199
183
|
```
|
|
200
184
|
|
|
201
185
|
### HTTP POST API
|
|
@@ -220,8 +204,8 @@ yiyan-agent "北京天气,15个字"
|
|
|
220
204
|
**Response:**
|
|
221
205
|
```json
|
|
222
206
|
{
|
|
223
|
-
"question": "
|
|
224
|
-
"answer": "
|
|
207
|
+
"question": "创建一个 Express REST API",
|
|
208
|
+
"answer": "好的,我来创建...",
|
|
225
209
|
"duration": 5234,
|
|
226
210
|
"status": "success"
|
|
227
211
|
}
|
|
@@ -323,19 +307,21 @@ curl http://localhost:9527/task/abc123
|
|
|
323
307
|
### Windows CMD (curl)
|
|
324
308
|
|
|
325
309
|
```cmd
|
|
326
|
-
curl -X POST http://localhost:9527/task -H "Content-Type: application/json" -d "{\"task\":\"
|
|
310
|
+
curl -X POST http://localhost:9527/task -H "Content-Type: application/json" -d "{\"task\":\"创建一个 Express REST API\"}"
|
|
327
311
|
```
|
|
328
312
|
|
|
329
313
|
### PowerShell
|
|
330
314
|
|
|
331
315
|
```powershell
|
|
332
|
-
Invoke-RestMethod -Uri "http://localhost:9527/task" -Method POST -Body '{"task":"
|
|
316
|
+
Invoke-RestMethod -Uri "http://localhost:9527/task" -Method POST -Body '{"task":"创建一个 Express REST API"}' -ContentType "application/json"
|
|
333
317
|
```
|
|
334
318
|
|
|
335
319
|
### Ubuntu / Linux (curl)
|
|
336
320
|
|
|
337
321
|
```bash
|
|
338
|
-
curl -X POST http://localhost:9527/task
|
|
322
|
+
curl -X POST http://localhost:9527/task \
|
|
323
|
+
-H "Content-Type: application/json" \
|
|
324
|
+
-d '{"task": "创建一个 Express REST API"}'
|
|
339
325
|
```
|
|
340
326
|
|
|
341
327
|
### From Other Programming Languages
|
|
@@ -343,11 +329,10 @@ curl -X POST http://localhost:9527/task -H "Content-Type: application/json" -d '
|
|
|
343
329
|
**Python:**
|
|
344
330
|
```python
|
|
345
331
|
import requests
|
|
346
|
-
import json
|
|
347
332
|
|
|
348
333
|
response = requests.post(
|
|
349
334
|
'http://localhost:9527/task',
|
|
350
|
-
json={'task': '
|
|
335
|
+
json={'task': '创建一个 Express REST API'}
|
|
351
336
|
)
|
|
352
337
|
result = response.json()
|
|
353
338
|
print(result)
|
|
@@ -357,7 +342,7 @@ print(result)
|
|
|
357
342
|
```javascript
|
|
358
343
|
const http = require('http');
|
|
359
344
|
|
|
360
|
-
const body = JSON.stringify({ task: '
|
|
345
|
+
const body = JSON.stringify({ task: '创建一个 Express REST API' });
|
|
361
346
|
|
|
362
347
|
const req = http.request({
|
|
363
348
|
hostname: 'localhost',
|
|
@@ -495,13 +480,14 @@ Everything lives in `~/.yiyan-agent/` in your home directory:
|
|
|
495
480
|
### Agent responds but creates no files
|
|
496
481
|
The browser DOM rendered the AI's response in a way the parser didn't catch. Run with `--debug` to see exactly what's being received:
|
|
497
482
|
```bash
|
|
498
|
-
yiyan-agent --debug
|
|
483
|
+
yiyan-agent --debug -i
|
|
484
|
+
# Then send a task via HTTP API and observe the raw output
|
|
499
485
|
```
|
|
500
486
|
|
|
501
487
|
### Agent stops responding / loops
|
|
502
488
|
Yiyan's UI may have changed. Run the calibration tool — it inspects the live DOM and prints updated selectors:
|
|
503
489
|
```bash
|
|
504
|
-
|
|
490
|
+
node src/calibrate.js
|
|
505
491
|
```
|
|
506
492
|
|
|
507
493
|
### Login session expired
|
|
@@ -605,7 +591,7 @@ Open an issue on GitHub with:
|
|
|
605
591
|
- What you ran
|
|
606
592
|
- What you expected
|
|
607
593
|
- What actually happened
|
|
608
|
-
-
|
|
594
|
+
- Debug output from `yiyan-agent --debug -i` if relevant
|
|
609
595
|
|
|
610
596
|
---
|
|
611
597
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "yiyan-browser-agent",
|
|
3
|
-
"version": "1.11.0",
|
|
3
|
+
"version": "1.11.2",
|
|
4
4
|
"description": "AI coding agent powered by Yiyan (文心一言) via browser automation (chat.baidu.com) — no API key needed. Performance-optimized. Enhanced with comprehensive security.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/browser.js
CHANGED
|
@@ -320,6 +320,7 @@ class YiyanBrowser {
|
|
|
320
320
|
let lastThinkingText = '';
|
|
321
321
|
let stableCount = 0;
|
|
322
322
|
let lastStableTime = Date.now();
|
|
323
|
+
let textChangedAt = Date.now(); // 文本最后变化时间
|
|
323
324
|
let dotCount = 0;
|
|
324
325
|
let hasCompletionMarker = false;
|
|
325
326
|
|
|
@@ -373,6 +374,7 @@ class YiyanBrowser {
|
|
|
373
374
|
lastText = text;
|
|
374
375
|
stableCount = 0;
|
|
375
376
|
lastStableTime = Date.now();
|
|
377
|
+
textChangedAt = Date.now();
|
|
376
378
|
}
|
|
377
379
|
|
|
378
380
|
// ── 完成标记检测 (chat.baidu.com) ──
|
|
@@ -404,10 +406,12 @@ class YiyanBrowser {
|
|
|
404
406
|
}
|
|
405
407
|
|
|
406
408
|
// ── 完成判断 ──
|
|
407
|
-
// 条件1:
|
|
408
|
-
// 条件2
|
|
409
|
+
// 条件1: 完成标记出现 + 稳定 2 次
|
|
410
|
+
// 条件2: 稳定 3 次(即使没有完成标记)
|
|
411
|
+
// 条件3(兜底): 文本长时间稳定不变(≥5秒),强制认为完成
|
|
409
412
|
const condition1 = hasCompletionMarker && stableCount >= 2;
|
|
410
413
|
const condition2 = stableCount >= 3;
|
|
414
|
+
const longStable = (Date.now() - textChangedAt) >= 5000 && text.length > 0;
|
|
411
415
|
|
|
412
416
|
if (condition1 || condition2) {
|
|
413
417
|
// 检查 _isGenerating 作为最终确认
|
|
@@ -420,6 +424,13 @@ class YiyanBrowser {
|
|
|
420
424
|
// _isGenerating 说还在生成,重置计数继续等待
|
|
421
425
|
stableCount = 0;
|
|
422
426
|
lastStableTime = Date.now();
|
|
427
|
+
} else if (longStable) {
|
|
428
|
+
// 兜底:文本已经 5 秒没变化了,强制完成
|
|
429
|
+
logger.dim('Long stable fallback — text unchanged for 5s, forcing completion');
|
|
430
|
+
await this.page.waitForTimeout(300);
|
|
431
|
+
logger.clearLine();
|
|
432
|
+
logger.success('Response complete (stable timeout)');
|
|
433
|
+
break;
|
|
423
434
|
}
|
|
424
435
|
|
|
425
436
|
// Progress indicator
|
|
@@ -560,33 +571,13 @@ class YiyanBrowser {
|
|
|
560
571
|
|
|
561
572
|
async _isGenerating() {
|
|
562
573
|
return await this.page.evaluate(() => {
|
|
563
|
-
// ── 1.
|
|
564
|
-
const typingSelectors = [
|
|
565
|
-
'.cosd-markdown-content-typingall',
|
|
566
|
-
'.markdown-typing-all',
|
|
567
|
-
'[class*="typing"]',
|
|
568
|
-
'[class*="generating"]',
|
|
569
|
-
'[class*="loading-indicator"]',
|
|
570
|
-
'svg[class*="loading"]',
|
|
571
|
-
'svg[class*="spinner"]',
|
|
572
|
-
'[class*="blink"]',
|
|
573
|
-
'[class*="cursor-blink"]',
|
|
574
|
-
'[class*="pulsing"]',
|
|
575
|
-
];
|
|
576
|
-
for (const sel of typingSelectors) {
|
|
577
|
-
const el = document.querySelector(sel);
|
|
578
|
-
if (el) {
|
|
579
|
-
const s = window.getComputedStyle(el);
|
|
580
|
-
if (s.display !== 'none' && s.visibility !== 'hidden') return true;
|
|
581
|
-
}
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
// ── 2. 检测停止按钮 ──
|
|
574
|
+
// ── 1. 检测停止按钮(最可靠的生成中信号)──
|
|
585
575
|
const stopSelectors = [
|
|
586
576
|
'button[aria-label*="Stop" i]',
|
|
587
577
|
'button[aria-label*="停止"]',
|
|
588
578
|
'[class*="stop-gen"]',
|
|
589
579
|
'[class*="stopGen"]',
|
|
580
|
+
'[class*="stop-btn"]',
|
|
590
581
|
];
|
|
591
582
|
for (const sel of stopSelectors) {
|
|
592
583
|
const el = document.querySelector(sel);
|
|
@@ -596,35 +587,56 @@ class YiyanBrowser {
|
|
|
596
587
|
}
|
|
597
588
|
}
|
|
598
589
|
|
|
599
|
-
// ──
|
|
590
|
+
// ── 2. 检测活跃的 CSS 动画(真正的 loading spinner,不是永久类名)──
|
|
591
|
+
// 只检测 svg 动画元素和明确的 loading 指示器
|
|
592
|
+
const activeAnimSelectors = [
|
|
593
|
+
'svg[class*="loading"]',
|
|
594
|
+
'svg[class*="spinner"]',
|
|
595
|
+
'[class*="loading-indicator"]',
|
|
596
|
+
'[class*="generating-indicator"]',
|
|
597
|
+
];
|
|
598
|
+
for (const sel of activeAnimSelectors) {
|
|
599
|
+
const el = document.querySelector(sel);
|
|
600
|
+
if (el) {
|
|
601
|
+
const s = window.getComputedStyle(el);
|
|
602
|
+
if (s.display !== 'none' && s.visibility !== 'hidden') return true;
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
// ── 3. 如果有完成标记 → 肯定已完成 ──
|
|
600
607
|
const completionMarkers = [
|
|
601
608
|
'.cos-icon.cos-icon-copy',
|
|
602
609
|
'.cos-icon-copy',
|
|
610
|
+
'.feedback-hover-show',
|
|
603
611
|
'.cos-icon.cos-icon-share1',
|
|
604
|
-
'.cos-icon-feedback',
|
|
612
|
+
'.cos-icon.cos-icon-feedback',
|
|
605
613
|
'[class*="copy-btn"]',
|
|
606
|
-
'[class*="copyBtn"]',
|
|
607
614
|
'[aria-label*="Copy" i]',
|
|
608
615
|
'[aria-label*="复制"]',
|
|
609
|
-
'[class*="regenerate"]',
|
|
610
|
-
'[class*="retry"]',
|
|
611
|
-
'[class*="action-btn"]',
|
|
612
|
-
'.feedback-hover-show',
|
|
613
616
|
];
|
|
614
|
-
let hasCompletionMarker = false;
|
|
615
617
|
for (const sel of completionMarkers) {
|
|
616
|
-
if (document.querySelector(sel))
|
|
617
|
-
hasCompletionMarker = true;
|
|
618
|
-
break;
|
|
619
|
-
}
|
|
618
|
+
if (document.querySelector(sel)) return false;
|
|
620
619
|
}
|
|
621
620
|
|
|
622
|
-
//
|
|
621
|
+
// ── 4. 检查是否有光标闪烁(生成中的光标)──
|
|
622
|
+
// 用 getComputedStyle 检测 animation,而不是类名
|
|
623
623
|
const answerArea = document.querySelector(
|
|
624
|
-
'.ai-entry-block.ai-markdown, .answer-container, .cs-answer-container
|
|
624
|
+
'.ai-entry-block.ai-markdown, .answer-container, .cs-answer-container'
|
|
625
625
|
);
|
|
626
|
-
if (answerArea
|
|
627
|
-
|
|
626
|
+
if (answerArea) {
|
|
627
|
+
// 查找最后子元素是否有动画光标
|
|
628
|
+
const lastChild = answerArea.lastElementChild;
|
|
629
|
+
if (lastChild) {
|
|
630
|
+
const s = window.getComputedStyle(lastChild);
|
|
631
|
+
// 如果最后一个元素有活跃的动画 → 还在生成
|
|
632
|
+
if (s.animationName && s.animationName !== 'none' &&
|
|
633
|
+
s.display !== 'none' && s.visibility !== 'hidden') {
|
|
634
|
+
return true;
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
// 有回答内容但没有完成标记 → 可能还在生成
|
|
639
|
+
if (answerArea.innerText && answerArea.innerText.length > 5) {
|
|
628
640
|
return true;
|
|
629
641
|
}
|
|
630
642
|
}
|