yiyan-browser-agent 1.4.5 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.js ADDED
@@ -0,0 +1,272 @@
1
+ // src/parser.js — Parse Yiyan's text responses to extract tool calls
2
+ 'use strict';
3
+
4
/**
 * Parse a raw Yiyan response string and decide whether it is a tool call,
 * a final prose answer, or an unparseable tool call.
 *
 * Thinking blocks are stripped first, then the following strategies are tried
 * in order; the first one that yields a tool call wins:
 *   0. bare "tool_call\n{ ... }" text (fence lost by the DOM renderer)
 *   1. ```tool_call fenced block (primary format)
 *   2. ``` / ```json fenced block holding an object with a tool name
 *   3. XML <tool_call><name>…</name><input>…</input></tool_call>
 *   4. the same XML after the DOM stripped its angle brackets
 *   5. the largest JSON object anywhere in the text that carries a tool name
 *   6. a Python-style call `name(key="value", …)` inside a code fence
 *
 * @param {string} rawText - The response text exactly as received.
 * @returns {{type: 'tool_call', name: string, args: object, raw: string}
 *          |{type: 'final', content: string, raw: string}
 *          |{type: 'error', message: string, raw: *}}
 *   `raw` is always the untouched `rawText` argument.
 */
function parseResponse(rawText) {
  // A non-string cannot be scanned; report it instead of throwing a TypeError.
  if (typeof rawText !== 'string') {
    return {
      type: 'error',
      message: 'Expected response text to be a string, got ' + (rawText === null ? 'null' : typeof rawText),
      raw: rawText,
    };
  }

  const text = stripThinkingBlocks(rawText).trim();

  // ── Strategy 0 (DOM FALLBACK): bare "tool_call\n{ ... }" ─────────────────
  // The markdown renderer can turn the fenced block into a <pre><code> element
  // whose text content is just the tag line followed by the JSON body.
  const bareMatch = text.match(/^tool_call\s*\n([\s\S]+)$/i);
  if (bareMatch) {
    const { call } = parseToolCallJson(bareMatch[1].trim(), rawText);
    if (call) return call;
  }

  // ── Strategy 1 (PRIMARY): ```tool_call fenced code block ─────────────────
  const fencedMatch = text.match(/```tool_call\s*([\s\S]*?)```/i);
  if (fencedMatch) {
    const raw = fencedMatch[1].trim();
    const { call, error } = parseToolCallJson(raw, rawText);
    if (call) return call;

    if (error) {
      // The lazy match ends at the FIRST ``` it sees, which may sit inside a
      // JSON string (e.g. file content containing a code fence). Retry with
      // the body extended to the LAST fence before giving up.
      const greedyMatch = text.match(/```tool_call\s*([\s\S]*)```/i);
      if (greedyMatch && greedyMatch[1].length > fencedMatch[1].length) {
        const retry = parseToolCallJson(greedyMatch[1].trim(), rawText);
        if (retry.call) return retry.call;
      }

      return {
        type: 'error',
        message: 'tool_call block had invalid JSON: ' + error.message + '\nContent: ' + raw.slice(0, 300),
        raw: rawText,
      };
    }
    // Valid JSON without a usable tool name: let the later strategies look.
  }

  // ── Strategy 2: ```json block with "name"/"tool" key ──────────────────────
  const jsonFenceMatch = text.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/);
  if (jsonFenceMatch) {
    try {
      const call = toToolCall(JSON.parse(jsonFenceMatch[1]), rawText);
      if (call) return call;
    } catch {
      // Not valid JSON; an arbitrary code block is not an error by itself.
    }
  }

  // ── Strategy 3: XML <tool_call> ───────────────────────────────────────────
  const xmlMatch = text.match(
    /<tool_call[^>]*>\s*(?:<name>([\s\S]*?)<\/name>\s*)?(?:<input>([\s\S]*?)<\/input>|<args>([\s\S]*?)<\/args>)\s*<\/tool_call>/i
  );
  if (xmlMatch) {
    const name = (xmlMatch[1] || '').trim();
    const inputRaw = stripCodeFences((xmlMatch[2] || xmlMatch[3] || '').trim());
    if (name) return tryParseToolCall(name, inputRaw, rawText);
  }

  // ── Strategy 4: XML with angle-brackets stripped by DOM ───────────────────
  const domStrippedMatch = text.match(
    /tool_call\s+name\s+([\w_]+)\s*\/name\s+input\s*([\s\S]*?)\s*\/input\s*\/tool_call/i
  );
  if (domStrippedMatch) {
    const name = domStrippedMatch[1].trim();
    const inputRaw = stripCodeFences(domStrippedMatch[2].trim());
    return tryParseToolCall(name, inputRaw, rawText);
  }

  // ── Strategy 5: Any JSON object with "name" key anywhere in text ──────────
  // The cheap regex gate avoids scanning prose that cannot contain a call.
  if (/["'](?:name|tool|function)["']\s*:\s*["'][\w_]+["']/.test(text)) {
    const call = toToolCall(extractLargestJsonObject(text), rawText);
    if (call) return call;
  }

  // ── Strategy 6: Python-style function call in code block ──────────────────
  const funcMatch = text.match(/```\w*\s*([\w_]+)\(([^)]*)\)\s*```/);
  if (funcMatch) {
    const name = funcMatch[1];
    const argsRaw = funcMatch[2];
    const args = {};
    const argRe = /(\w+)\s*=\s*(?:"([^"]*?)"|'([^']*?)'|(\d+(?:\.\d+)?)|(\btrue\b|\bfalse\b))/g;
    let m;
    while ((m = argRe.exec(argsRaw)) !== null) {
      const key = m[1];
      if (m[2] !== undefined) args[key] = m[2];
      else if (m[3] !== undefined) args[key] = m[3];
      else if (m[4] !== undefined) args[key] = parseFloat(m[4]);
      else if (m[5] !== undefined) args[key] = m[5] === 'true';
    }
    // A call without keyword arguments is indistinguishable from sample code.
    if (Object.keys(args).length > 0) {
      return { type: 'tool_call', name, args, raw: rawText };
    }
  }

  // ── No tool call detected — final prose response ───────────────────────────
  return { type: 'final', content: text, raw: rawText };
}

/**
 * Turn a parsed JSON value into a tool_call result.
 * Accepts the name under `name`/`tool`/`function` and the arguments under
 * `args`/`arguments`/`parameters`/`input`. Returns null when the value is not
 * an object or does not carry a non-empty string name.
 */
function toToolCall(candidate, rawText) {
  if (candidate === null || typeof candidate !== 'object') return null;
  const name = candidate.name || candidate.tool || candidate.function;
  if (!name || typeof name !== 'string') return null;
  const args = candidate.args || candidate.arguments || candidate.parameters || candidate.input || {};
  return { type: 'tool_call', name, args, raw: rawText };
}

/**
 * Parse JSON text into a tool call, falling back to attemptJsonFix() when
 * strict parsing throws. `error` is the strict-parse exception (or null when
 * the text was valid JSON); `call` is the tool call, or null if none was found.
 */
function parseToolCallJson(jsonText, rawText) {
  try {
    return { call: toToolCall(JSON.parse(jsonText), rawText), error: null };
  } catch (err) {
    return { call: toToolCall(attemptJsonFix(jsonText), rawText), error: err };
  }
}
147
+
148
+ // ─────────────────────────────────────────────
149
+ // Helpers
150
+ // ─────────────────────────────────────────────
151
+
152
/**
 * Build a result for a tool whose name is already known and whose arguments
 * arrive as JSON text.
 *
 * @param {string} name - Tool name.
 * @param {string} inputRaw - JSON text holding the tool arguments.
 * @param {string} rawText - Original response, echoed back as `raw`.
 * @returns {object} A `tool_call` result when the text parses (strictly, or
 *   after attemptJsonFix() repairs it); otherwise an `error` result carrying
 *   the strict-parse message and the first 200 characters of the input.
 */
function tryParseToolCall(name, inputRaw, rawText) {
  const asToolCall = (args) => ({ type: 'tool_call', name, args, raw: rawText });

  let strictError;
  try {
    return asToolCall(JSON.parse(inputRaw));
  } catch (err) {
    strictError = err;
  }

  // Strict parsing failed — see whether the common LLM slips can be repaired.
  const repaired = attemptJsonFix(inputRaw);
  if (repaired !== null) {
    return asToolCall(repaired);
  }

  return {
    type: 'error',
    message: `Tool "${name}" returned invalid JSON: ${strictError.message}\nRaw input: ${inputRaw.slice(0, 200)}`,
    raw: rawText,
  };
}
169
+
170
/**
 * Remove a leading ``` or ```json fence and a trailing ``` fence from a string.
 *
 * @param {string} str - Text that may be wrapped in a code fence.
 * @returns {string} The text without the fences, trimmed.
 */
function stripCodeFences(str) {
  const withoutOpeningFence = str.replace(/^```(?:json)?\s*/i, '');
  const withoutClosingFence = withoutOpeningFence.replace(/\s*```$/, '');
  return withoutClosingFence.trim();
}
177
+
178
/**
 * Remove AI "thinking" output (DeepSeek R1 / Yiyan patterns) from a response.
 *
 * Two shapes are removed:
 *   - every <think>…</think> element, plus one newline directly after it;
 *   - the first "Thinking" / "Thinking..." line together with the text that
 *     follows it, up to and including the next blank line.
 *
 * @param {string} text - Raw response text.
 * @returns {string} The remaining text, trimmed.
 */
function stripThinkingBlocks(text) {
  const withoutThinkTags = text.replace(/<think>[\s\S]*?<\/think>\n?/gi, '');
  const withoutPreamble = withoutThinkTags.replace(/^Thinking\.{0,3}\n[\s\S]*?\n\n/m, '');
  return withoutPreamble.trim();
}
185
+
186
/**
 * Attempt to repair common LLM JSON mistakes and parse the result.
 *
 * Repairs applied:
 *   - trailing commas before `}` or `]` are dropped;
 *   - bare (unquoted) object keys are wrapped in double quotes.
 *
 * The repairs are applied only OUTSIDE double-quoted string literals, so text
 * such as "a, }" or "x, y: z" inside a value is left exactly as written.
 *
 * @param {string} str - Possibly malformed JSON text.
 * @returns {*} The parsed value, or null when the input is not a string or is
 *   still not valid JSON after the repairs.
 */
function attemptJsonFix(str) {
  if (typeof str !== 'string') return null;

  const repairStructure = (chunk) =>
    chunk
      .replace(/,\s*([}\]])/g, '$1')
      .replace(/([{,]\s*)(\w+)\s*:/g, '$1"$2":');

  // Tokenise into complete string literals (kept verbatim) and the runs of
  // structural text between them (repaired). An unterminated quote matches
  // neither alternative and is left in place, so parsing fails and we return
  // null rather than guess.
  const repaired = str.replace(
    /"(?:[^"\\]|\\[\s\S])*"|[^"]+/g,
    (chunk) => (chunk.startsWith('"') ? chunk : repairStructure(chunk))
  );

  try {
    return JSON.parse(repaired);
  } catch {
    return null;
  }
}
197
+
198
/**
 * Find the longest brace-balanced substring of `text` that parses as JSON
 * (directly, or after attemptJsonFix() repairs it) and return the parsed value.
 *
 * Every `{` is tried as a starting point. Braces are counted rather than
 * matched by regex so nested objects work, and braces inside double-quoted
 * strings (including after backslash escapes) are ignored.
 *
 * @param {string} text - Text that may contain one or more JSON objects.
 * @returns {object|null} The parsed object from the longest candidate, or
 *   null when no candidate parses.
 */
function extractLargestJsonObject(text) {
  // Index of the `}` that closes the `{` at `start`, or -1 if it never closes.
  const closingBraceIndex = (start) => {
    let depth = 0;
    let insideString = false;
    let escaped = false;

    for (let pos = start; pos < text.length; pos += 1) {
      const ch = text[pos];
      if (escaped) {
        escaped = false;
      } else if (insideString && ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        insideString = !insideString;
      } else if (insideString) {
        // Ordinary character inside a string literal: nothing to count.
      } else if (ch === '{') {
        depth += 1;
      } else if (ch === '}') {
        depth -= 1;
        if (depth === 0) return pos;
      }
    }
    return -1;
  };

  let largest = null;
  let largestLength = 0;

  for (let start = 0; start < text.length; start += 1) {
    if (text[start] !== '{') continue;

    const end = closingBraceIndex(start);
    if (end === -1) continue;

    const candidate = text.slice(start, end + 1);
    // Only a strictly longer candidate can replace the current best.
    if (candidate.length <= largestLength) continue;

    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      parsed = attemptJsonFix(candidate);
    }

    if (parsed) {
      largest = parsed;
      largestLength = candidate.length;
    }
  }

  return largest;
}
244
+
245
/**
 * Format a tool result for sending back to the AI.
 *
 * @param {string} toolName - Name of the tool that ran.
 * @param {*} result - Tool output; converted with String().
 * @param {boolean} [isError=false] - Marks the result as ERROR instead of SUCCESS.
 * @returns {string} Three lines: a header, the result, and an end marker.
 */
function formatToolResult(toolName, result, isError = false) {
  const header = `[TOOL RESULT: ${toolName} | ${isError ? 'ERROR' : 'SUCCESS'}]`;
  const footer = `[END TOOL RESULT]`;
  return `${header}\n${String(result)}\n${footer}`;
}
254
+
255
/**
 * Check whether a response looks like the agent is asking a clarifying question.
 *
 * @param {string} text - Response text to inspect.
 * @returns {boolean} True when any line ends with "?" or the text contains one
 *   of the known clarification phrases (matched case-insensitively).
 */
function isAskingQuestion(text) {
  const lineEndsWithQuestionMark = /\?(\s*$)/m;
  const asksToClarify = /could you (please |kindly )?clarify/i;
  const asksForMore = /can you provide more/i;
  const asksForPreference = /what (do you|would you) (want|like|prefer)/i;
  const asksToSpecify = /please (specify|clarify|tell me)/i;

  return (
    lineEndsWithQuestionMark.test(text) ||
    asksToClarify.test(text) ||
    asksForMore.test(text) ||
    asksForPreference.test(text) ||
    asksToSpecify.test(text)
  );
}
266
+
267
+ module.exports = {
268
+ parseResponse,
269
+ formatToolResult,
270
+ stripThinkingBlocks,
271
+ isAskingQuestion,
272
+ };
@@ -0,0 +1,26 @@
1
+ // src/postinstall.js — Auto-install Playwright Chromium
2
+ 'use strict';
3
+
4
+ const { execSync } = require('child_process');
5
+ const path = require('path');
6
+ const os = require('os');
7
+
8
// Skip when the user opted out or when running in CI. The opt-out variable is
// checked first so the log line names the reason that actually applied.
const skipReason = process.env.SKIP_PLAYWRIGHT_INSTALL
  ? 'SKIP_PLAYWRIGHT_INSTALL is set'
  : process.env.CI
    ? 'CI detected'
    : null;

if (skipReason) {
  console.log(`[yiyan-agent] Skipping browser install (${skipReason})`);
  process.exit(0);
}

console.log('\n[yiyan-agent] Installing Playwright Chromium...\n');

try {
  // Use the Playwright CLI from this package's own node_modules/.bin;
  // on Windows the launcher is a .cmd shim.
  const isWindows = process.platform === 'win32';
  const playwrightBin = path.join(__dirname, '..', 'node_modules', '.bin',
    isWindows ? 'playwright.cmd' : 'playwright');

  // stdio: 'inherit' streams the download progress to the user's terminal.
  execSync(`"${playwrightBin}" install chromium`, { stdio: 'inherit' });
  console.log('\n[yiyan-agent] ✓ Browser installed!\n');
} catch (err) {
  // Best effort: a failed browser download must not fail the package install,
  // but the cause is reported so the user can act on it.
  const reason = err && err.message ? err.message : String(err);
  console.warn('\n[yiyan-agent] ⚠ Could not auto-install. Run manually:');
  console.warn(' npx playwright install chromium\n');
  console.warn('[yiyan-agent] Reason: ' + reason + '\n');
}
package/src/prompt.js ADDED
@@ -0,0 +1,188 @@
1
+ // src/prompt.js — System prompt and conversation builder
2
+ 'use strict';
3
+
4
+ const os = require('os');
5
+ const path = require('path');
6
+ const { getToolDescriptions } = require('./tools');
7
+ const config = require('./config');
8
+
9
+ // ─────────────────────────────────────────────
10
+ // System prompt — sent as the first message
11
+ // ─────────────────────────────────────────────
12
+
13
/**
 * Assemble the system prompt sent as the first message.
 *
 * The prompt embeds the tool documentation from getToolDescriptions(), the
 * configured working directory, the OS platform and release, the Node.js
 * version and the current time in ISO format.
 *
 * @returns {string} The prompt, one section after another joined by newlines.
 */
function buildSystemPrompt() {
  const toolDocs = getToolDescriptions();
  const cwd = config.WORKING_DIR;
  const platform = `${os.platform()} ${os.release()}`;
  const nodeVer = process.version;
  const now = new Date().toISOString();

  // The tool-call example needs literal triple backticks. They are kept in a
  // variable and the prompt is built from plain strings, so no template
  // literal ever has to contain a raw backtick fence.
  const FENCE = '```';

  const intro = [
    'You are Yiyan Agent (文心一言代理) — an expert AI software engineer and coding assistant',
    'running inside a terminal-based agent framework. You have direct access to the',
    "user's filesystem and can execute shell commands.",
  ];

  const environment = [
    'ENVIRONMENT',
    '───────────',
    `Platform : ${platform}`,
    `Node.js : ${nodeVer}`,
    `Date/Time : ${now}`,
    `Working Directory: ${cwd}`,
  ];

  const capabilities = [
    'YOUR CAPABILITIES',
    '─────────────────',
    'You can read/write files, run shell commands, search codebases, fetch URLs,',
    'and scaffold entire projects. You operate in an autonomous loop: you call a',
    'tool, receive its result, and continue until the task is fully complete.',
  ];

  const howToCallTools = [
    'HOW TO CALL TOOLS',
    '─────────────────',
    'When you need to use a tool, your ENTIRE response must be ONLY a fenced code',
    'block tagged "tool_call" — with NO text before or after it:',
  ];

  const toolCallExample = [
    FENCE + 'tool_call',
    '{',
    ' "name": "TOOL_NAME_HERE",',
    ' "args": {',
    ' "param1": "value1",',
    ' "param2": "value2"',
    ' }',
    '}',
    FENCE,
  ];

  const criticalRules = [
    'CRITICAL RULES:',
    '- Output ONLY the tool_call block — no prose, no greeting, nothing else.',
    '- ONE tool call per response. Never multiple.',
    '- Content must be valid JSON with exactly "name" and "args" keys.',
    '- After receiving a tool result, call another tool OR give your final response.',
    '- Only write plain prose (no code block) when the task is 100% complete.',
  ];

  const whenToStop = [
    'WHEN TO STOP',
    '────────────',
    'When fully done, respond with a clear natural language summary.',
    'Do NOT wrap it in any tags or code blocks. Just plain text.',
  ];

  const codingGuidelines = [
    'CODING GUIDELINES',
    '─────────────────',
    '- Always read existing files before modifying them.',
    '- Always check the directory structure before creating new files.',
    '- Write complete, production-quality code — no TODOs, no placeholders.',
    '- Include proper error handling in all code you write.',
    '- After writing code, run it (if applicable) to verify it works.',
    '- Prefer small focused files over large monolithic ones.',
    '- When installing packages, check package.json first.',
  ];

  const multiStepApproach = [
    'MULTI-STEP APPROACH',
    '───────────────────',
    'For complex tasks, break them into steps:',
    '1. Explore the codebase / understand context',
    '2. Plan what changes need to be made',
    '3. Make changes systematically, one file at a time',
    '4. Test / verify the result',
  ];

  // toolDocs stays an array element (not interpolated) so join() renders it
  // the same way as before.
  const availableTools = [
    'AVAILABLE TOOLS',
    '───────────────',
    toolDocs,
  ];

  const closing = [
    'Remember: You are running autonomously. Be thorough, be precise, and complete',
    'the task fully. If something is ambiguous, make a sensible decision and note',
    'it in your final response.',
  ];

  // Sections are separated by exactly one empty line.
  return [
    ...intro, '',
    ...environment, '',
    ...capabilities, '',
    ...howToCallTools, '',
    ...toolCallExample, '',
    ...criticalRules, '',
    ...whenToStop, '',
    ...codingGuidelines, '',
    ...multiStepApproach, '',
    ...availableTools, '',
    ...closing,
  ].join('\n');
}
101
+
102
+ // ─────────────────────────────────────────────
103
+ // Conversation / message history manager
104
+ // ─────────────────────────────────────────────
105
+
106
/**
 * Keeps the ordered message history of one agent run.
 *
 * Each entry in `messages` is `{ role, content }`, where role is 'user' (the
 * task and every tool result) or 'assistant' (the AI's raw responses).
 */
class ConversationManager {
  constructor() {
    this.messages = [];
    this._systemPrompt = null;
  }

  /**
   * Build and record the very first user message: the system prompt, a
   * separator rule, the optional working-directory listing, and the task.
   *
   * @param {string} task - The user's task description.
   * @param {string} [workingDirListing] - Directory listing; omitted when falsy.
   * @returns {string} The message text that was recorded.
   */
  buildFirstMessage(task, workingDirListing) {
    this._systemPrompt = buildSystemPrompt();

    let dirContext = '';
    if (workingDirListing) {
      dirContext = `\nCURRENT WORKING DIRECTORY CONTENTS:\n${workingDirListing}\n`;
    }

    const parts = [
      this._systemPrompt,
      '',
      '═'.repeat(60),
      '',
      dirContext,
      'USER TASK:',
      '──────────',
      task,
    ];
    const firstMessage = parts.join('\n');

    this.messages.push({ role: 'user', content: firstMessage });
    return firstMessage;
  }

  /**
   * Record a tool result as a user-turn message so it is fed back to the AI.
   *
   * @param {string} toolName - Name of the tool that ran.
   * @param {*} result - Tool output; converted with String().
   * @param {boolean} isError - Truthy marks the result as ERROR, else SUCCESS.
   * @returns {string} The message text that was recorded.
   */
  addToolResult(toolName, result, isError) {
    const status = isError ? 'ERROR' : 'SUCCESS';
    const parts = [
      `[TOOL RESULT: ${toolName} | ${status}]`,
      String(result),
      '[END TOOL RESULT]',
      '',
      'Continue with the next step, or provide your final response if the task is complete.',
    ];
    const content = parts.join('\n');

    this.messages.push({ role: 'user', content });
    return content;
  }

  /**
   * Record an assistant message (the AI's raw response).
   *
   * @param {string} content - Response text.
   */
  addAssistantMessage(content) {
    this.messages.push({ role: 'assistant', content });
  }

  /**
   * Get the content of the most recent user message.
   *
   * @returns {string} That content, or '' when no user message exists.
   */
  getLatestUserMessage() {
    for (let i = this.messages.length - 1; i >= 0; i -= 1) {
      const message = this.messages[i];
      if (message.role === 'user') {
        return message.content;
      }
    }
    return '';
  }

  /**
   * Number of assistant turns recorded so far.
   */
  get turnCount() {
    return this.messages.reduce(
      (count, message) => (message.role === 'assistant' ? count + 1 : count),
      0
    );
  }

  /**
   * Export the full conversation as a readable text log. Each message is
   * preceded by a header naming the speaker between two 40-character rules.
   *
   * @returns {string} The log text.
   */
  exportLog() {
    const rule = '─'.repeat(40);
    return this.messages
      .map((message) => {
        const speaker = message.role === 'user' ? 'USER' : 'ASSISTANT';
        return `\n${rule}\n${speaker}\n${rule}\n${message.content}`;
      })
      .join('\n');
  }
}
187
+
188
+ module.exports = { buildSystemPrompt, ConversationManager };