jerob 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI/cli.ts +42 -0
- package/README.md +137 -0
- package/SETUP.md +584 -0
- package/agent/action-tracker.ts +45 -0
- package/agent/agent-tools.ts +111 -0
- package/agent/approval.ts +137 -0
- package/agent/diff-view.ts +26 -0
- package/agent/orchestrator.ts +186 -0
- package/agent/tool-executor.ts +463 -0
- package/agent/types.ts +69 -0
- package/ask/orchestrator.ts +244 -0
- package/auth/auth.ts +567 -0
- package/auth/config-store.ts +77 -0
- package/auth/crypto.ts +51 -0
- package/auth/env-writer.ts +82 -0
- package/bin/jerob.js +28 -0
- package/config/ai.config.ts +163 -0
- package/email_ops/email-tools.ts +178 -0
- package/email_ops/email_functions.ts +443 -0
- package/email_ops/email_init.ts +92 -0
- package/email_ops/email_pass_store.ts +61 -0
- package/email_ops/email_server.ts +29 -0
- package/email_ops/types.ts +88 -0
- package/index.ts +176 -0
- package/package.json +88 -0
- package/plan/browser-agent/README.md +118 -0
- package/plan/browser-agent/USAGE.md +308 -0
- package/plan/browser-agent/evaluator.ts +353 -0
- package/plan/browser-agent/executor.ts +372 -0
- package/plan/browser-agent/index.ts +13 -0
- package/plan/browser-agent/orchestrator.ts +323 -0
- package/plan/browser-agent/planner.ts +200 -0
- package/plan/browser-agent/types.ts +62 -0
- package/plan/browser-tool.ts +128 -0
- package/plan/index.ts +12 -0
- package/plan/orchestrator.ts +214 -0
- package/plan/planner.ts +183 -0
- package/plan/selection.ts +50 -0
- package/plan/types.ts +13 -0
- package/plan/web-tools.ts +119 -0
- package/scheduler/ARCHITECTURE.md +263 -0
- package/scheduler/README.md +200 -0
- package/scheduler/SETUP-READY.sql +84 -0
- package/scheduler/check-status.sql +124 -0
- package/scheduler/config-sync.ts +91 -0
- package/scheduler/db-migrate.ts +271 -0
- package/scheduler/db.ts +162 -0
- package/scheduler/debug.ts +184 -0
- package/scheduler/orchestrator.ts +438 -0
- package/scheduler/planner.ts +170 -0
- package/scheduler/update-task-email.ts +70 -0
- package/supabase/.temp/cli-latest +1 -0
- package/supabase/.temp/gotrue-version +1 -0
- package/supabase/.temp/linked-project.json +1 -0
- package/supabase/.temp/pooler-url +1 -0
- package/supabase/.temp/postgres-version +1 -0
- package/supabase/.temp/project-ref +1 -0
- package/supabase/.temp/rest-version +1 -0
- package/supabase/.temp/storage-migration +1 -0
- package/supabase/.temp/storage-version +1 -0
- package/supabase/deploy.ps1 +50 -0
- package/supabase/functions/scheduler-tick/index.ts +496 -0
- package/supabase/supabase/.temp/linked-project.json +1 -0
- package/tsconfig.json +33 -0
- package/tui/spinner.ts +33 -0
- package/tui/spinup.ts +67 -0
- package/tui/terminal-render.ts +16 -0
- package/utils/llm-error.ts +185 -0
- package/utils/model-validator.ts +247 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
# Browser Agent System - Quick Start Guide
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The Browser Agent is an intelligent automation system that:
|
|
6
|
+
- 🎯 Understands your goal
|
|
7
|
+
- 📋 Plans automation steps
|
|
8
|
+
- 🤖 Executes browser actions
|
|
9
|
+
- ✅ Evaluates results
|
|
10
|
+
- 🔄 Refines automatically (up to 5 iterations)
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
### 1. Launch Browser Agent
|
|
15
|
+
```bash
|
|
16
|
+
jimmy jet
|
|
17
|
+
# Select "Browser Agent" from menu
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### 2. Enter Your Query
|
|
21
|
+
```
|
|
22
|
+
Enter query: Find the top 5 AI jobs on LinkedIn and extract their descriptions
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### 3. Watch the Magic
|
|
26
|
+
The system will:
|
|
27
|
+
1. **Iteration 1**: Plan → Execute → Evaluate → Score
|
|
28
|
+
2. **Iteration 2** (if needed): Refine plan → Execute → Evaluate → Score
|
|
29
|
+
3. **Iteration 3+** (if needed): Continue refining...
|
|
30
|
+
|
|
31
|
+
### 4. Review Results
|
|
32
|
+
- Final score and completion status
|
|
33
|
+
- Extracted data (if any)
|
|
34
|
+
- Iteration logs
|
|
35
|
+
- Option to save to JSON file
|
|
36
|
+
|
|
37
|
+
## Example Queries
|
|
38
|
+
|
|
39
|
+
### Data Extraction
|
|
40
|
+
```
|
|
41
|
+
"Extract all product names and prices from Amazon search results for laptops"
|
|
42
|
+
"Get emails and phone numbers from the company website"
|
|
43
|
+
"List all job titles and salary ranges from the careers page"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Navigation & Interaction
|
|
47
|
+
```
|
|
48
|
+
"Sign up for newsletter with email: test@example.com"
|
|
49
|
+
"Search for flights from NYC to LA and sort by price"
|
|
50
|
+
"Add first 5 items to shopping cart and proceed to checkout"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Information Gathering
|
|
54
|
+
```
|
|
55
|
+
"Find the latest 10 blog posts and extract their titles and dates"
|
|
56
|
+
"Get contact information for all team members on the about page"
|
|
57
|
+
"List all upcoming webinars with dates and registration links"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Complex Tasks
|
|
61
|
+
```
|
|
62
|
+
"Compare prices of iPhone 15 across 3 different retailers"
|
|
63
|
+
"Find user reviews for product XYZ and extract ratings"
|
|
64
|
+
"Automate login and export order history"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Understanding the Iteration Process
|
|
68
|
+
|
|
69
|
+
### Iteration Feedback Loop
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
Query: "Find top 3 tech jobs with full descriptions"
|
|
73
|
+
|
|
74
|
+
┌─ Iteration 1 ─────────────────────────────────────┐
|
|
75
|
+
│ Plan: Navigate → Search → Observe │
|
|
76
|
+
│ Score: 60/100 ❌ (Incomplete data) │
|
|
77
|
+
│ Issues: Found 3 jobs but descriptions cut off │
|
|
78
|
+
└────────────────────────────────────────────────────┘
|
|
79
|
+
│
|
|
80
|
+
▼ (Feedback: Fix description extraction)
|
|
81
|
+
┌─ Iteration 2 ─────────────────────────────────────┐
|
|
82
|
+
│ Plan: Navigate → Search → Click → Extract │
|
|
83
|
+
│ Score: 85/100 ⚠️ (Mostly complete) │
|
|
84
|
+
│ Issues: 2 jobs have full description, 1 missing │
|
|
85
|
+
└────────────────────────────────────────────────────┘
|
|
86
|
+
│
|
|
87
|
+
▼ (Feedback: Get missing job details)
|
|
88
|
+
┌─ Iteration 3 ─────────────────────────────────────┐
|
|
89
|
+
│ Plan: Navigate → Search → Extract All → Verify │
|
|
90
|
+
│ Score: 98/100 ✅ (Complete!) │
|
|
91
|
+
│ Status: SATISFIED - All 3 jobs with descriptions │
|
|
92
|
+
└────────────────────────────────────────────────────┘
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Reading the Results
|
|
96
|
+
|
|
97
|
+
### Console Output Example
|
|
98
|
+
```
|
|
99
|
+
═════════════════════════════════════════
|
|
100
|
+
📊 Browser Agent Execution Summary
|
|
101
|
+
═════════════════════════════════════════
|
|
102
|
+
|
|
103
|
+
Query: Find top 3 AI jobs with descriptions
|
|
104
|
+
Status: ✓ Succeeded
|
|
105
|
+
Total Iterations: 3/5
|
|
106
|
+
Final Score: 98/100
|
|
107
|
+
Completeness: 99%
|
|
108
|
+
Accuracy: 97%
|
|
109
|
+
|
|
110
|
+
📦 Extracted Data:
|
|
111
|
+
[
|
|
112
|
+
{
|
|
113
|
+
"title": "AI Engineer",
|
|
114
|
+
"company": "Tech Corp",
|
|
115
|
+
"salary": "$150k-$200k",
|
|
116
|
+
"description": "..."
|
|
117
|
+
},
|
|
118
|
+
...
|
|
119
|
+
]
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Saved JSON Structure
|
|
123
|
+
```json
|
|
124
|
+
{
|
|
125
|
+
"success": true,
|
|
126
|
+
"query": "Find top 3 AI jobs with descriptions",
|
|
127
|
+
"finalData": { ... },
|
|
128
|
+
"iterations": [
|
|
129
|
+
{
|
|
130
|
+
"iteration": 1,
|
|
131
|
+
"plan": { "goal": "...", "steps": [...] },
|
|
132
|
+
"execution": [
|
|
133
|
+
{ "success": true, "action": "navigate", ... },
|
|
134
|
+
{ "success": true, "action": "observe", ... },
|
|
135
|
+
{ "success": false, "action": "extract", "error": "..." }
|
|
136
|
+
],
|
|
137
|
+
"evaluation": {
|
|
138
|
+
"satisfied": false,
|
|
139
|
+
"score": 60,
|
|
140
|
+
"feedback": "...",
|
|
141
|
+
"completeness": 50,
|
|
142
|
+
"accuracy": 70,
|
|
143
|
+
"issues": ["..."]
|
|
144
|
+
},
|
|
145
|
+
"shouldContinue": true
|
|
146
|
+
},
|
|
147
|
+
{ "iteration": 2, ... },
|
|
148
|
+
{ "iteration": 3, ... }
|
|
149
|
+
],
|
|
150
|
+
"totalIterations": 3,
|
|
151
|
+
"completedAt": "2026-06-02T10:30:45.123Z"
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Supported Actions
|
|
156
|
+
|
|
157
|
+
### Navigation
|
|
158
|
+
- `navigate` - Go to URL
|
|
159
|
+
- `scroll` - Scroll up/down/left/right
|
|
160
|
+
|
|
161
|
+
### Interaction
|
|
162
|
+
- `click` - Click on elements
|
|
163
|
+
- `type` - Enter text
|
|
164
|
+
|
|
165
|
+
### Extraction
|
|
166
|
+
- `extract` - Get structured data with custom schema
|
|
167
|
+
- `observe` - Analyze available actions on page
|
|
168
|
+
|
|
169
|
+
### Timing
|
|
170
|
+
- `wait` - Wait for specified milliseconds
|
|
171
|
+
|
|
172
|
+
## Evaluation Metrics
|
|
173
|
+
|
|
174
|
+
**Score (0-100)**
|
|
175
|
+
- 0-30: Task not started/failed
|
|
176
|
+
- 31-60: Partial progress
|
|
177
|
+
- 61-80: Mostly complete
|
|
178
|
+
- 81-100: Complete/satisfied
|
|
179
|
+
|
|
180
|
+
**Completeness (0-100)**
|
|
181
|
+
- Percentage of required data obtained
|
|
182
|
+
|
|
183
|
+
**Accuracy (0-100)**
|
|
184
|
+
- Correctness of extracted data
|
|
185
|
+
|
|
186
|
+
**Satisfaction**
|
|
187
|
+
- Default threshold: 80/100
|
|
188
|
+
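- A score below the threshold is never treated as satisfied; at or above it, the evaluator's own `satisfied` verdict decides (so a run scoring 85 may still be refined further)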
|
|
189
|
+
|
|
190
|
+
## Tips for Better Results
|
|
191
|
+
|
|
192
|
+
### 1. **Be Specific**
|
|
193
|
+
```
|
|
194
|
+
❌ "Find jobs"
|
|
195
|
+
✅ "Find senior software engineer jobs in SF with salary > $200k"
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### 2. **Include Context**
|
|
199
|
+
```
|
|
200
|
+
❌ "Extract prices"
|
|
201
|
+
✅ "Extract iPhone 15 prices from Apple, Amazon, and Best Buy"
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 3. **Define Output Format**
|
|
205
|
+
```
|
|
206
|
+
❌ "Get company info"
|
|
207
|
+
✅ "Extract company name, founded year, and employee count"
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### 4. **Set Clear Success Criteria**
|
|
211
|
+
```
|
|
212
|
+
❌ "Search for flights"
|
|
213
|
+
✅ "Find cheapest round-trip flights from NYC to LA under $300 for next week"
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## Troubleshooting
|
|
217
|
+
|
|
218
|
+
### Query Too Vague
|
|
219
|
+
**Error**: Multiple iterations with low scores
|
|
220
|
+
**Solution**: Be more specific about what you need
|
|
221
|
+
|
|
222
|
+
### Navigation Fails
|
|
223
|
+
**Error**: First iteration fails on navigate step
|
|
224
|
+
**Solution**: Verify URL is correct and accessible
|
|
225
|
+
|
|
226
|
+
### Data Not Extracting
|
|
227
|
+
**Error**: Extract steps succeed but return empty data
|
|
228
|
+
**Solution**: Try with screenshot tool first to see page structure
|
|
229
|
+
|
|
230
|
+
### Browser Not Opening
|
|
231
|
+
**Error**: Connection error on init
|
|
232
|
+
**Solution**: Ensure Chrome is installed and `GOOGLE_GENERATIVE_AI_API_KEY` is set
|
|
233
|
+
|
|
234
|
+
## System Configuration
|
|
235
|
+
|
|
236
|
+
Edit `plan/browser-agent/orchestrator.ts`:
|
|
237
|
+
|
|
238
|
+
```typescript
|
|
239
|
+
const DEFAULT_CONFIG = {
|
|
240
|
+
maxIterations: 5, // Increase for complex tasks
|
|
241
|
+
timeout: 120000, // 2 minutes per execution
|
|
242
|
+
model: "google/gemini-3.1-flash-lite-preview",
|
|
243
|
+
evaluationThreshold: 80, // Lower = accept sooner
|
|
244
|
+
};
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Environment Setup
|
|
248
|
+
|
|
249
|
+
**Required:**
|
|
250
|
+
```bash
|
|
251
|
+
export GOOGLE_GENERATIVE_AI_API_KEY="your-api-key-here"
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
**Optional:**
|
|
255
|
+
```bash
|
|
256
|
+
export DEBUG=true # Enable verbose logging
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## Integration Notes
|
|
260
|
+
|
|
261
|
+
✅ Works alongside existing modes:
|
|
262
|
+
- Agent Mode - File/folder operations
|
|
263
|
+
- Plan Mode - Task planning
|
|
264
|
+
- Ask Mode - General questions
|
|
265
|
+
- Browser Agent Mode - Web automation (NEW)
|
|
266
|
+
|
|
267
|
+
✅ No conflicts or breaking changes
|
|
268
|
+
|
|
269
|
+
## Performance Tips
|
|
270
|
+
|
|
271
|
+
1. **Reduce maxIterations** for quick results (default: 5)
|
|
272
|
+
2. **Increase timeout** for slower sites (default: 120s)
|
|
273
|
+
3. **Lower threshold** to accept "good enough" results faster
|
|
274
|
+
4. **Use variables** for repeated values (email, password, etc.)
|
|
275
|
+
|
|
276
|
+
## Advanced Usage
|
|
277
|
+
|
|
278
|
+
### Manual Plan (Future Feature)
|
|
279
|
+
```typescript
|
|
280
|
+
const manualPlan = {
|
|
281
|
+
goal: "Extract pricing info",
|
|
282
|
+
steps: [
|
|
283
|
+
{ id: 1, action: "navigate", value: "https://example.com" },
|
|
284
|
+
{ id: 2, action: "click", value: "Search for laptops" },
|
|
285
|
+
{ id: 3, action: "extract", description: "Get name and price" }
|
|
286
|
+
]
|
|
287
|
+
};
|
|
288
|
+
|
|
289
|
+
const result = await executeBrowserPlan(manualPlan);
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Direct Executor Usage
|
|
293
|
+
```typescript
|
|
294
|
+
import { runBrowserAgentMode } from './plan/browser-agent';
|
|
295
|
+
|
|
296
|
+
await runBrowserAgentMode();
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## Support & Issues
|
|
300
|
+
|
|
301
|
+
For issues or feature requests, check:
|
|
302
|
+
1. Terminal (console) output for detailed error messages — set `DEBUG=true` for verbose logging
|
|
303
|
+
2. Saved JSON result files for iteration logs
|
|
304
|
+
3. TypeScript types in `plan/browser-agent/types.ts`
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
**Happy automating!** 🚀
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
import { getAgentModel2 } from "../../config/ai.config";
|
|
2
|
+
import type { EvaluationResult, ExecutionResult, BrowserPlan } from "./types";
|
|
3
|
+
import { generateText } from "ai";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
import { withLLMRetry } from "../../utils/llm-error";
|
|
6
|
+
|
|
7
|
+
/**
 * English number words understood by {@link parseNumberLike}.
 * `hundred` is the only multiplier; everything else is additive.
 */
const NUMBER_WORDS: Record<string, number> = {
  zero: 0,
  one: 1,
  two: 2,
  three: 3,
  four: 4,
  five: 5,
  six: 6,
  seven: 7,
  eight: 8,
  nine: 9,
  ten: 10,
  eleven: 11,
  twelve: 12,
  thirteen: 13,
  fourteen: 14,
  fifteen: 15,
  sixteen: 16,
  seventeen: 17,
  eighteen: 18,
  nineteen: 19,
  twenty: 20,
  thirty: 30,
  forty: 40,
  fifty: 50,
  sixty: 60,
  seventy: 70,
  eighty: 80,
  ninety: 90,
  hundred: 100,
};

/**
 * Best-effort conversion of a loosely formatted value into a number.
 *
 * Accepted inputs:
 * - a `number` (returned unchanged),
 * - a numeric string, optionally with thousands separators (`"1,234"`) and/or
 *   a trailing percent sign (`"85%"`),
 * - English number words separated by spaces or hyphens (`"twenty-one"`,
 *   `"one hundred five"`).
 *
 * @param value - Arbitrary input, typically a field taken from model output.
 * @returns The parsed number, or `null` when the input is not a number, is
 *   empty, or contains a token that is not a known number word.
 */
function parseNumberLike(value: unknown): number | null {
  if (typeof value === "number") {
    return value;
  }

  if (typeof value !== "string") {
    return null;
  }

  let normalized = value.trim().toLowerCase();

  // Percentages are often written with their unit ("85%"); drop it before parsing.
  if (normalized.endsWith("%")) {
    normalized = normalized.slice(0, -1).trim();
  }

  if (normalized.length === 0) {
    return null;
  }

  const numeric = Number(normalized.replace(/,/g, ""));
  if (!Number.isNaN(numeric)) {
    return numeric;
  }

  let total = 0;

  for (const word of normalized.replace(/-/g, " ").split(/\s+/)) {
    // Own-property check: a plain index lookup would also find inherited keys
    // such as "constructor" or "__proto__" and add a function/object to the total.
    const wordValue = Object.prototype.hasOwnProperty.call(NUMBER_WORDS, word)
      ? NUMBER_WORDS[word]
      : undefined;

    if (wordValue === undefined) {
      return null;
    }

    if (wordValue === 100 && total !== 0) {
      // "<n> hundred" scales what has been accumulated so far.
      total *= wordValue;
    } else {
      total += wordValue;
    }
  }

  return total;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Boolean schema that also accepts common textual spellings.
 * Strings are trimmed and lower-cased; "true"/"yes"/"y" become `true` and
 * "false"/"no"/"n" become `false`. Anything else is handed to `z.boolean()`
 * untouched, so unrecognised input fails validation.
 */
const booleanLike = z.preprocess((input) => {
  if (typeof input !== "string") {
    return input;
  }

  switch (input.trim().toLowerCase()) {
    case "true":
    case "yes":
    case "y":
      return true;
    case "false":
    case "no":
    case "n":
      return false;
    default:
      return input;
  }
}, z.boolean());
|
|
94
|
+
|
|
95
|
+
/**
 * Number schema constrained to the range 0–100. The raw input is first run
 * through `parseNumberLike`; when that yields `null` the `z.number()` check
 * rejects the value.
 */
const percentageLike = z.preprocess(
  (input) => parseNumberLike(input),
  z.number().min(0).max(100)
);
|
|
99
|
+
|
|
100
|
+
/**
 * String-array schema that tolerates a single string in place of an array:
 * a blank string becomes `[]`, any other string becomes a one-element array
 * holding the trimmed text. Non-string input is validated as-is.
 */
const issuesLike = z.preprocess((input) => {
  if (typeof input !== "string") {
    return input;
  }

  const text = input.trim();
  return text.length > 0 ? [text] : [];
}, z.array(z.string()));
|
|
110
|
+
|
|
111
|
+
/**
 * Shape of the evaluator model's JSON verdict. Each field uses a tolerant
 * pre-processor where one exists; the object itself is strict, so keys other
 * than the six listed here fail validation.
 */
const EvaluationSchema = z.strictObject({
  satisfied: booleanLike,
  score: percentageLike,
  feedback: z.string(),
  completeness: percentageLike,
  accuracy: percentageLike,
  issues: issuesLike,
});
|
|
119
|
+
|
|
120
|
+
/**
 * Scans `text` from `start` (which must point at a `{`) for the matching `}`.
 *
 * @param honourStrings - When true, braces inside double-quoted strings
 *   (with backslash escapes) are ignored.
 * @returns The balanced `{...}` substring, or `null` if it never closes.
 */
function scanBalancedObject(
  text: string,
  start: number,
  honourStrings: boolean
): string | null {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i += 1) {
    const char = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === '"') {
        inString = false;
      }
      continue;
    }

    if (honourStrings && char === '"') {
      inString = true;
    } else if (char === "{") {
      depth += 1;
    } else if (char === "}") {
      depth -= 1;
      if (depth === 0) {
        return text.slice(start, i + 1);
      }
    }
  }

  return null;
}

/**
 * Returns the first balanced `{...}` object found in a model response.
 *
 * Surrounding prose or Markdown code fences need no special handling because
 * the scan starts at the first `{`. A string-aware scan is tried first so that
 * braces inside values (e.g. feedback text) do not end the object early; if it
 * fails (e.g. unbalanced quotes in malformed output) plain brace counting is
 * used instead.
 */
function extractJsonObjectFromText(text: string): string | null {
  const firstBraceIndex = text.indexOf("{");
  if (firstBraceIndex === -1) {
    return null;
  }

  return (
    scanBalancedObject(text, firstBraceIndex, true) ??
    scanBalancedObject(text, firstBraceIndex, false)
  );
}

/** Strips one pair of matching surrounding quotes and collapses whitespace runs. */
function normalizeLooseString(value: string): string {
  let normalized = value.trim();
  const doubleQuoted = normalized.startsWith('"') && normalized.endsWith('"');
  const singleQuoted = normalized.startsWith("'") && normalized.endsWith("'");
  if (doubleQuoted || singleQuoted) {
    normalized = normalized.slice(1, -1);
  }
  return normalized.replace(/\s+/g, " ").trim();
}

/** Maps "true"/"yes"/"y" and "false"/"no"/"n" (case-insensitive) to booleans, else `null`. */
function normalizeLooseBoolean(value: string): boolean | null {
  const normalized = value.trim().toLowerCase();
  if (["true", "yes", "y"].includes(normalized)) {
    return true;
  }
  if (["false", "no", "n"].includes(normalized)) {
    return false;
  }
  return null;
}

/**
 * Parses JSON, first verbatim and then with single quotes swapped for double
 * quotes. Trying the verbatim text first keeps valid JSON that merely contains
 * apostrophes intact.
 *
 * @returns A wrapper around the parsed value, or `null` if both attempts fail.
 */
function tryParseLooseJson(text: string): { value: unknown } | null {
  for (const candidate of [text, text.replace(/'/g, '"')]) {
    try {
      return { value: JSON.parse(candidate) };
    } catch {
      // try the next candidate
    }
  }
  return null;
}

/**
 * Interprets a raw field value as a boolean, then a number, then a JSON
 * array/object, and finally falls back to a cleaned-up string.
 */
function normalizeLooseValue(value: string): unknown {
  const normalized = value.trim();

  const bool = normalizeLooseBoolean(normalized);
  if (bool !== null) {
    return bool;
  }

  const num = parseNumberLike(normalized);
  if (num !== null) {
    return num;
  }

  if (normalized.startsWith("[") || normalized.startsWith("{")) {
    const parsed = tryParseLooseJson(normalized);
    if (parsed) {
      return parsed.value;
    }
  }

  return normalizeLooseString(normalized);
}

/** Interprets a raw `issues` value: a parsable array is used as-is, anything else becomes a one-element list. */
function parseLooseIssues(rawValue: string): unknown {
  const trimmed = rawValue.trim();
  if (trimmed.startsWith("[")) {
    const parsed = tryParseLooseJson(trimmed);
    if (parsed) {
      return parsed.value;
    }
  }
  return [normalizeLooseString(trimmed)];
}

/**
 * Turns the evaluator model's raw text into an unvalidated object.
 *
 * Strict `JSON.parse` is attempted first. If that fails, the six known keys
 * are pulled out with a tolerant pattern that accepts unquoted or
 * single-quoted keys.
 *
 * @throws Error when no `{...}` object can be located in `rawText`.
 */
function parseLooseEvaluationResponse(rawText: string): unknown {
  const jsonText = extractJsonObjectFromText(rawText);
  if (!jsonText) {
    throw new Error(
      `Unable to locate a valid JSON object in the model response. Raw response: ${rawText}`
    );
  }

  try {
    return JSON.parse(jsonText);
  } catch {
    // Not strict JSON; recover field by field below.
  }

  // Value alternatives are ordered most-specific first. The bare-token
  // alternative also matches text starting with a quote or bracket, so if it
  // came first it would cut quoted strings and arrays at their first comma.
  const pattern =
    /["']?(satisfied|score|feedback|completeness|accuracy|issues)["']?\s*:\s*("(?:[^"\\]|\\.)*"|\[[^\]]*\]|\{[^}]*\}|[^,}\n]+)/gi;

  const result: Record<string, unknown> = {};

  for (
    let match = pattern.exec(jsonText);
    match !== null;
    match = pattern.exec(jsonText)
  ) {
    const key = (match[1] ?? "").toLowerCase();
    const rawValue = (match[2] ?? "").trim();

    if (key === "issues") {
      result.issues = parseLooseIssues(rawValue);
    } else if (key === "feedback") {
      result.feedback = normalizeLooseString(rawValue);
    } else {
      result[key] = normalizeLooseValue(rawValue);
    }
  }

  return result;
}

/**
 * Asks the evaluator model to judge a browser-automation run and returns its
 * validated verdict.
 *
 * The prompt summarises the plan goal, the query, the first result's
 * success/error/agent output/data, and one line per execution result. The
 * reply is parsed leniently and validated against `EvaluationSchema`.
 *
 * @param query - The user's original request.
 * @param plan - The plan that was executed; only `plan.goal` is read here.
 * @param results - Execution results; the first entry is treated as the agent result.
 * @param evaluationThreshold - Minimum score for a run to count as satisfied
 *   (default 80). A lower score forces `satisfied` to `false`; a higher score
 *   does not force it to `true`.
 * @returns The validated evaluation.
 * @throws Error ("Failed to parse evaluation result: ...") when the reply
 *   cannot be parsed or fails schema validation. Errors thrown by the retried
 *   LLM call itself are not caught here.
 */
export async function evaluateExecutionResults(
  query: string,
  plan: BrowserPlan,
  results: ExecutionResult[],
  evaluationThreshold: number = 80
): Promise<EvaluationResult> {
  const model = getAgentModel2();

  // Check if this is an agent-based result (single entry with agentOutput)
  const agentResult = results[0];
  const agentOutput = agentResult?.agentOutput ?? "";
  const agentData = agentResult?.data
    ? JSON.stringify(agentResult.data, null, 2)
    : "";

  const executionSummary = `
Plan Goal: ${plan.goal}
Query: ${query}
Agent Completed: ${agentResult?.success ? "Yes" : "No"}
${agentResult?.error ? `Error: ${agentResult.error}` : ""}

Agent Output:
${agentOutput || "(no text output)"}

${agentData ? `Extracted Data:\n${agentData}` : ""}

Execution Details:
${results.map((r) => `- Step ${r.stepNumber} (${r.action}): ${r.message}`).join("\n")}
`;

  const systemPrompt = `You are an evaluator for browser automation tasks. Assess whether the execution successfully completed the requested task.

Respond with ONLY valid JSON (no markdown, no code blocks):
{
"satisfied": boolean,
"score": number (0-100),
"feedback": "string",
"completeness": number (0-100),
"accuracy": number (0-100),
"issues": ["issue1", "issue2"]
}`;

  const userPrompt = `Evaluate this browser automation execution:

${executionSummary}

Original Query: "${query}"`;

  const response = await withLLMRetry(
    () => generateText({
      model,
      system: systemPrompt,
      prompt: userPrompt,
      temperature: 0.3,
    }),
    { maxRetries: 2, context: "Evaluator" }
  );

  try {
    const rawText = response.text?.trim() ?? "";
    const parsed = parseLooseEvaluationResponse(rawText);
    const evaluation = EvaluationSchema.parse(parsed);

    // The threshold is a hard floor: the model may not declare success below it.
    if (evaluation.score < evaluationThreshold) {
      evaluation.satisfied = false;
    }

    return evaluation;
  } catch (error) {
    throw new Error(
      `Failed to parse evaluation result: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
|
|
328
|
+
|
|
329
|
+
/**
 * Decides whether the refine loop should run another iteration.
 *
 * @param evaluation - Verdict of the iteration that just finished.
 * @param iteration - Number of the iteration that just finished.
 * @param maxIterations - Iteration budget.
 * @returns `false` once the evaluation is satisfied or `iteration` has reached
 *   `maxIterations`; `true` otherwise.
 */
export function shouldContinueIterating(
  evaluation: EvaluationResult,
  iteration: number,
  maxIterations: number
): boolean {
  const budgetExhausted = iteration >= maxIterations;
  return !evaluation.satisfied && !budgetExhausted;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Builds the one-line feedback summary carried into the next iteration.
 *
 * @param evaluation - Verdict of the iteration that just finished.
 * @returns A sentence containing the score, up to the first three issues
 *   joined by "; " (or "none reported" when there are none), and the
 *   completeness and accuracy percentages.
 */
export function extractFeedbackForNextIteration(
  evaluation: EvaluationResult
): string {
  const topIssues = evaluation.issues.slice(0, 3).join("; ");
  // Avoid emitting the dangling "Issues to fix: ." when nothing was reported.
  const issuesText = topIssues.length > 0 ? topIssues : "none reported";
  return `Previous iteration score: ${evaluation.score}/100. Issues to fix: ${issuesText}. Completeness: ${evaluation.completeness}%, Accuracy: ${evaluation.accuracy}%.`;
}
|