halo-agent 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/browser.js +157 -0
- package/captcha.js +217 -0
- package/config.js +37 -0
- package/filler.js +987 -0
- package/index.js +360 -0
- package/localServer.js +270 -0
- package/manusAutomate.js +349 -0
- package/orchestrator.js +1122 -0
- package/package.json +49 -0
- package/poller.js +172 -0
- package/scanPage.js +606 -0
- package/vision.js +398 -0
package/vision.js
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Claude computer-use vision driver for HALO agent.
 *
 * Three entry points (all exported at the bottom of this file):
 *
 * 1. visionNavigateAndSubmit(page, aep, apiKey, { autoSubmit, alreadyFilled })
 *    Targeted use: DOM filling already ran, but submit button not found.
 *    Claude looks at the current page state and completes the navigation + submission.
 *    Used as a fallback after fillFields() + pagination have run.
 *
 * 2. visionFill(page, aep, apiKey, { alreadyFilled })
 *    Full fallback: fills all visible fields from scratch via vision.
 *    Used for ATS types that completely defeat DOM automation (Workday canvas, Taleo).
 *
 * 3. visionFillSkipped(page, aep, apiKey, alreadyFilledMap)
 *    Precision fill: DOM filling already ran but left some fields unfilled.
 *    Claude fills only the fields that still appear empty.
 *
 * All three use the same action loop:
 *   screenshot -> Claude decides action -> execute in Playwright -> screenshot -> repeat
 * until Claude signals done, the maximum number of iterations is reached,
 * or a Claude API / network error ends the loop early.
 */
|
|
21
|
+
|
|
22
|
+
// Model ID sent in the `model` field of every Messages API request made by runActionLoop.
// NOTE(review): runActionLoop pairs this model with the `computer_20241022` tool type and the
// `computer-use-2024-10-22` beta header — confirm this model accepts that tool version.
const CLAUDE_MODEL = 'claude-opus-4-6';
|
|
23
|
+
// Default cap on request/response round-trips in runActionLoop (the default for its `maxIter`
// parameter); visionFill passes it explicitly, the other entry points pass 20.
const MAX_ITERATIONS = 40;
|
|
24
|
+
// JPEG `quality` option passed to page.screenshot() for every screenshot sent to the model.
const SCREENSHOT_QUALITY = 75;
|
|
25
|
+
|
|
26
|
+
// Lower-cased xdotool-style key names (the spelling the computer-use tool tends to emit)
// mapped to the key names Playwright's keyboard API expects. Names that are not listed
// (single characters, "F5", or names that are already Playwright-style) pass through unchanged.
const VISION_KEY_ALIASES = new Map([
  ['return', 'Enter'],
  ['enter', 'Enter'],
  ['kp_enter', 'Enter'],
  ['backspace', 'Backspace'],
  ['tab', 'Tab'],
  ['escape', 'Escape'],
  ['esc', 'Escape'],
  ['delete', 'Delete'],
  ['insert', 'Insert'],
  ['space', 'Space'],
  ['page_up', 'PageUp'],
  ['pageup', 'PageUp'],
  ['prior', 'PageUp'],
  ['page_down', 'PageDown'],
  ['pagedown', 'PageDown'],
  ['next', 'PageDown'],
  ['home', 'Home'],
  ['end', 'End'],
  ['up', 'ArrowUp'],
  ['down', 'ArrowDown'],
  ['left', 'ArrowLeft'],
  ['right', 'ArrowRight'],
  ['ctrl', 'Control'],
  ['control', 'Control'],
  ['alt', 'Alt'],
  ['shift', 'Shift'],
  ['super', 'Meta'],
  ['cmd', 'Meta'],
  ['meta', 'Meta'],
  ['win', 'Meta'],
]);

/** Translate one key name to its Playwright spelling; unknown names are returned as-is. */
function toPlaywrightKey(name) {
  return VISION_KEY_ALIASES.get(String(name).toLowerCase()) ?? name;
}

/** Validate an [x, y] pair taken from an action and return it, or throw a descriptive TypeError. */
function requirePoint(value, field, type) {
  if (!Array.isArray(value) || !Number.isFinite(value[0]) || !Number.isFinite(value[1])) {
    throw new TypeError(`[vision] Action "${type}" requires a numeric [x, y] "${field}"`);
  }
  return [value[0], value[1]];
}

/** Press a single key or a "mod+mod+key" combo, always releasing any modifier that was pressed. */
async function pressKeyCombo(page, combo) {
  let names;
  if (combo === '+') {
    names = ['+']; // the plus key itself, not a separator
  } else if (combo.endsWith('++')) {
    names = [...combo.slice(0, -2).split('+'), '+']; // e.g. "ctrl++"
  } else {
    names = combo.split('+');
  }

  const keys = names.map((name) => toPlaywrightKey(name));
  const mainKey = keys.pop();
  const held = [];
  try {
    for (const modifier of keys) {
      await page.keyboard.down(modifier);
      held.push(modifier);
    }
    await page.keyboard.press(mainKey);
  } finally {
    // Release in reverse order even when press() rejects, so a bad key name
    // cannot leave a modifier stuck down for the following actions.
    for (const modifier of held.reverse()) {
      await page.keyboard.up(modifier);
    }
  }
}

/**
 * Execute a single computer-use action returned by Claude against a Playwright page.
 *
 * Supported `action.action` values: screenshot, left_click, double_click, triple_click,
 * right_click, middle_click, mouse_move, left_click_drag, type, key, scroll.
 * Any other value is logged with console.warn and otherwise ignored.
 *
 * Argument handling:
 * - key: the combo is read from `action.text` (falling back to the legacy `action.key`),
 *   may contain several whitespace-separated combos, and is translated from xdotool-style
 *   names ("Return", "ctrl+a") to Playwright names.
 * - scroll: the amount is read from `scroll_amount` (falling back to the legacy
 *   `scroll_distance`, then 3) and multiplied by 100 px; all four directions are honoured,
 *   and a missing/unknown direction scrolls down.
 * - left_click_drag: drags from `start_coordinate` to `coordinate`; with only `coordinate`
 *   it drags from the current pointer position; the legacy `coordinate` -> `end_coordinate`
 *   form is still accepted.
 * NOTE(review): these argument names follow my reading of Anthropic's computer-use tool
 * schema — confirm against the tool version that runActionLoop requests.
 *
 * @param {object} page - Playwright page (uses page.mouse, page.keyboard, waitForTimeout,
 *   waitForLoadState and screenshot).
 * @param {object} action - The `input` object of a `computer` tool_use block.
 * @returns {Promise<Buffer>} A fresh JPEG screenshot taken after the action.
 * @throws {TypeError} When a required coordinate or text argument is missing or malformed.
 *   Errors raised by Playwright itself propagate unchanged.
 */
async function executeAction(page, action) {
  const { action: type, coordinate, text, key } = action;

  switch (type) {
    case 'screenshot':
      // Claude is checking its work — just return a fresh screenshot, no interaction.
      break;

    case 'left_click': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.click(x, y);
      await page.waitForTimeout(400);
      break;
    }

    case 'double_click': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.dblclick(x, y);
      await page.waitForTimeout(400);
      break;
    }

    case 'triple_click': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.click(x, y, { clickCount: 3 });
      await page.waitForTimeout(400);
      break;
    }

    case 'right_click': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.click(x, y, { button: 'right' });
      await page.waitForTimeout(300);
      break;
    }

    case 'middle_click': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.click(x, y, { button: 'middle' });
      await page.waitForTimeout(300);
      break;
    }

    case 'mouse_move': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.move(x, y);
      break;
    }

    case 'left_click_drag': {
      const legacyEnd = action.end_coordinate;
      // Legacy form used `coordinate` as the start and `end_coordinate` as the end.
      const rawStart = action.start_coordinate ?? (legacyEnd ? coordinate : undefined);
      const [endX, endY] = requirePoint(
        legacyEnd ?? coordinate,
        legacyEnd ? 'end_coordinate' : 'coordinate',
        type,
      );
      if (rawStart !== undefined) {
        const [startX, startY] = requirePoint(rawStart, 'start_coordinate', type);
        await page.mouse.move(startX, startY);
      }
      await page.mouse.down();
      try {
        await page.mouse.move(endX, endY);
      } finally {
        // Never leave the button held if the move rejects.
        await page.mouse.up();
      }
      break;
    }

    case 'type':
      if (typeof text !== 'string') {
        throw new TypeError('[vision] Action "type" requires a string "text"');
      }
      await page.keyboard.type(text, { delay: 25 });
      break;

    case 'key': {
      const keySpec = text ?? key;
      if (typeof keySpec !== 'string' || keySpec.length === 0) {
        throw new TypeError('[vision] Action "key" requires a key name in "text"');
      }
      // "Tab Tab" means two presses; a spec that is only whitespace is the space key itself.
      const combos = /\S/.test(keySpec) ? keySpec.trim().split(/\s+/) : [keySpec];
      for (const combo of combos) {
        await pressKeyCombo(page, combo);
      }
      await page.waitForTimeout(200);
      break;
    }

    case 'scroll': {
      const [x, y] = requirePoint(coordinate, 'coordinate', type);
      await page.mouse.move(x, y);
      // Non-numeric or zero amounts fall back to 3 notches, as before.
      const notches = Number(action.scroll_amount ?? action.scroll_distance) || 3;
      const delta = notches * 100;
      let deltaX = 0;
      let deltaY = 0;
      switch (action.scroll_direction) {
        case 'up':
          deltaY = -delta;
          break;
        case 'left':
          deltaX = -delta;
          break;
        case 'right':
          deltaX = delta;
          break;
        default:
          deltaY = delta;
      }
      await page.mouse.wheel(deltaX, deltaY);
      await page.waitForTimeout(300);
      break;
    }

    default:
      console.warn(`[vision] Unknown action type: ${type}`);
  }

  // Wait for any triggered navigation or DOM updates; a timeout here is not an error.
  await page.waitForLoadState('domcontentloaded', { timeout: 5000 }).catch(() => {});

  // Return fresh screenshot
  return page.screenshot({ type: 'jpeg', quality: SCREENSHOT_QUALITY });
}
|
|
102
|
+
|
|
103
|
+
// Upper bound for a single Messages API request so a stalled connection cannot hang the loop.
const CLAUDE_REQUEST_TIMEOUT_MS = 120000;

/**
 * Core action loop. Sends the conversation to Claude, executes every `computer` tool call
 * it returns via executeAction(), feeds the resulting screenshots back as tool results,
 * and repeats until Claude stops issuing tool calls, `maxIter` round-trips have been made,
 * or a request fails.
 *
 * A failed action is reported back to Claude as an `is_error` tool result (with the error
 * message and the most recent screenshot) instead of silently looking like a success.
 *
 * This function does not throw on API/network failures; it ends the loop and reports the
 * failure in the returned `error` field so callers can distinguish it from completion.
 *
 * @param {object} page - Playwright page (uses viewportSize() and screenshot() here).
 * @param {string} systemPrompt - Sent as the `system` field of every request.
 * @param {string} userPrompt - Text of the first user message, sent with an initial screenshot.
 * @param {string} apiKey - Sent as the `x-api-key` header.
 * @param {number} [maxIter=MAX_ITERATIONS] - Maximum number of API round-trips.
 * @returns {Promise<{actionsExecuted: number, iterations: number, completed: boolean, error: (string|null)}>}
 *   `actionsExecuted` counts actions that ran without throwing; `iterations` counts API
 *   round-trips; `completed` is true only when Claude stopped on its own; `error` is a
 *   message when a request failed, otherwise null.
 */
async function runActionLoop(page, systemPrompt, userPrompt, apiKey, maxIter = MAX_ITERATIONS) {
  // Get the current viewport size so Claude knows the coordinate space
  const viewport = page.viewportSize() || { width: 1280, height: 800 };
  const takeScreenshot = () => page.screenshot({ type: 'jpeg', quality: SCREENSHOT_QUALITY });

  // Most recent good screenshot; reused if a later capture fails.
  let lastScreenshot = await takeScreenshot();

  const messages = [
    {
      role: 'user',
      content: [
        {
          type: 'image',
          source: { type: 'base64', media_type: 'image/jpeg', data: lastScreenshot.toString('base64') },
        },
        { type: 'text', text: userPrompt },
      ],
    },
  ];

  let iteration = 0;
  let actionsExecuted = 0;
  let completed = false;
  let error = null;

  while (iteration < maxIter) {
    iteration++;

    let response;
    try {
      const res = await fetch('https://api.anthropic.com/v1/messages', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': apiKey,
          'anthropic-version': '2023-06-01',
          'anthropic-beta': 'computer-use-2024-10-22',
        },
        body: JSON.stringify({
          model: CLAUDE_MODEL,
          max_tokens: 2048,
          system: systemPrompt,
          tools: [{
            type: 'computer_20241022',
            name: 'computer',
            display_width_px: viewport.width,
            display_height_px: viewport.height,
          }],
          messages,
        }),
        // Optional call: older runtimes without AbortSignal.timeout simply get no timeout.
        signal: AbortSignal.timeout?.(CLAUDE_REQUEST_TIMEOUT_MS),
      });

      if (!res.ok) {
        const errText = await res.text();
        console.error('[vision] Claude API error:', res.status, errText.slice(0, 200));
        error = `Claude API error ${res.status}: ${errText.slice(0, 200)}`;
        break;
      }

      response = await res.json();
    } catch (e) {
      console.error('[vision] Fetch error:', e.message);
      error = `Fetch error: ${e.message}`;
      break;
    }

    // Check stop reason
    if (response.stop_reason === 'end_turn') {
      console.log(`[vision] Claude signaled completion after ${actionsExecuted} actions`);
      completed = true;
      break;
    }

    // Find tool use blocks
    const toolUseBlocks = (response.content || []).filter(b => b.type === 'tool_use' && b.name === 'computer');
    if (toolUseBlocks.length === 0) {
      console.log(`[vision] No more tool calls — done after ${actionsExecuted} actions`);
      completed = true;
      break;
    }

    // Add assistant message to history
    messages.push({ role: 'assistant', content: response.content });

    // Execute each action and collect results
    const toolResults = [];
    for (const block of toolUseBlocks) {
      // A malformed block without `input` becomes an "unknown action" rather than a crash.
      const action = block.input || {};
      console.log(`[vision] Action: ${action.action}${action.coordinate ? ` at [${action.coordinate}]` : ''}${action.text ? ` "${action.text.slice(0, 40)}"` : ''}`);

      let failure = null;
      try {
        lastScreenshot = await executeAction(page, action);
        actionsExecuted++;
      } catch (e) {
        console.warn(`[vision] Action execution failed: ${e.message}`);
        failure = e.message;
        lastScreenshot = await takeScreenshot().catch(() => lastScreenshot);
      }

      const content = [{
        type: 'image',
        source: {
          type: 'base64',
          media_type: 'image/jpeg',
          data: lastScreenshot.toString('base64'),
        },
      }];
      const toolResult = { type: 'tool_result', tool_use_id: block.id, content };
      if (failure !== null) {
        // Tell Claude the action did not run so it can retry or pick another approach.
        content.unshift({ type: 'text', text: `Action failed: ${failure}` });
        toolResult.is_error = true;
      }
      toolResults.push(toolResult);
    }

    // Add tool results to conversation so Claude sees the outcome
    messages.push({ role: 'user', content: toolResults });
  }

  if (!completed && error === null) {
    console.warn(`[vision] Stopped after reaching the iteration limit (${maxIter})`);
  }

  return { actionsExecuted, iterations: iteration, completed, error };
}
|
|
220
|
+
|
|
221
|
+
/**
 * Precision fill: DOM filling already ran but left some fields unfilled.
 * Claude is told which labels are already filled and asked to fill only the remaining
 * empty fields, without clicking Next/Continue/Submit. Runs at most 20 loop iterations.
 * The original author's note says this is called when fillResult.skipped > 2 or
 * fillResult.failed > 0; that caller is not in this file.
 *
 * @param {object} page - Playwright page, passed through to runActionLoop.
 * @param {object} aep - Application payload; summarised with buildAnswerSummary().
 * @param {string} apiKey - Anthropic API key; when falsy nothing is attempted.
 * @param {Map|object|null} [alreadyFilledMap] - Labels already filled by the DOM filler.
 *   A Map (its keys are used), a plain object (its own keys), or null/undefined ("none").
 * @returns {Promise<{success: boolean, filled: number, reason?: string}>}
 *   `filled` is an estimate: half the executed actions, rounded down (roughly one click
 *   plus one type per field). `success` is false, with `reason`, when no API key was given,
 *   the action loop reported an error, or it threw.
 */
async function visionFillSkipped(page, aep, apiKey, alreadyFilledMap = new Map()) {
  if (!apiKey) {
    console.warn('[vision] No API key — precision fill skipped');
    return { success: false, filled: 0, reason: 'No API key' };
  }

  // Accept Map-like inputs, plain objects, and null without throwing.
  const filledLabels = typeof alreadyFilledMap?.keys === 'function'
    ? [...alreadyFilledMap.keys()]
    : Object.keys(alreadyFilledMap ?? {});
  const alreadyFilledLabels = filledLabels.join(', ') || 'none';
  console.log('[vision] Precision fill for skipped fields...');

  const systemPrompt = `You are controlling a web browser to fill a job application form.
Some fields were already filled by an automated DOM filler. Your job is to fill only the REMAINING empty fields.

Rules:
- DO NOT re-fill fields that are already filled — they are listed below as "already filled"
- Fill only the visible fields that appear empty
- Click a field before typing, use clear+type pattern
- For dropdowns: click to open, wait for options, click the correct option
- For checkboxes/radio buttons: click to select the correct option
- Do NOT click Next, Continue, or Submit
- Work top to bottom, left to right
- If all fields appear filled, take a screenshot and stop

Already filled fields (DO NOT touch these): ${alreadyFilledLabels}`;

  const answers = buildAnswerSummary(aep);
  const userPrompt = `Fill the remaining empty fields with this candidate data:

${answers}

Skip any fields that are already filled. Focus only on empty fields.`;

  try {
    const result = await runActionLoop(page, systemPrompt, userPrompt, apiKey, 20);
    const filled = Math.max(0, Math.floor(result.actionsExecuted / 2));
    if (result.error) {
      // The loop ended on an API/network failure; do not report that as success.
      return { success: false, filled, reason: result.error };
    }
    return { success: true, filled };
  } catch (e) {
    console.error('[vision] Precision fill error:', e.message);
    return { success: false, filled: 0, reason: e.message };
  }
}
|
|
264
|
+
|
|
265
|
+
/**
 * Targeted fallback: DOM filling already ran.
 * Claude's job is just to click through Next/Continue to the review/submit page and,
 * only when `autoSubmit` is true, click Submit. Runs at most 20 loop iterations.
 * The original author's note says this is used when findSubmitButton() returns null
 * after full pagination; that caller is not in this file.
 *
 * After the loop, `submitted` is a heuristic: true when the page body text or the final
 * URL matches common confirmation wording (e.g. "thank you", "application submitted").
 *
 * @param {object} page - Playwright page (uses url() and textContent('body') here).
 * @param {object} aep - Application payload; reads profile_fill.first_name/last_name and job.company.
 * @param {string} apiKey - Anthropic API key; when falsy nothing is attempted.
 * @param {object} [options]
 * @param {boolean} [options.autoSubmit=false] - Whether Claude may click Submit.
 * @param {Map|object|null} [options.alreadyFilled] - Labels already filled (Map keys,
 *   plain-object keys, or null/undefined for "none").
 * @returns {Promise<{success: boolean, submitted?: boolean, actionsExecuted?: number, reason?: string}>}
 *   `success` is false, with `reason`, when no API key was given, the action loop reported
 *   an error, or it threw.
 */
async function visionNavigateAndSubmit(page, aep, apiKey, { autoSubmit = false, alreadyFilled = new Map() } = {}) {
  if (!apiKey) {
    console.warn('[vision] No API key — vision fallback skipped');
    return { success: false, reason: 'No API key' };
  }

  console.log('[vision] Starting targeted navigation loop to find and submit...');

  // Accept Map-like inputs, plain objects, and null without throwing.
  const filledLabels = typeof alreadyFilled?.keys === 'function'
    ? [...alreadyFilled.keys()]
    : Object.keys(alreadyFilled ?? {});
  const alreadyFilledLabels = filledLabels.join(', ') || 'none';

  const systemPrompt = `You are controlling a web browser to complete a job application form that has already been partially filled.
Your goal is to navigate to the final submission page and, if auto-submit is enabled, click the submit button.

Rules:
- Click Next/Continue buttons to advance through multi-page forms
- If you see a review page with all filled answers, that is the target state — take a screenshot and stop
- Only click Submit if autoSubmit is true (it is currently: ${autoSubmit})
- If the page requires login or shows an error, stop and describe the issue
- Do NOT re-fill fields that are already filled (already filled: ${alreadyFilledLabels})
- Do NOT close the browser window or tab`;

  const profile = aep?.profile_fill || {};
  // Avoid leaking the literal text "undefined undefined" into the prompt.
  const candidateName = [profile.first_name, profile.last_name].filter(Boolean).join(' ') || 'the candidate';
  // Steps 3 and 4 name the flag and state its value separately; the earlier wording
  // interpolated the value into the condition ("autoSubmit=false, click it", "true is false").
  const userPrompt = `The form is already filled with data for ${candidateName} applying to ${aep?.job?.company || 'this company'}.

Navigate to the final submit/review page (autoSubmit is currently: ${autoSubmit}):
1. If you see a Next or Continue button, click it
2. If you see a review page summarizing the application, stop (this is the target state)
3. If you see a Submit button and autoSubmit is true, click it
4. If the form is already on the submit page and autoSubmit is false, just confirm you can see the submit button

Current page: ${page.url()}`;

  try {
    const result = await runActionLoop(page, systemPrompt, userPrompt, apiKey, 20);
    console.log(`[vision] Navigation complete. Actions: ${result.actionsExecuted}`);

    // Check if we landed on a confirmation/thank-you page
    const finalUrl = page.url();
    const pageText = (await page.textContent('body').catch(() => '')) ?? '';
    const isDone = /thank you|application submitted|application received|we.ve received|confirmation/i.test(pageText)
      || /thank|confirm|success|applied|submitted/i.test(finalUrl);

    if (result.error) {
      // The loop ended on an API/network failure; still report what the page looks like.
      return { success: false, submitted: isDone, actionsExecuted: result.actionsExecuted, reason: result.error };
    }
    return { success: true, submitted: isDone, actionsExecuted: result.actionsExecuted };
  } catch (e) {
    console.error('[vision] Navigation loop error:', e.message);
    return { success: false, reason: e.message };
  }
}
|
|
318
|
+
|
|
319
|
+
/**
 * Full vision fill: Claude fills every visible field on the current page from the
 * candidate data, without clicking Next/Continue/Submit. Described by the original author
 * as the last resort for ATS types that defeat all DOM automation. Runs up to
 * MAX_ITERATIONS loop iterations.
 *
 * @param {object} page - Playwright page, passed through to runActionLoop.
 * @param {object} aep - Application payload; summarised with buildAnswerSummary().
 * @param {string} apiKey - Anthropic API key; when falsy nothing is attempted.
 * @param {object} [options]
 * @param {Map|object|null} [options.alreadyFilled] - Labels to skip (Map keys,
 *   plain-object keys, or null/undefined for "none").
 * @returns {Promise<{success: boolean, filled: number, reason?: string}>}
 *   `filled` is the number of executed actions (not a count of fields). `success` is
 *   false, with `reason`, when no API key was given, the action loop reported an error,
 *   or it threw.
 */
async function visionFill(page, aep, apiKey, { alreadyFilled = new Map() } = {}) {
  if (!apiKey) {
    console.warn('[vision] No API key — vision fallback skipped');
    return { success: false, filled: 0 };
  }

  console.log('[vision] Starting full vision fill...');

  // Accept Map-like inputs, plain objects, and null without throwing.
  const filledLabels = typeof alreadyFilled?.keys === 'function'
    ? [...alreadyFilled.keys()]
    : Object.keys(alreadyFilled ?? {});
  const alreadyFilledLabels = filledLabels.join(', ') || 'none';

  const systemPrompt = `You are controlling a web browser to fill out a job application form.
Fill every visible input field, textarea, and dropdown using the candidate data provided.

Rules:
- Click a field before typing into it
- For dropdowns: click to open, wait for options, click the correct option
- For checkboxes/radio buttons: click to select the correct option
- Do NOT click Next, Continue, or Submit — only fill visible fields
- Skip fields that are already correctly filled: ${alreadyFilledLabels}
- If a field has no matching data, leave it empty
- Work systematically top to bottom, left to right`;

  const answers = buildAnswerSummary(aep);
  const userPrompt = `Fill all visible form fields with this candidate data:

${answers}

Fill each visible input field on the current page. Do not advance to the next page.`;

  try {
    const result = await runActionLoop(page, systemPrompt, userPrompt, apiKey, MAX_ITERATIONS);
    if (result.error) {
      // The loop ended on an API/network failure; do not report that as success.
      return { success: false, filled: result.actionsExecuted, reason: result.error };
    }
    return { success: true, filled: result.actionsExecuted };
  } catch (e) {
    console.error('[vision] Full fill error:', e.message);
    return { success: false, filled: 0, reason: e.message };
  }
}
|
|
360
|
+
|
|
361
|
+
/**
 * Build the plain-text candidate summary that is embedded in the fill prompts.
 *
 * Only values that are actually present are emitted, so the model never sees (and never
 * types) the literal text "undefined". The cover letter is cut to 600 characters, with
 * "..." appended only when it was actually shortened. At most the first 10 entries of
 * `field_answers` are considered; entries without a label or value are skipped, values
 * are stringified and cut to 300 characters, and the section header is emitted only when
 * at least one entry survives.
 *
 * @param {object} [aep] - Application payload; reads `profile_fill`, `cover_letter`
 *   and `field_answers` (array of `{ label, value }`).
 * @returns {string} Newline-joined summary; empty string when nothing is available.
 */
function buildAnswerSummary(aep) {
  const p = aep?.profile_fill || {};
  const fullName = [p.first_name, p.last_name].filter(Boolean).join(' ');

  const lines = [
    fullName && `Full name: ${fullName}`,
    p.email && `Email: ${p.email}`,
    p.phone && `Phone: ${p.phone}`,
    p.linkedin && `LinkedIn: ${p.linkedin}`,
    p.github && `GitHub: ${p.github}`,
    p.portfolio && `Portfolio/Website: ${p.portfolio}`,
    p.school && `Education: ${p.school}`,
    p.gpa && `GPA: ${p.gpa}`,
    p.sponsorship_text && `Visa/Sponsorship: ${p.sponsorship_text}`,
    p.relocation_text && `Relocation: ${p.relocation_text}`,
    p.salary && `Salary expectation: ${p.salary}`,
    p.gender && `Gender: ${p.gender}`,
    p.race && `Race/Ethnicity: ${p.race}`,
    p.veteran && `Veteran status: ${p.veteran}`,
    p.disability && `Disability: ${p.disability}`,
  ].filter(Boolean);

  if (aep?.cover_letter) {
    const letter = String(aep.cover_letter);
    // Only mark truncation when text was actually cut, so a short letter is not
    // pasted into the form with a stray "..." at the end.
    const excerpt = letter.length > 600 ? `${letter.slice(0, 600)}...` : letter;
    lines.push(`\nCover Letter:\n${excerpt}`);
  }

  const rawAnswers = Array.isArray(aep?.field_answers) ? aep.field_answers : [];
  const fieldAnswers = rawAnswers
    .slice(0, 10)
    .filter((fa) => fa?.label && fa.value != null && fa.value !== '');
  if (fieldAnswers.length > 0) {
    lines.push('\nApplication-specific answers:');
    for (const fa of fieldAnswers) {
      // String() so numeric/boolean answers do not throw on .slice().
      lines.push(` "${fa.label}": ${String(fa.value).slice(0, 300)}`);
    }
  }

  return lines.join('\n');
}
|
|
397
|
+
|
|
398
|
+
// Public API of this module: the three vision entry points. executeAction, runActionLoop
// and buildAnswerSummary are not exported.
module.exports = { visionFill, visionNavigateAndSubmit, visionFillSkipped };
|