halo-agent 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectFormErrors.js +188 -0
- package/orchestrator.js +113 -88
- package/package.json +2 -1
- package/smartFill.js +114 -6
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Post-submit form-error detector.
|
|
5
|
+
*
|
|
6
|
+
* The agent clicks Submit. Most ATSes (Greenhouse, Lever, Ashby, modern
|
|
7
|
+
* Workday) DON'T redirect on validation failure — they render inline:
|
|
8
|
+
* - red banner at the top
|
|
9
|
+
* - the failing field gets aria-invalid="true" + a class like .error
|
|
10
|
+
* - a sibling/child element gets the literal text "This field is required"
|
|
11
|
+
* or "Missing entry for ..."
|
|
12
|
+
*
|
|
13
|
+
* Firecrawl's verify-submit can detect these via LLM extract, but:
|
|
14
|
+
* 1. It's an external API round-trip (~3-8s)
|
|
15
|
+
* 2. It sometimes returns null when the page is JS-heavy or behind login
|
|
16
|
+
* 3. The DOM has this info FOR FREE, no LLM needed, in <100ms
|
|
17
|
+
*
|
|
18
|
+
* This module is the deterministic ground truth: walk the DOM, return a
|
|
19
|
+
* list of fields the form is currently complaining about. The orchestrator
|
|
20
|
+
* uses this to know whether to retry, regardless of what Firecrawl says.
|
|
21
|
+
*
|
|
22
|
+
* Output:
|
|
23
|
+
* {
|
|
24
|
+
* hasErrors: boolean,
|
|
25
|
+
* errorBanner: string|null, // top-level "form needs corrections" text
|
|
26
|
+
* invalidFields: [{ // one per highlighted field
|
|
27
|
+
* label, selector, mmid?, errorText
|
|
28
|
+
* }],
|
|
29
|
+
* }
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
/**
 * Text fragments that mark a per-field validation message.
 * `\s*` (not `\s+`) is deliberate: textContent of adjacent inline elements
 * can be concatenated without whitespace.
 */
const FIELD_ERROR_PATTERNS = [
  /is\s*required/i,
  /this\s*field\s*is\s*required/i,
  /missing\s*entry/i,
  /please\s*(enter|select|fill|provide)/i,
  /required\s*field/i,
  /cannot\s*be\s*blank/i,
  /^required$/i,
];

/** Text fragments that mark a form-level "fix your errors" banner. */
const BANNER_PATTERNS = [
  /form\s*needs\s*corrections/i,
  /please\s*correct/i,
  /there\s*(was|were)\s*(an?|some)\s*errors?/i,
  /please\s*review/i,
];

/**
 * Walk the live DOM and report which fields the form is currently flagging.
 *
 * All helpers live inside the `page.evaluate` callback because that callback
 * is serialized and executed in the browser: it cannot close over anything
 * from this module. The regex tables are therefore shipped as
 * `{ source, flags }` pairs and rebuilt on the page side.
 *
 * @param {object} page - object exposing `evaluate(fn, arg)` (a Playwright Page).
 * @returns {Promise<{hasErrors: boolean, errorBanner: (string|null),
 *   invalidFields: Array<{label: string, mmid: (string|null),
 *   selector: (string|null), errorText: string}>}>}
 */
async function detectFormErrors(page) {
  return await page.evaluate(({ fieldPatterns, bannerPatterns }) => {
    const fp = fieldPatterns.map((s) => new RegExp(s.source, s.flags));
    const bp = bannerPatterns.map((s) => new RegExp(s.source, s.flags));

    const TEXT_HOLDER_SELECTOR = 'div, span, p, small, li, label';
    const FILLABLE_SELECTOR = 'input:not([type=hidden]), textarea, select, [contenteditable="true"], [role="combobox"]';

    // Whitespace-collapsed text of an element, capped at 300 chars; '' for
    // elements with a zero-size box.
    function visibleText(el) {
      if (!el) return '';
      const rect = el.getBoundingClientRect();
      if (rect.width === 0 && rect.height === 0) return '';
      const t = (el.innerText || el.textContent || '').trim();
      if (!t) return '';
      return t.replace(/\s+/g, ' ').slice(0, 300);
    }

    // Escape a value for use inside a double-quoted CSS attribute selector.
    // Only `"` and `\` are touched so ordinary values keep their exact form.
    function quoteAttr(value) {
      return String(value).replace(/["\\]/g, '\\$&');
    }

    // Strategy ladder for finding a field's human label:
    //   1. <label for=id>   2. el.labels   3. aria-labelledby chain
    //   4. label/legend/h3/h4 under one of up to 6 ancestors
    //   5. aria-label / placeholder / name / id
    function nearestLabel(el) {
      if (el.id) {
        // Escaped: a raw id containing a quote would make querySelector throw
        // and abort the whole detection pass.
        const lbl = document.querySelector(`label[for="${CSS.escape(el.id)}"]`);
        if (lbl) return visibleText(lbl).replace(/\*$/, '').trim();
      }
      if (el.labels && el.labels[0]) return visibleText(el.labels[0]).replace(/\*$/, '').trim();
      const al = el.getAttribute('aria-labelledby');
      if (al) {
        const t = al.split(/\s+/).map((id) => document.getElementById(id)).filter(Boolean).map(visibleText).join(' ').trim();
        if (t) return t;
      }
      let p = el.parentElement;
      let hops = 0;
      while (p && hops < 6) {
        const lbl = p.querySelector('label, legend, h3, h4');
        if (lbl && !lbl.contains(el)) {
          const t = visibleText(lbl).replace(/\*$/, '').trim();
          if (t && t.length < 200) return t;
        }
        p = p.parentElement;
        hops += 1;
      }
      return el.getAttribute('aria-label') || el.placeholder || el.name || el.id || '(unknown)';
    }

    // Preferred handle for the field: mmid attribute, then id, then name.
    function bestSelector(el) {
      const mmid = el.getAttribute('mmid');
      if (mmid) return { mmid, selector: `[mmid="${quoteAttr(mmid)}"]` };
      if (el.id) return { selector: `#${CSS.escape(el.id)}` };
      if (el.name) return { selector: `[name="${CSS.escape(el.name)}"]` };
      return { selector: '' };
    }

    // 1. Top-level error banner — used for the log + the NEEDS_ATTENTION reason.
    // NOTE(review): any candidate with more than 30 chars of text is accepted
    // even without a pattern match, so a long non-error role="alert" message
    // would also count — confirm this is intended.
    let errorBanner = null;
    const bannerCandidates = document.querySelectorAll('[role="alert"], .error-banner, [class*="error-message"], [class*="form-error"], [class*="errors"]');
    for (const b of bannerCandidates) {
      const t = visibleText(b);
      if (!t) continue;
      if (bp.some((r) => r.test(t)) || t.length > 30) {
        errorBanner = t.slice(0, 300);
        break;
      }
    }

    // 2. Per-field flags. Three independent signals — union them:
    //    a. aria-invalid="true" set by the form's validator
    //    b. element text matching a required-pattern near a field
    //    c. element matches a CSS error class
    const invalidFields = [];
    const seenKeys = new Set();
    const seenElements = new Set();

    function addField(el, errorText) {
      if (!el || seenElements.has(el)) return;
      const sel = bestSelector(el);
      const key = sel.mmid || sel.selector;
      // Fields without mmid/id/name are deduped by element identity only;
      // keying them on a markup prefix would merge distinct look-alike inputs.
      if (key) {
        if (seenKeys.has(key)) return;
        seenKeys.add(key);
      }
      seenElements.add(el);
      invalidFields.push({
        label: nearestLabel(el).slice(0, 200),
        mmid: sel.mmid || null,
        selector: sel.selector || null,
        errorText: (errorText || 'flagged').slice(0, 200),
      });
    }

    // Signal A: aria-invalid="true", restricted to actual inputs.
    document.querySelectorAll('[aria-invalid="true"]').forEach((el) => {
      const tag = el.tagName.toLowerCase();
      const isField = ['input', 'textarea', 'select'].includes(tag)
        || el.isContentEditable
        || el.getAttribute('role') === 'combobox';
      if (!isField) return;
      let errTxt = '';
      const container = el.closest('div, fieldset, label, section');
      if (container) {
        // Take the first error-ish element that is not the field itself (or a
        // wrapper around it) and actually has visible text.
        for (const candidate of container.querySelectorAll('[class*="error"], [class*="invalid"], [role="alert"]')) {
          if (candidate === el || candidate.contains(el)) continue;
          const text = visibleText(candidate);
          if (text) {
            errTxt = text;
            break;
          }
        }
      }
      addField(el, errTxt || 'aria-invalid');
    });

    // Signal B: text-pattern walk. Catches e.g. Greenhouse's
    // `<div class="application-error">Phone is required</div>`.
    const looksLikeFieldError = (text) => Boolean(text) && text.length <= 200 && fp.some((r) => r.test(text));
    for (const el of Array.from(document.querySelectorAll(TEXT_HOLDER_SELECTOR))) {
      const t = visibleText(el);
      if (!looksLikeFieldError(t)) continue;
      // Don't double-count error containers already matched by Signal A.
      if (el.querySelector('[aria-invalid="true"]')) continue;
      // A wrapper matches only because a nested element carries the message;
      // let that innermost element be attributed instead.
      const wrapsInnerMatch = Array.from(el.querySelectorAll(TEXT_HOLDER_SELECTOR))
        .some((inner) => looksLikeFieldError(visibleText(inner)));
      if (wrapsInnerMatch) continue;
      // Nearest input: the element's own subtree first, then up to 5 ancestors.
      let cursor = el;
      let found = null;
      let hops = 0;
      while (cursor && hops <= 5 && !found) {
        found = cursor.querySelector(FILLABLE_SELECTOR);
        cursor = cursor.parentElement;
        hops += 1;
      }
      if (found) addField(found, t);
    }

    // Signal C: CSS error classes on the control itself.
    document.querySelectorAll('input[class*="error"], input[class*="invalid"], select[class*="error"], select[class*="invalid"], textarea[class*="error"], textarea[class*="invalid"]').forEach((el) => {
      addField(el, 'has-error-class');
    });

    return {
      hasErrors: errorBanner !== null || invalidFields.length > 0,
      errorBanner,
      invalidFields,
    };
  }, {
    fieldPatterns: FIELD_ERROR_PATTERNS.map((r) => ({ source: r.source, flags: r.flags })),
    bannerPatterns: BANNER_PATTERNS.map((r) => ({ source: r.source, flags: r.flags })),
  });
}
|
|
187
|
+
|
|
188
|
+
module.exports = { detectFormErrors };
|
package/orchestrator.js
CHANGED
|
@@ -13,6 +13,7 @@ const path = require('path');
|
|
|
13
13
|
const fs = require('fs');
|
|
14
14
|
const { fillFields: legacyFillFields, uploadFile, findNextButton, findSubmitButton, waitForStableDOM, snapshotFieldLabels } = require('./filler');
|
|
15
15
|
const { smartFillPage } = require('./smartFill');
|
|
16
|
+
const { detectFormErrors } = require('./detectFormErrors');
|
|
16
17
|
|
|
17
18
|
// Switchable filler — smart by default, can be killed via config.useSmartFill=false.
|
|
18
19
|
// smartFill.js internally falls back to legacyFillFields if /smartfill/plan-fill
|
|
@@ -452,16 +453,91 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
|
|
|
452
453
|
});
|
|
453
454
|
await submitBtn.click();
|
|
454
455
|
|
|
455
|
-
// Wait for confirmation page
|
|
456
|
+
// Wait for confirmation page OR for the form to render validation errors.
|
|
457
|
+
// Race the URL redirect against a stable-DOM wait — whichever resolves
|
|
458
|
+
// first tells us what happened. URL redirect → success path. Stable
|
|
459
|
+
// DOM (no redirect) → either validation error or in-page confirmation.
|
|
456
460
|
try {
|
|
457
|
-
await
|
|
461
|
+
await Promise.race([
|
|
462
|
+
page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 12000 }),
|
|
463
|
+
waitForStableDOM(page, 4000),
|
|
464
|
+
]);
|
|
458
465
|
} catch {
|
|
459
|
-
await page.waitForTimeout(
|
|
466
|
+
await page.waitForTimeout(2000);
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
470
|
+
// DOM-side error detection. The page IS the ground truth — Firecrawl
|
|
471
|
+
// is a second opinion at best, and unavailable at worst. If the page
|
|
472
|
+
// tells us right now that fields are invalid, retry IMMEDIATELY
|
|
473
|
+
// without waiting on Firecrawl.
|
|
474
|
+
//
|
|
475
|
+
// Bounded to ONE retry via ctx.submitRetryAttempted so we never
|
|
476
|
+
// infinite-loop on a truly stuck form.
|
|
477
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
478
|
+
const errors = await detectFormErrors(page).catch(() => ({ hasErrors: false, errorBanner: null, invalidFields: [] }));
|
|
479
|
+
if (errors.hasErrors && !ctx.submitRetryAttempted) {
|
|
480
|
+
ctx.submitRetryAttempted = true;
|
|
481
|
+
const errFieldList = errors.invalidFields.map((f) => f.label).slice(0, 6).join(' / ') || '(banner only)';
|
|
482
|
+
console.warn(`[orchestrator] Form has ${errors.invalidFields.length} validation error(s): ${errFieldList}`);
|
|
483
|
+
if (errors.errorBanner) console.warn(`[orchestrator] banner: ${errors.errorBanner.slice(0, 200)}`);
|
|
484
|
+
|
|
485
|
+
await reportStatus('IN_PROGRESS', {
|
|
486
|
+
step: 'RETRY_FILL',
|
|
487
|
+
step_detail: `Form rejected: ${errors.errorBanner ? errors.errorBanner.slice(0, 80) : errFieldList.slice(0, 80)} — retrying`,
|
|
488
|
+
});
|
|
489
|
+
|
|
490
|
+
// Re-fill: smartFillPage will re-scan, so the planner sees the
|
|
491
|
+
// currently-invalid fields (aria-invalid="true" propagates into
|
|
492
|
+
// AX state, and the per-field error text becomes part of the
|
|
493
|
+
// description). Bounded retry.
|
|
494
|
+
try {
|
|
495
|
+
await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
|
|
496
|
+
await waitForStableDOM(page, 1500);
|
|
497
|
+
const retrySubmitBtn = await findSubmitButton(page);
|
|
498
|
+
if (retrySubmitBtn) {
|
|
499
|
+
console.log('[orchestrator] Retry: clicking submit again...');
|
|
500
|
+
await retrySubmitBtn.click();
|
|
501
|
+
try {
|
|
502
|
+
await Promise.race([
|
|
503
|
+
page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 10000 }),
|
|
504
|
+
waitForStableDOM(page, 3000),
|
|
505
|
+
]);
|
|
506
|
+
} catch { await page.waitForTimeout(2000); }
|
|
507
|
+
} else {
|
|
508
|
+
console.warn('[orchestrator] Retry: submit button gone (form may have navigated).');
|
|
509
|
+
}
|
|
510
|
+
} catch (e) {
|
|
511
|
+
console.warn(`[orchestrator] Retry pass threw: ${e.message}`);
|
|
512
|
+
}
|
|
513
|
+
} else if (errors.hasErrors) {
|
|
514
|
+
// Already retried once; another error means the user has to step in.
|
|
515
|
+
console.warn(`[orchestrator] Form errors persist after retry: ${errors.invalidFields.length} field(s)`);
|
|
460
516
|
}
|
|
461
517
|
|
|
462
518
|
const confirmScreenshot = await page.screenshot({ type: 'jpeg', quality: 70 });
|
|
463
519
|
let confirmKey = await uploadScreenshot(config, confirmScreenshot, `confirm_${queueId}.jpg`);
|
|
464
520
|
|
|
521
|
+
// After retry: if the page STILL has errors, short-circuit to
|
|
522
|
+
// NEEDS_ATTENTION without bothering Firecrawl. We know it's broken.
|
|
523
|
+
const errorsAfter = ctx.submitRetryAttempted
|
|
524
|
+
? await detectFormErrors(page).catch(() => ({ hasErrors: false, invalidFields: [], errorBanner: null }))
|
|
525
|
+
: { hasErrors: false, invalidFields: [], errorBanner: null };
|
|
526
|
+
if (errorsAfter.hasErrors) {
|
|
527
|
+
const reason = errorsAfter.errorBanner
|
|
528
|
+
|| `${errorsAfter.invalidFields.length} field(s) still invalid: ${errorsAfter.invalidFields.map((f) => f.label).slice(0, 4).join(', ')}`;
|
|
529
|
+
console.warn(`[orchestrator] Submission rejected after retry: ${reason}`);
|
|
530
|
+
await reportStatus('NEEDS_ATTENTION', {
|
|
531
|
+
review_screenshot_r2_key: confirmKey || null,
|
|
532
|
+
needs_attention_reason: `Submit rejected by form: ${reason}`,
|
|
533
|
+
intervention_type: 'submit_failed',
|
|
534
|
+
step: 'VERIFY',
|
|
535
|
+
step_detail: reason.slice(0, 200),
|
|
536
|
+
fields_filled: cumulativeFilled,
|
|
537
|
+
});
|
|
538
|
+
throw new Error(`Submission rejected: ${reason}`);
|
|
539
|
+
}
|
|
540
|
+
|
|
465
541
|
// Verify-then-DONE: trusting waitForURL alone was wrong (the Chalk bug —
|
|
466
542
|
// Ashby rendered "Missing entry for required field: Name, Email, ..."
|
|
467
543
|
// inline without a URL change, and we marked DONE on a failed submit).
|
|
@@ -491,68 +567,17 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
|
|
|
491
567
|
// screenshot — never silently mark DONE on
|
|
492
568
|
// unverified submits (that was the Chalk bug).
|
|
493
569
|
if (verdict.submitted === false) {
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
//
|
|
498
|
-
//
|
|
499
|
-
//
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
// Bounded to ONE retry to prevent infinite loops on truly stuck forms.
|
|
503
|
-
const alreadyRetried = !!ctx.submitRetryAttempted;
|
|
504
|
-
if (!alreadyRetried) {
|
|
505
|
-
console.log('[orchestrator] Attempting fill-validate-retry: re-scanning page for highlighted errors...');
|
|
506
|
-
ctx.submitRetryAttempted = true;
|
|
507
|
-
await reportStatus('IN_PROGRESS', {
|
|
508
|
-
step: 'RETRY_FILL',
|
|
509
|
-
step_detail: `ATS rejected: ${reason.slice(0, 100)} — fixing & retrying`,
|
|
510
|
-
});
|
|
511
|
-
// Re-fill. The new scan will pick up red-highlighted required fields
|
|
512
|
-
// (their AX 'invalid' or 'required' state will tell the planner to
|
|
513
|
-
// re-attempt them). Already-filled correct fields stay put.
|
|
514
|
-
try {
|
|
515
|
-
await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
|
|
516
|
-
await waitForStableDOM(page, 1500);
|
|
517
|
-
const retrySubmitBtn = await findSubmitButton(page);
|
|
518
|
-
if (retrySubmitBtn) {
|
|
519
|
-
console.log('[orchestrator] Retry: clicking submit again...');
|
|
520
|
-
await retrySubmitBtn.click();
|
|
521
|
-
try { await page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 12000 }); } catch { await page.waitForTimeout(2500); }
|
|
522
|
-
// Re-verify
|
|
523
|
-
const retryUrl = page.url();
|
|
524
|
-
const retryShot = await page.screenshot({ type: 'jpeg', quality: 70 }).catch(() => null);
|
|
525
|
-
const retryShotKey = retryShot ? await uploadScreenshot(config, retryShot, `confirm_retry_${queueId}.jpg`) : confirmKey;
|
|
526
|
-
let retryVerdict = { submitted: null, error_message: null, source: 'unavailable' };
|
|
527
|
-
try {
|
|
528
|
-
const rRes = await fetch(`${config.apiUrl}/agent/verify-submit`, {
|
|
529
|
-
method: 'POST',
|
|
530
|
-
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
|
|
531
|
-
body: JSON.stringify({ queue_id: queueId, page_url: retryUrl }),
|
|
532
|
-
});
|
|
533
|
-
if (rRes.ok) retryVerdict = await rRes.json();
|
|
534
|
-
} catch {}
|
|
535
|
-
if (retryVerdict.submitted === true) {
|
|
536
|
-
await reportStatus('DONE', { confirmation_screenshot_r2_key: retryShotKey || null, fields_filled: cumulativeFilled });
|
|
537
|
-
await clearCheckpoint(config, queueId);
|
|
538
|
-
console.log(`[orchestrator] Done (retry-after-validation-error): ${queueItem.company} - ${queueItem.title}`);
|
|
539
|
-
return;
|
|
540
|
-
}
|
|
541
|
-
// Retry verifier also unsure — fall through to NEEDS_ATTENTION with both screenshots
|
|
542
|
-
console.warn(`[orchestrator] Retry verdict: ${retryVerdict.submitted}; source=${retryVerdict.source}`);
|
|
543
|
-
confirmKey = retryShotKey || confirmKey;
|
|
544
|
-
} else {
|
|
545
|
-
console.warn('[orchestrator] Retry: no submit button visible after re-fill — page may have navigated.');
|
|
546
|
-
}
|
|
547
|
-
} catch (e) {
|
|
548
|
-
console.warn(`[orchestrator] Retry pass threw: ${e.message}`);
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
// No retry available (or retry didn't succeed) — surface for human.
|
|
570
|
+
// The DOM-error pass above already retried + short-circuited if errors
|
|
571
|
+
// remained. If we got here AND Firecrawl is saying false, the DOM was
|
|
572
|
+
// clean (no aria-invalid / no banner / no "is required" text) but
|
|
573
|
+
// Firecrawl still detected something wrong — probably a thank-you
|
|
574
|
+
// page that includes some apologetic text the LLM misread. Surface
|
|
575
|
+
// it gently rather than throwing; user can audit the screenshot.
|
|
576
|
+
const reason = verdict.error_message || 'Firecrawl could not confirm submission';
|
|
577
|
+
console.warn(`[orchestrator] Firecrawl says NOT submitted (DOM looked clean): ${reason}`);
|
|
553
578
|
await reportStatus('NEEDS_ATTENTION', {
|
|
554
579
|
review_screenshot_r2_key: confirmKey || null,
|
|
555
|
-
needs_attention_reason: `
|
|
580
|
+
needs_attention_reason: `Verifier flagged this submit: ${reason}. Page DOM looked clean — please eyeball.`,
|
|
556
581
|
intervention_type: 'submit_failed',
|
|
557
582
|
step: 'VERIFY',
|
|
558
583
|
step_detail: reason.slice(0, 200),
|
|
@@ -561,44 +586,44 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
|
|
|
561
586
|
throw new Error(`Submission failed verification: ${reason}`);
|
|
562
587
|
}
|
|
563
588
|
|
|
589
|
+
// Decide final state (DONE / REVIEWING) based on verifier verdict +
|
|
590
|
+
// auto-submit, but DO NOT return — the audit-trail postFillSession
|
|
591
|
+
// call at the bottom of this block runs for every terminal state so
|
|
592
|
+
// the user's receipt detail always has resume PDF + cover letter PDF +
|
|
593
|
+
// per-field decisions.
|
|
594
|
+
let finalState = 'DONE';
|
|
564
595
|
if (verdict.submitted === null) {
|
|
565
596
|
const autoSubmit = config.autoSubmit || aep.agent_config?.auto_submit;
|
|
566
597
|
if (autoSubmit) {
|
|
567
|
-
// Auto-submit ON + verifier unavailable: trust the click
|
|
568
|
-
//
|
|
569
|
-
// screenshot prominently — if it shows a red banner, the user
|
|
570
|
-
// clicks "Not submitted" on the receipt and we re-queue.
|
|
571
|
-
// This is the user-chosen policy: false-positives surface visually,
|
|
572
|
-
// not as a blocked REVIEWING row. Faster loop, audit by eyeball.
|
|
598
|
+
// Auto-submit ON + verifier unavailable: trust the click; the
|
|
599
|
+
// screenshot becomes the audit trail.
|
|
573
600
|
console.log(`[orchestrator] Verifier unavailable (source: ${verdict.source}); auto-submit ON — trusting click, screenshot is the receipt.`);
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
step_detail: 'Submitted (verifier unavailable, trust-on-click)',
|
|
579
|
-
});
|
|
580
|
-
await clearCheckpoint(config, queueId);
|
|
581
|
-
return;
|
|
601
|
+
} else {
|
|
602
|
+
// No auto-submit → REVIEWING so the user eyeballs first.
|
|
603
|
+
console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). REVIEWING — please eyeball the screenshot + click Submit.`);
|
|
604
|
+
finalState = 'REVIEWING';
|
|
582
605
|
}
|
|
583
|
-
|
|
584
|
-
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
if (finalState === 'REVIEWING') {
|
|
585
609
|
await reportStatus('REVIEWING', {
|
|
586
610
|
review_screenshot_r2_key: confirmKey || null,
|
|
587
611
|
step: 'REVIEWING',
|
|
588
612
|
step_detail: `Submit clicked at ${verdictUrl.slice(0, 100)} — verifier unavailable, please confirm`,
|
|
589
613
|
fields_filled: cumulativeFilled,
|
|
590
614
|
});
|
|
591
|
-
|
|
615
|
+
} else {
|
|
616
|
+
await reportStatus('DONE', {
|
|
617
|
+
confirmation_screenshot_r2_key: confirmKey || null,
|
|
618
|
+
fields_filled: cumulativeFilled,
|
|
619
|
+
});
|
|
620
|
+
await clearCheckpoint(config, queueId);
|
|
621
|
+
const verifiedTag = verdict.source === 'firecrawl' ? 'firecrawl-verified'
|
|
622
|
+
: verdict.source === 'url_pattern' ? 'url-pattern-verified'
|
|
623
|
+
: 'unverified-but-auto-submit';
|
|
624
|
+
console.log(`[orchestrator] Done (${verifiedTag}): ${queueItem.company} - ${queueItem.title}`);
|
|
592
625
|
}
|
|
593
626
|
|
|
594
|
-
await reportStatus('DONE', {
|
|
595
|
-
confirmation_screenshot_r2_key: confirmKey || null,
|
|
596
|
-
fields_filled: cumulativeFilled,
|
|
597
|
-
});
|
|
598
|
-
await clearCheckpoint(config, queueId);
|
|
599
|
-
|
|
600
|
-
console.log(`[orchestrator] Done (verified): ${queueItem.company} - ${queueItem.title} · firecrawl-verified`);
|
|
601
|
-
|
|
602
627
|
// Post fill session data to backend for learning loop + receipt audit
|
|
603
628
|
// trail. filled_actions becomes the per-field decision list the user
|
|
604
629
|
// sees on Applications detail — "agent typed X for First Name because
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "halo-agent",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.4",
|
|
4
4
|
"description": "HALO local apply agent — auto-fills job applications using your real Chrome session",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
"scanPage.js",
|
|
25
25
|
"scanAccessibility.js",
|
|
26
26
|
"smartFill.js",
|
|
27
|
+
"detectFormErrors.js",
|
|
27
28
|
"captcha.js",
|
|
28
29
|
"vision.js",
|
|
29
30
|
"manusAutomate.js",
|
package/smartFill.js
CHANGED
|
@@ -196,11 +196,93 @@ async function executePlanItem(page, item, fieldByMmid, ctx) {
|
|
|
196
196
|
* Falls back to LLM synonym matching when local exact/substring
|
|
197
197
|
* matching fails, so "Straight" still picks "Heterosexual."
|
|
198
198
|
*/
|
|
199
|
+
/**
|
|
200
|
+
* Special-case intl-tel-input country picker. The Country field on
|
|
201
|
+
* Greenhouse forms isn't a normal dropdown — it's an <input type=tel>
|
|
202
|
+
* with a sibling .iti__selected-flag button. Clicking the input does
|
|
203
|
+
* NOTHING; you have to click the flag to open the list, then click the
|
|
204
|
+
* matching .iti__country list item.
|
|
205
|
+
*
|
|
206
|
+
* Returns null if this isn't an intl-tel-input field; otherwise the
|
|
207
|
+
* fill result.
|
|
208
|
+
*/
|
|
209
|
+
/**
 * Pick a country in an intl-tel-input widget.
 *
 * The widget is an input wrapped in an `.iti` container with a sibling flag
 * button; the list only opens when the flag is clicked, after which the
 * matching `.iti__country` item is clicked.
 *
 * Matching order: exact country name, then substring either way, then dial
 * code. Name matching is skipped when the value carries no name at all (e.g.
 * "+1"), and items with an empty name never match by substring — otherwise
 * `''.includes` semantics would select the first list entry.
 *
 * @param {object} page - Playwright page (uses `locator`, `waitForTimeout`, `keyboard`).
 * @param {object} triggerLocator - locator for the phone/country input.
 * @param {*} value - desired country, e.g. "United States", "United States +1", "+44".
 * @returns {Promise<null|{ok: boolean, reason: string}>} null when the field is
 *   not an intl-tel-input; otherwise the fill result.
 */
async function tryIntlTelCountry(page, triggerLocator, value) {
  let flag = null;
  try {
    // Is this field wrapped by an .iti container?
    const wrap = await triggerLocator.evaluate((el) => {
      const c = el.closest('.iti, .iti--allow-dropdown');
      return c ? { has: true } : null;
    }).catch(() => null);
    if (!wrap) return null;

    // Click the flag button to open the country list.
    flag = await triggerLocator.evaluateHandle((el) => {
      const c = el.closest('.iti, .iti--allow-dropdown');
      return c?.querySelector('.iti__selected-flag, [aria-label*="country" i]') || null;
    });
    if (!flag || !(await flag.evaluate((n) => !!n).catch(() => false))) return null;
    await flag.click({ timeout: 1500 }).catch(async () => {
      // Some intl-tel versions need force-click because the flag is
      // positioned absolute under the input.
      await flag.click({ force: true, timeout: 1500 }).catch(() => {});
    });
    await page.waitForTimeout(400);

    // Name part of the value with any trailing dial code removed:
    // "United States +1" and "United States (+1)" both become "united states".
    const rawValue = String(value);
    const v = rawValue.toLowerCase().replace(/\s*\(?\+\d+\)?\s*$/, '').trim();
    const items = page.locator('.iti__country-list:visible .iti__country, .iti__dropdown-content .iti__country');
    const count = await items.count().catch(() => 0);
    if (count === 0) {
      // Country list never opened
      await page.keyboard.press('Escape').catch(() => {});
      return { ok: false, reason: 'intl-tel country list did not open' };
    }

    // Collect name + dial code for every list item.
    const names = await items.evaluateAll((nodes) => nodes.map((n) => {
      const nm = n.querySelector('.iti__country-name')?.textContent?.trim() || '';
      const dial = n.querySelector('.iti__dial-code')?.textContent?.trim() || '';
      return { name: nm, dial };
    })).catch(() => []);

    let idx = -1;
    if (v) {
      idx = names.findIndex((n) => n.name.toLowerCase() === v);
      if (idx === -1) {
        idx = names.findIndex((n) => {
          const candidate = n.name.toLowerCase();
          return candidate !== '' && (candidate.includes(v) || v.includes(candidate));
        });
      }
    }
    if (idx === -1) {
      // Fall back to the dial code (e.g. "+1").
      const dialMatch = rawValue.match(/\+(\d+)/);
      if (dialMatch) idx = names.findIndex((n) => n.dial === `+${dialMatch[1]}`);
    }
    if (idx === -1) {
      await page.keyboard.press('Escape').catch(() => {});
      return { ok: false, reason: `no intl-tel country matched "${value}"` };
    }
    await items.nth(idx).click({ timeout: 1500 }).catch(async () => {
      await items.nth(idx).click({ force: true, timeout: 1500 });
    });
    return { ok: true, reason: `intl-tel picked: ${names[idx].name} ${names[idx].dial}` };
  } catch (e) {
    return { ok: false, reason: `intl-tel handler threw: ${e.message}` };
  } finally {
    // Release the handle created by evaluateHandle; best-effort.
    if (flag && typeof flag.dispose === 'function') {
      await flag.dispose().catch(() => {});
    }
  }
}
|
|
267
|
+
|
|
199
268
|
async function openAndPickOption(page, triggerLocator, value, llmCtx) {
|
|
269
|
+
try {
|
|
270
|
+
// First: intl-tel-input special case. The Country dial-code picker on
|
|
271
|
+
// Greenhouse is a non-standard widget that ignores clicks to its
|
|
272
|
+
// input — has to click the flag button. tryIntlTelCountry returns
|
|
273
|
+
// null when the field isn't intl-tel, so other dropdowns continue.
|
|
274
|
+
const itlResult = await tryIntlTelCountry(page, triggerLocator, value);
|
|
275
|
+
if (itlResult !== null) return itlResult;
|
|
276
|
+
} catch {}
|
|
277
|
+
|
|
200
278
|
try {
|
|
201
279
|
// Snapshot option-list state BEFORE opening so we can identify
|
|
202
|
-
// the new options.
|
|
203
|
-
|
|
280
|
+
// the new options. Includes:
|
|
281
|
+
// .pac-item — Google Places (Greenhouse Location)
|
|
282
|
+
// .iti__country — intl-tel-input (Greenhouse Country dial-code)
|
|
283
|
+
// .select__option — React-Select
|
|
284
|
+
// role=option — ARIA-correct dropdowns
|
|
285
|
+
const optionSel = '[role="option"], [role="menuitem"], .select__option, li[class*="option"], .pac-item, .iti__country';
|
|
204
286
|
const beforeCount = await page.locator(optionSel).count().catch(() => 0);
|
|
205
287
|
|
|
206
288
|
await triggerLocator.click({ timeout: 2500 });
|
|
@@ -333,12 +415,35 @@ async function typeAndPickSuggestion(page, locator, value) {
|
|
|
333
415
|
await locator.press('Delete').catch(() => {});
|
|
334
416
|
const firstChunk = String(value).split(/[,;]/)[0].trim();
|
|
335
417
|
await page.keyboard.type(firstChunk, { delay: 60 });
|
|
336
|
-
|
|
337
|
-
|
|
418
|
+
// Wait for suggestions to render. Google Places PAC takes longer than
|
|
419
|
+
// most (~800ms-1.2s), so wait a bit more aggressively before giving up.
|
|
420
|
+
await page.waitForTimeout(900);
|
|
421
|
+
// Selector ladder — Google Places (.pac-item) uses no role attribute,
|
|
422
|
+
// so we ADD it to the union. Without this, Greenhouse Location always
|
|
423
|
+
// failed because its dropdown is .pac-container > .pac-item.
|
|
424
|
+
const optionSel = [
|
|
425
|
+
'.pac-item', // Google Places (Greenhouse Location)
|
|
426
|
+
'[role="option"]', // ARIA-correct dropdowns
|
|
427
|
+
'[role="listbox"] li', // older listbox conventions
|
|
428
|
+
'[role="listbox"] [role="option"]', // nested
|
|
429
|
+
'.select__option', // React-Select
|
|
430
|
+
'ul[class*="autocomplete"] li', // generic autocomplete
|
|
431
|
+
'ul[class*="suggestion"] li',
|
|
432
|
+
'div[class*="suggestion"]',
|
|
433
|
+
].join(', ');
|
|
434
|
+
// Wait for ANY option to actually appear (some libs lazy-render). Up
|
|
435
|
+
// to 1.5s additional. waitFor errors if nothing appears — that's the
|
|
436
|
+
// signal that the field accepted free text instead.
|
|
437
|
+
try {
|
|
438
|
+
await page.locator(optionSel).first().waitFor({ state: 'visible', timeout: 1500 });
|
|
439
|
+
} catch {
|
|
440
|
+
const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
|
|
441
|
+
if (got && got.trim()) return { ok: true, reason: `typeahead (no suggestion, accepted): "${value.slice(0, 30)}"` };
|
|
442
|
+
return { ok: false, reason: 'typeahead opened no suggestions' };
|
|
443
|
+
}
|
|
338
444
|
const opts = page.locator(optionSel);
|
|
339
445
|
const count = await opts.count().catch(() => 0);
|
|
340
446
|
if (count === 0) {
|
|
341
|
-
// Some fields accept the typed value directly. Verify.
|
|
342
447
|
const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
|
|
343
448
|
if (got && got.trim()) return { ok: true, reason: `typeahead (no suggestion, accepted): "${value.slice(0, 30)}"` };
|
|
344
449
|
return { ok: false, reason: 'typeahead opened no suggestions' };
|
|
@@ -348,7 +453,10 @@ async function typeAndPickSuggestion(page, locator, value) {
|
|
|
348
453
|
let idx = texts.findIndex((t) => t.toLowerCase().trim() === v);
|
|
349
454
|
if (idx === -1) idx = texts.findIndex((t) => t.toLowerCase().trim().startsWith(v));
|
|
350
455
|
if (idx === -1) idx = 0;
|
|
351
|
-
|
|
456
|
+
// Google Places PAC items have a quirky click handler that doesn't fire
|
|
457
|
+
// on Playwright's normal click — use force-click as fallback.
|
|
458
|
+
try { await opts.nth(idx).click({ timeout: 2000 }); }
|
|
459
|
+
catch { await opts.nth(idx).click({ force: true, timeout: 2000 }); }
|
|
352
460
|
await page.waitForTimeout(200);
|
|
353
461
|
return { ok: true, reason: `typeahead picked: ${texts[idx]}` };
|
|
354
462
|
} catch (e) {
|