halo-agent 2.0.3 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Post-submit form-error detector.
5
+ *
6
+ * The agent clicks Submit. Most ATSes (Greenhouse, Lever, Ashby, modern
7
+ * Workday) DON'T redirect on validation failure — they render inline:
8
+ * - red banner at the top
9
+ * - the failing field gets aria-invalid="true" + a class like .error
10
+ * - a sibling/child element gets the literal text "This field is required"
11
+ * or "Missing entry for ..."
12
+ *
13
+ * Firecrawl's verify-submit can detect these via LLM extract, but:
14
+ * 1. It's an external API round-trip (~3-8s)
15
+ * 2. It sometimes returns null when the page is JS-heavy or behind login
16
+ * 3. The DOM has this info FOR FREE, no LLM needed, in <100ms
17
+ *
18
+ * This module is the deterministic ground truth: walk the DOM, return a
19
+ * list of fields the form is currently complaining about. The orchestrator
20
+ * uses this to know whether to retry, regardless of what Firecrawl says.
21
+ *
22
+ * Output:
23
+ * {
24
+ * hasErrors: boolean,
25
+ * errorBanner: string|null, // top-level "form needs corrections" text
26
+ * invalidFields: [{ // one per highlighted field
27
+ * label, selector, mmid?, errorText
28
+ * }],
29
+ * }
30
+ */
31
+
32
// Text fragments that mark a per-field validation message
// ("Phone is required", "Please enter ...", "Missing entry for ...").
const FIELD_ERROR_PATTERNS = [
  /is\s*required/i,
  /this\s*field\s*is\s*required/i,
  /missing\s*entry/i,
  /please\s*(enter|select|fill|provide)/i,
  /required\s*field/i,
  /cannot\s*be\s*blank/i,
  /^required$/i,
];

// Text fragments that mark a form-level "fix the errors below" banner.
const BANNER_PATTERNS = [
  /form\s*needs\s*corrections/i,
  /please\s*correct/i,
  /there\s*(was|were)\s*(an?|some)\s*errors?/i,
  /please\s*review/i,
];

/**
 * Scan the current page's DOM for inline form-validation errors.
 *
 * All of the scanning logic runs inside a single `page.evaluate` call, so the
 * callback must stay self-contained: it cannot close over anything defined in
 * this module. The regex lists are therefore shipped as `{ source, flags }`
 * pairs and rebuilt on the page side.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)` (a Playwright
 *   page in the orchestrator).
 * @returns {Promise<{hasErrors: boolean, errorBanner: (string|null),
 *   invalidFields: Array<{label: string, mmid: (string|null),
 *   selector: (string|null), errorText: string}>}>}
 *   `hasErrors` is true when a banner was found or at least one empty field
 *   was flagged.
 */
async function detectFormErrors(page) {
  const toPlain = (r) => ({ source: r.source, flags: r.flags });

  return page.evaluate(({ fieldPatterns, bannerPatterns }) => {
    const fp = fieldPatterns.map((s) => new RegExp(s.source, s.flags));
    const bp = bannerPatterns.map((s) => new RegExp(s.source, s.flags));
    const matchesAny = (patterns, text) => patterns.some((r) => r.test(text));

    // Quote-safe interpolation into a double-quoted attribute selector.
    // Only `"` and `\` are escaped so ordinary values produce exactly the
    // same selector text as plain interpolation would.
    const attrValue = (raw) => String(raw).replace(/["\\]/g, '\\$&');

    // Whitespace-collapsed text of an element, capped at 300 chars. Elements
    // whose bounding box is 0x0 are treated as invisible and yield ''.
    function visibleText(el) {
      if (!el) return '';
      const rect = el.getBoundingClientRect();
      if (rect.width === 0 && rect.height === 0) return '';
      const t = (el.innerText || el.textContent || '').trim();
      if (!t) return '';
      return t.replace(/\s+/g, ' ').slice(0, 300);
    }

    // Label text with a trailing required-marker asterisk removed.
    const labelText = (el) => visibleText(el).replace(/\*$/, '').trim();

    // Strategy ladder for finding a field's human label:
    //   1. <label for=id>
    //   2. el.labels
    //   3. aria-labelledby chain
    //   4. a label/legend/h3/h4 under one of the nearest 6 ancestors
    //   5. fallback to aria-label / placeholder / name / id
    function nearestLabel(el) {
      if (el.id) {
        // The id is escaped: an id containing a quote or backslash would
        // otherwise make querySelector throw and abort the whole scan.
        const lbl = document.querySelector(`label[for="${attrValue(el.id)}"]`);
        if (lbl) return labelText(lbl);
      }
      if (el.labels && el.labels[0]) return labelText(el.labels[0]);
      const al = el.getAttribute('aria-labelledby');
      if (al) {
        const t = al.split(/\s+/)
          .map((id) => document.getElementById(id))
          .filter(Boolean)
          .map(visibleText)
          .join(' ')
          .trim();
        if (t) return t;
      }
      let p = el.parentElement;
      for (let hops = 0; p && hops < 6; hops += 1) {
        const lbl = p.querySelector('label, legend, h3, h4');
        if (lbl && !lbl.contains(el)) {
          const t = labelText(lbl);
          if (t && t.length < 200) return t;
        }
        p = p.parentElement;
      }
      return el.getAttribute('aria-label') || el.placeholder || el.name || el.id || '(unknown)';
    }

    // Most specific way to re-find the element later: mmid attribute first,
    // then #id, then [name=...]. `selector` is '' when none is available.
    function bestSelector(el) {
      const mmid = el.getAttribute('mmid');
      if (mmid) return { mmid, selector: `[mmid="${attrValue(mmid)}"]` };
      if (el.id) return { selector: `#${CSS.escape(el.id)}` };
      if (el.name) return { selector: `[name="${CSS.escape(el.name)}"]` };
      return { selector: '' };
    }

    // 1. Top-level error banner — used for the log + the NEEDS_ATTENTION reason.
    const ERROR_CLASS_SELECTOR = '.error-banner, [class*="error-message"], [class*="form-error"], [class*="errors"]';
    // Wording that marks a confirmation rather than a rejection.
    const SUCCESS_TEXT = /thank\s*you|thanks\s+for|successfully|application\s+(was\s+|has\s+been\s+)?(received|submitted|sent)/i;
    let errorBanner = null;
    for (const b of document.querySelectorAll(`[role="alert"], ${ERROR_CLASS_SELECTOR}`)) {
      const t = visibleText(b);
      if (!t) continue;
      const explicit = matchesAny(bp, t);
      // A generic role="alert" node (no error-ish class) whose text reads as a
      // confirmation and carries no error wording is a success toast, not a
      // rejection. Without this guard any alert longer than 30 chars sets
      // hasErrors, which turns a successful submit into a reported failure.
      const successToast = !explicit
        && !b.matches(ERROR_CLASS_SELECTOR)
        && SUCCESS_TEXT.test(t)
        && !matchesAny(fp, t);
      if (successToast) continue;
      if (explicit || t.length > 30) {
        errorBanner = t; // visibleText already caps the text at 300 chars
        break;
      }
    }

    // 2. Per-field flags. Three independent signals — union them:
    //      a. aria-invalid="true" set by the form's validator
    //      b. required-pattern text rendered near a field
    //      c. a CSS error class on the control itself
    const invalidFields = [];
    const seenKeys = new Set();

    // CRITICAL: only fields that are ACTUALLY empty are flagged. Forms may
    // highlight every required field after a failed submit; re-touching
    // already-answered fields on retry is what changed correct answers.
    function isEmptyField(el) {
      const tag = el.tagName.toLowerCase();
      const type = (el.type || '').toLowerCase();
      if (type === 'checkbox') return !el.checked;
      if (type === 'radio') {
        if (el.checked) return false;
        if (!el.name) return true;
        // A radio question is answered when ANY radio of the same name in the
        // same form (or document, when form-less) is checked. Looking only at
        // this element flags answered groups whenever the scan happens to
        // land on one of the unchecked options.
        const scope = el.form || document;
        return !Array.from(scope.querySelectorAll('input[type="radio"]'))
          .some((r) => r.name === el.name && r.checked);
      }
      if (tag === 'select') {
        if (!el.value) return true;
        // The selected option is a valueless placeholder ("Select...").
        const selOpt = el.options[el.selectedIndex];
        return Boolean(selOpt && !selOpt.value);
      }
      if (type === 'file') return !el.files || el.files.length === 0;
      if (el.isContentEditable) return !(el.innerText || '').trim();
      if (el.getAttribute('role') === 'combobox') {
        // Custom dropdown trigger: a non-empty chip/pill inside the wrapping
        // select/combobox container counts as a value; otherwise fall back to
        // the element's own value / text.
        const wrap = el.closest('[class*="select"], [class*="combobox"]');
        const chip = wrap?.querySelector('[class*="chip"], [class*="multi-value"], [class*="tag"]');
        if (chip && (chip.innerText || '').trim()) return false;
        return !(el.value || el.innerText || '').trim();
      }
      // Plain inputs / textareas.
      return !(el.value || '').trim();
    }

    // Record a field once (de-duplicated by mmid, selector, or an outerHTML
    // prefix), skipping fields that already hold a value.
    function addField(el, errorText) {
      if (!el) return;
      if (!isEmptyField(el)) return;
      const sel = bestSelector(el);
      const key = sel.mmid || sel.selector || el.outerHTML.slice(0, 80);
      if (seenKeys.has(key)) return;
      seenKeys.add(key);
      invalidFields.push({
        label: nearestLabel(el).slice(0, 200),
        mmid: sel.mmid || null,
        selector: sel.selector || null,
        errorText: (errorText || 'flagged').slice(0, 200),
      });
    }

    // Signal A: aria-invalid="true", restricted to real form controls.
    document.querySelectorAll('[aria-invalid="true"]').forEach((el) => {
      const tag = el.tagName.toLowerCase();
      const isControl = ['input', 'textarea', 'select'].includes(tag)
        || el.isContentEditable
        || el.getAttribute('role') === 'combobox';
      if (!isControl) return;
      // Error text: first error-ish node inside the nearest container.
      let errTxt = '';
      const parent = el.closest('div, fieldset, label, section');
      if (parent) {
        const errEl = parent.querySelector('[class*="error"], [class*="invalid"], [role="alert"]');
        if (errEl && !errEl.contains(el)) errTxt = visibleText(errEl);
      }
      addField(el, errTxt || 'aria-invalid');
    });

    // Signal B: text-pattern walk. Short text matching a field-error pattern
    // → first fillable control found under one of the next 5 ancestors.
    for (const el of Array.from(document.querySelectorAll('div, span, p, small, li, label'))) {
      const t = visibleText(el);
      if (!t || t.length > 200) continue;
      if (!matchesAny(fp, t)) continue;
      // Containers that hold an aria-invalid control were handled by Signal A.
      if (el.querySelector('[aria-invalid="true"]')) continue;
      let cursor = el;
      let found = null;
      for (let hops = 0; cursor && hops < 5 && !found; hops += 1) {
        const parent = cursor.parentElement;
        if (!parent) break;
        found = parent.querySelector('input:not([type=hidden]), textarea, select, [contenteditable="true"], [role="combobox"]');
        cursor = parent;
      }
      if (found) addField(found, t);
    }

    // Signal C: error/invalid CSS classes on the control itself. All three
    // native control types are checked for both class fragments.
    document.querySelectorAll(
      'input[class*="error"], input[class*="invalid"], '
      + 'select[class*="error"], select[class*="invalid"], '
      + 'textarea[class*="error"], textarea[class*="invalid"]'
    ).forEach((el) => {
      addField(el, 'has-error-class');
    });

    return {
      hasErrors: errorBanner !== null || invalidFields.length > 0,
      errorBanner,
      invalidFields,
    };
  }, {
    fieldPatterns: FIELD_ERROR_PATTERNS.map(toPlain),
    bannerPatterns: BANNER_PATTERNS.map(toPlain),
  });
}
230
+
231
+ module.exports = { detectFormErrors };
package/orchestrator.js CHANGED
@@ -13,6 +13,7 @@ const path = require('path');
13
13
  const fs = require('fs');
14
14
  const { fillFields: legacyFillFields, uploadFile, findNextButton, findSubmitButton, waitForStableDOM, snapshotFieldLabels } = require('./filler');
15
15
  const { smartFillPage } = require('./smartFill');
16
+ const { detectFormErrors } = require('./detectFormErrors');
16
17
 
17
18
  // Switchable filler — smart by default, can be killed via config.useSmartFill=false.
18
19
  // smartFill.js internally falls back to legacyFillFields if /smartfill/plan-fill
@@ -452,16 +453,91 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
452
453
  });
453
454
  await submitBtn.click();
454
455
 
455
- // Wait for confirmation page
456
+ // Wait for confirmation page OR for the form to render validation errors.
457
+ // Race the URL redirect against a stable-DOM wait — whichever resolves
458
+ // first tells us what happened. URL redirect → success path. Stable
459
+ // DOM (no redirect) → either validation error or in-page confirmation.
456
460
  try {
457
- await page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 15000 });
461
+ await Promise.race([
462
+ page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 12000 }),
463
+ waitForStableDOM(page, 4000),
464
+ ]);
458
465
  } catch {
459
- await page.waitForTimeout(3000); // fallback wait
466
+ await page.waitForTimeout(2000);
467
+ }
468
+
469
+ // ─────────────────────────────────────────────────────────────────────
470
+ // DOM-side error detection. The page IS the ground truth — Firecrawl
471
+ // is a second opinion at best, and unavailable at worst. If the page
472
+ // tells us right now that fields are invalid, retry IMMEDIATELY
473
+ // without waiting on Firecrawl.
474
+ //
475
+ // Bounded to ONE retry via ctx.submitRetryAttempted so we never
476
+ // infinite-loop on a truly stuck form.
477
+ // ─────────────────────────────────────────────────────────────────────
478
+ const errors = await detectFormErrors(page).catch(() => ({ hasErrors: false, errorBanner: null, invalidFields: [] }));
479
+ if (errors.hasErrors && !ctx.submitRetryAttempted) {
480
+ ctx.submitRetryAttempted = true;
481
+ const errFieldList = errors.invalidFields.map((f) => f.label).slice(0, 6).join(' / ') || '(banner only)';
482
+ console.warn(`[orchestrator] Form has ${errors.invalidFields.length} validation error(s): ${errFieldList}`);
483
+ if (errors.errorBanner) console.warn(`[orchestrator] banner: ${errors.errorBanner.slice(0, 200)}`);
484
+
485
+ await reportStatus('IN_PROGRESS', {
486
+ step: 'RETRY_FILL',
487
+ step_detail: `Form rejected: ${errors.errorBanner ? errors.errorBanner.slice(0, 80) : errFieldList.slice(0, 80)} — retrying`,
488
+ });
489
+
490
+ // Re-fill: smartFillPage will re-scan, so the planner sees the
491
+ // currently-invalid fields (aria-invalid="true" propagates into
492
+ // AX state, and the per-field error text becomes part of the
493
+ // description). Bounded retry.
494
+ try {
495
+ await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
496
+ await waitForStableDOM(page, 1500);
497
+ const retrySubmitBtn = await findSubmitButton(page);
498
+ if (retrySubmitBtn) {
499
+ console.log('[orchestrator] Retry: clicking submit again...');
500
+ await retrySubmitBtn.click();
501
+ try {
502
+ await Promise.race([
503
+ page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 10000 }),
504
+ waitForStableDOM(page, 3000),
505
+ ]);
506
+ } catch { await page.waitForTimeout(2000); }
507
+ } else {
508
+ console.warn('[orchestrator] Retry: submit button gone (form may have navigated).');
509
+ }
510
+ } catch (e) {
511
+ console.warn(`[orchestrator] Retry pass threw: ${e.message}`);
512
+ }
513
+ } else if (errors.hasErrors) {
514
+ // Already retried once; another error means the user has to step in.
515
+ console.warn(`[orchestrator] Form errors persist after retry: ${errors.invalidFields.length} field(s)`);
460
516
  }
461
517
 
462
518
  const confirmScreenshot = await page.screenshot({ type: 'jpeg', quality: 70 });
463
519
  let confirmKey = await uploadScreenshot(config, confirmScreenshot, `confirm_${queueId}.jpg`);
464
520
 
521
+ // After retry: if the page STILL has errors, short-circuit to
522
+ // NEEDS_ATTENTION without bothering Firecrawl. We know it's broken.
523
+ const errorsAfter = ctx.submitRetryAttempted
524
+ ? await detectFormErrors(page).catch(() => ({ hasErrors: false, invalidFields: [], errorBanner: null }))
525
+ : { hasErrors: false, invalidFields: [], errorBanner: null };
526
+ if (errorsAfter.hasErrors) {
527
+ const reason = errorsAfter.errorBanner
528
+ || `${errorsAfter.invalidFields.length} field(s) still invalid: ${errorsAfter.invalidFields.map((f) => f.label).slice(0, 4).join(', ')}`;
529
+ console.warn(`[orchestrator] Submission rejected after retry: ${reason}`);
530
+ await reportStatus('NEEDS_ATTENTION', {
531
+ review_screenshot_r2_key: confirmKey || null,
532
+ needs_attention_reason: `Submit rejected by form: ${reason}`,
533
+ intervention_type: 'submit_failed',
534
+ step: 'VERIFY',
535
+ step_detail: reason.slice(0, 200),
536
+ fields_filled: cumulativeFilled,
537
+ });
538
+ throw new Error(`Submission rejected: ${reason}`);
539
+ }
540
+
465
541
  // Verify-then-DONE: trusting waitForURL alone was wrong (the Chalk bug —
466
542
  // Ashby rendered "Missing entry for required field: Name, Email, ..."
467
543
  // inline without a URL change, and we marked DONE on a failed submit).
@@ -491,68 +567,17 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
491
567
  // screenshot — never silently mark DONE on
492
568
  // unverified submits (that was the Chalk bug).
493
569
  if (verdict.submitted === false) {
494
- const reason = verdict.error_message || 'Submission did not confirm form may still have errors';
495
- console.warn(`[orchestrator] Submission NOT verified. Reason: ${reason}`);
496
-
497
- // Fill-validate-retry: when the page shows inline validation errors
498
- // (red banners + highlighted fields), the right move is to rescan +
499
- // re-plan + fix + re-submit ONCE. Most validation failures are
500
- // recoverable: a missed required checkbox, an empty optional field
501
- // that turned out to be required, an LLM picked a too-creative answer.
502
- // Bounded to ONE retry to prevent infinite loops on truly stuck forms.
503
- const alreadyRetried = !!ctx.submitRetryAttempted;
504
- if (!alreadyRetried) {
505
- console.log('[orchestrator] Attempting fill-validate-retry: re-scanning page for highlighted errors...');
506
- ctx.submitRetryAttempted = true;
507
- await reportStatus('IN_PROGRESS', {
508
- step: 'RETRY_FILL',
509
- step_detail: `ATS rejected: ${reason.slice(0, 100)} — fixing & retrying`,
510
- });
511
- // Re-fill. The new scan will pick up red-highlighted required fields
512
- // (their AX 'invalid' or 'required' state will tell the planner to
513
- // re-attempt them). Already-filled correct fields stay put.
514
- try {
515
- await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
516
- await waitForStableDOM(page, 1500);
517
- const retrySubmitBtn = await findSubmitButton(page);
518
- if (retrySubmitBtn) {
519
- console.log('[orchestrator] Retry: clicking submit again...');
520
- await retrySubmitBtn.click();
521
- try { await page.waitForURL(/thank|confirm|success|applied|submitted/i, { timeout: 12000 }); } catch { await page.waitForTimeout(2500); }
522
- // Re-verify
523
- const retryUrl = page.url();
524
- const retryShot = await page.screenshot({ type: 'jpeg', quality: 70 }).catch(() => null);
525
- const retryShotKey = retryShot ? await uploadScreenshot(config, retryShot, `confirm_retry_${queueId}.jpg`) : confirmKey;
526
- let retryVerdict = { submitted: null, error_message: null, source: 'unavailable' };
527
- try {
528
- const rRes = await fetch(`${config.apiUrl}/agent/verify-submit`, {
529
- method: 'POST',
530
- headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
531
- body: JSON.stringify({ queue_id: queueId, page_url: retryUrl }),
532
- });
533
- if (rRes.ok) retryVerdict = await rRes.json();
534
- } catch {}
535
- if (retryVerdict.submitted === true) {
536
- await reportStatus('DONE', { confirmation_screenshot_r2_key: retryShotKey || null, fields_filled: cumulativeFilled });
537
- await clearCheckpoint(config, queueId);
538
- console.log(`[orchestrator] Done (retry-after-validation-error): ${queueItem.company} - ${queueItem.title}`);
539
- return;
540
- }
541
- // Retry verifier also unsure — fall through to NEEDS_ATTENTION with both screenshots
542
- console.warn(`[orchestrator] Retry verdict: ${retryVerdict.submitted}; source=${retryVerdict.source}`);
543
- confirmKey = retryShotKey || confirmKey;
544
- } else {
545
- console.warn('[orchestrator] Retry: no submit button visible after re-fill — page may have navigated.');
546
- }
547
- } catch (e) {
548
- console.warn(`[orchestrator] Retry pass threw: ${e.message}`);
549
- }
550
- }
551
-
552
- // No retry available (or retry didn't succeed) — surface for human.
570
+ // The DOM-error pass above already retried + short-circuited if errors
571
+ // remained. If we got here AND Firecrawl is saying false, the DOM was
572
+ // clean (no aria-invalid / no banner / no "is required" text) but
573
+ // Firecrawl still detected something wrong probably a thank-you
574
+ // page that includes some apologetic text the LLM misread. Surface
575
+ // it gently rather than throwing; user can audit the screenshot.
576
+ const reason = verdict.error_message || 'Firecrawl could not confirm submission';
577
+ console.warn(`[orchestrator] Firecrawl says NOT submitted (DOM looked clean): ${reason}`);
553
578
  await reportStatus('NEEDS_ATTENTION', {
554
579
  review_screenshot_r2_key: confirmKey || null,
555
- needs_attention_reason: `Submit clicked but ATS rejected it: ${reason}`,
580
+ needs_attention_reason: `Verifier flagged this submit: ${reason}. Page DOM looked clean — please eyeball.`,
556
581
  intervention_type: 'submit_failed',
557
582
  step: 'VERIFY',
558
583
  step_detail: reason.slice(0, 200),
@@ -561,44 +586,44 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
561
586
  throw new Error(`Submission failed verification: ${reason}`);
562
587
  }
563
588
 
589
+ // Decide final state (DONE / REVIEWING) based on verifier verdict +
590
+ // auto-submit, but DO NOT return — the audit-trail postFillSession
591
+ // call at the bottom of this block runs for every terminal state so
592
+ // the user's receipt detail always has resume PDF + cover letter PDF +
593
+ // per-field decisions.
594
+ let finalState = 'DONE';
564
595
  if (verdict.submitted === null) {
565
596
  const autoSubmit = config.autoSubmit || aep.agent_config?.auto_submit;
566
597
  if (autoSubmit) {
567
- // Auto-submit ON + verifier unavailable: trust the click AND make
568
- // the screenshot the audit trail. The receipt detail surfaces this
569
- // screenshot prominently — if it shows a red banner, the user
570
- // clicks "Not submitted" on the receipt and we re-queue.
571
- // This is the user-chosen policy: false-positives surface visually,
572
- // not as a blocked REVIEWING row. Faster loop, audit by eyeball.
598
+ // Auto-submit ON + verifier unavailable: trust the click; the
599
+ // screenshot becomes the audit trail.
573
600
  console.log(`[orchestrator] Verifier unavailable (source: ${verdict.source}); auto-submit ON — trusting click, screenshot is the receipt.`);
574
- await reportStatus('DONE', {
575
- confirmation_screenshot_r2_key: confirmKey || null,
576
- fields_filled: cumulativeFilled,
577
- step: 'DONE',
578
- step_detail: 'Submitted (verifier unavailable, trust-on-click)',
579
- });
580
- await clearCheckpoint(config, queueId);
581
- return;
601
+ } else {
602
+ // No auto-submit → REVIEWING so the user eyeballs first.
603
+ console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). REVIEWING — please eyeball the screenshot + click Submit.`);
604
+ finalState = 'REVIEWING';
582
605
  }
583
- // No auto-submit → bounce to REVIEWING so the user eyeballs first.
584
- console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). REVIEWING — please eyeball the screenshot + click Submit.`);
606
+ }
607
+
608
+ if (finalState === 'REVIEWING') {
585
609
  await reportStatus('REVIEWING', {
586
610
  review_screenshot_r2_key: confirmKey || null,
587
611
  step: 'REVIEWING',
588
612
  step_detail: `Submit clicked at ${verdictUrl.slice(0, 100)} — verifier unavailable, please confirm`,
589
613
  fields_filled: cumulativeFilled,
590
614
  });
591
- return;
615
+ } else {
616
+ await reportStatus('DONE', {
617
+ confirmation_screenshot_r2_key: confirmKey || null,
618
+ fields_filled: cumulativeFilled,
619
+ });
620
+ await clearCheckpoint(config, queueId);
621
+ const verifiedTag = verdict.source === 'firecrawl' ? 'firecrawl-verified'
622
+ : verdict.source === 'url_pattern' ? 'url-pattern-verified'
623
+ : 'unverified-but-auto-submit';
624
+ console.log(`[orchestrator] Done (${verifiedTag}): ${queueItem.company} - ${queueItem.title}`);
592
625
  }
593
626
 
594
- await reportStatus('DONE', {
595
- confirmation_screenshot_r2_key: confirmKey || null,
596
- fields_filled: cumulativeFilled,
597
- });
598
- await clearCheckpoint(config, queueId);
599
-
600
- console.log(`[orchestrator] Done (verified): ${queueItem.company} - ${queueItem.title} · firecrawl-verified`);
601
-
602
627
  // Post fill session data to backend for learning loop + receipt audit
603
628
  // trail. filled_actions becomes the per-field decision list the user
604
629
  // sees on Applications detail — "agent typed X for First Name because
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "halo-agent",
3
- "version": "2.0.3",
3
+ "version": "2.0.5",
4
4
  "description": "HALO local apply agent — auto-fills job applications using your real Chrome session",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -24,6 +24,7 @@
24
24
  "scanPage.js",
25
25
  "scanAccessibility.js",
26
26
  "smartFill.js",
27
+ "detectFormErrors.js",
27
28
  "captcha.js",
28
29
  "vision.js",
29
30
  "manusAutomate.js",
@@ -429,6 +429,20 @@ async function scanAccessibility(page) {
429
429
  const label = pickLabel(f);
430
430
  // Filter noise: no label AND no visible role → skip
431
431
  if (!label && !['button', 'link'].includes(normalizeRole(f))) continue;
432
+ // Filter out the "Remove X" chip-clear buttons that React-Select
433
+ // renders next to selected items in multi-select dropdowns. The
434
+ // planner was treating these as separate fields and "filling" them
435
+ // (clicking them, which DELETED the correct answer) on the retry
436
+ // pass — that's how Gender flipped Male → Female on retry. Same for
437
+ // the X button that clears a single-select's value.
438
+ if (normalizeRole(f) === 'button' && (
439
+ /^remove\s+\S/i.test(label) ||
440
+ /^clear\s+selection/i.test(label) ||
441
+ /^remove\s+item/i.test(label) ||
442
+ label === '×' || label === '✕' || label === 'x'
443
+ )) {
444
+ continue;
445
+ }
432
446
  out.push({
433
447
  mmid: f.mmid,
434
448
  role: normalizeRole(f),
package/smartFill.js CHANGED
@@ -196,11 +196,145 @@ async function executePlanItem(page, item, fieldByMmid, ctx) {
196
196
  * Falls back to LLM synonym matching when local exact/substring
197
197
  * matching fails, so "Straight" still picks "Heterosexual."
198
198
  */
199
/**
 * Special-case handler for the intl-tel-input country picker.
 *
 * The widget is an <input type=tel> with a sibling flag button. Clicking the
 * input does not open the list; the flag has to be clicked, then the matching
 * country list item.
 *
 * Matching order for `value` against the opened list:
 *   1. exact country name (case-insensitive; a trailing "+NN" dial code in
 *      `value` is ignored for name matching)
 *   2. substring match in either direction
 *   3. dial code (`+NN` found anywhere in `value`)
 *
 * @param {object} page - Playwright page (uses `locator`, `keyboard`,
 *   `waitForTimeout`).
 * @param {object} triggerLocator - Locator for the field the planner targeted.
 * @param {*} value - Desired country, e.g. "United States", "Canada +1", "+44".
 * @returns {Promise<null|{ok: boolean, reason: string}>} `null` when no
 *   intl-tel-style wrapper is found within 6 ancestors of the trigger (so the
 *   caller continues with its normal dropdown path); otherwise the fill result.
 *   Never rejects: any thrown error is reported as `{ ok: false, reason }`.
 */
async function tryIntlTelCountry(page, triggerLocator, value) {
  // Shipped into both browser-side callbacks so the two ancestor walks cannot
  // drift apart.
  const widget = {
    // Class fragments that identify the widget wrapper (.iti, .iti--*,
    // .country-select, .country-picker).
    wrapperSource: '\\biti\\b|\\biti--|country-select|country-picker',
    // Candidate flag / trigger elements inside the wrapper.
    flagSelector:
      '.iti__selected-flag, .iti__country-container, '
      + '[class*="selected-flag"], [class*="country-button"], '
      + '[aria-haspopup="listbox"][class*="iti"], '
      + 'button[class*="country"]',
  };
  let flag = null;
  try {
    // Is the trigger inside an intl-tel-style wrapper (up to 6 ancestors)?
    const probe = await triggerLocator.evaluate((el, { wrapperSource, flagSelector }) => {
      const wrapperRe = new RegExp(wrapperSource, 'i');
      let node = el;
      for (let hops = 0; node && hops < 6; hops += 1) {
        const cls = String(node.className || '');
        if (wrapperRe.test(cls)) {
          return {
            found: true,
            wrapperOuterClass: cls.slice(0, 200),
            hasFlag: !!node.querySelector(flagSelector),
          };
        }
        node = node.parentElement;
      }
      return { found: false };
    }, widget).catch(() => ({ found: false }));
    if (!probe.found) return null;

    // Same walk again, this time returning a handle to the flag element.
    flag = await triggerLocator.evaluateHandle((el, { wrapperSource, flagSelector }) => {
      const wrapperRe = new RegExp(wrapperSource, 'i');
      let node = el;
      for (let hops = 0; node && hops < 6; hops += 1) {
        if (wrapperRe.test(String(node.className || ''))) {
          return node.querySelector(flagSelector) || null;
        }
        node = node.parentElement;
      }
      return null;
    }, widget);
    // The handle may wrap `null` when the wrapper has no flag element.
    if (!flag || !(await flag.evaluate((n) => !!n).catch(() => false))) {
      return { ok: false, reason: `intl-tel wrapper found (${probe.wrapperOuterClass}) but no flag button inside` };
    }

    // Open the list; retry with force if the normal click fails.
    await flag.click({ timeout: 1500 }).catch(async () => {
      await flag.click({ force: true, timeout: 1500 }).catch(() => {});
    });
    await page.waitForTimeout(500);

    // Country items — try several known class patterns.
    const itemSel = '.iti__country-list:visible .iti__country, .iti__dropdown-content .iti__country, [class*="country-list"] [class*="country-item"], [role="listbox"] [role="option"][class*="iti"]';
    const items = page.locator(itemSel);
    const count = await items.count().catch(() => 0);
    if (count === 0) {
      await page.keyboard.press('Escape').catch(() => {});
      return { ok: false, reason: `intl-tel country list did not open (wrapper=${probe.wrapperOuterClass})` };
    }

    const raw = String(value ?? '');
    // Name part only: a trailing "+NN" dial code is stripped.
    const wanted = raw.toLowerCase().replace(/\s*\+\d+\s*$/, '').trim();
    const names = await items.evaluateAll((nodes) => nodes.map((n) => {
      const nm = n.querySelector('.iti__country-name, [class*="country-name"]')?.textContent?.trim()
        || (n.getAttribute('data-country-name') || '').trim()
        || (n.textContent || '').replace(/\+\d+/, '').trim();
      const dial = n.querySelector('.iti__dial-code, [class*="dial-code"]')?.textContent?.trim()
        || (n.getAttribute('data-dial-code') ? `+${n.getAttribute('data-dial-code')}` : '');
      return { name: nm, dial };
    })).catch(() => []);

    let idx = -1;
    // Name matching only runs with a non-empty needle, and nameless items are
    // skipped: every string includes '', so without these guards a dial-only
    // value such as "+44" (needle '') or a nameless list entry would match
    // immediately and the dial-code fallback below would never be reached.
    if (wanted) {
      idx = names.findIndex((n) => n.name.toLowerCase() === wanted);
      if (idx === -1) {
        idx = names.findIndex((n) => {
          const name = n.name.toLowerCase();
          return name !== '' && (name.includes(wanted) || wanted.includes(name));
        });
      }
    }
    if (idx === -1) {
      const dialMatch = raw.match(/\+(\d+)/);
      if (dialMatch) idx = names.findIndex((n) => n.dial === `+${dialMatch[1]}`);
    }
    if (idx === -1) {
      await page.keyboard.press('Escape').catch(() => {});
      return { ok: false, reason: `no intl-tel country matched "${value}" (${count} items)` };
    }

    // A failed forced click propagates to the catch below.
    await items.nth(idx).click({ timeout: 1500 }).catch(async () => {
      await items.nth(idx).click({ force: true, timeout: 1500 });
    });
    return { ok: true, reason: `intl-tel picked: ${names[idx].name} ${names[idx].dial}` };
  } catch (e) {
    return { ok: false, reason: `intl-tel handler threw: ${e.message}` };
  } finally {
    // Release the page-side handle; failures here must not mask the result.
    if (flag) await Promise.resolve().then(() => flag.dispose()).catch(() => {});
  }
}
295
+
199
296
  async function openAndPickOption(page, triggerLocator, value, llmCtx) {
297
+ try {
298
+ // First: intl-tel-input special case. The Country dial-code picker on
299
+ // Greenhouse is a non-standard widget that ignores clicks to its
300
+ // input — has to click the flag button. tryIntlTelCountry returns
301
+ // null when the field isn't intl-tel, so other dropdowns continue.
302
+ const itlResult = await tryIntlTelCountry(page, triggerLocator, value);
303
+ if (itlResult !== null) return itlResult;
304
+ } catch {}
305
+
306
+ // Second: Google Places typeahead. The Greenhouse Location field is
307
+ // marked role=combobox in AX, so the planner sends click_option — but
308
+ // it's really a type-then-pick autocomplete that opens a .pac-container
309
+ // mounted at document.body (NOT inside the field's wrapper). Detect
310
+ // by checking for adjacent "Locate me" UI or for an existing pac-container.
311
+ try {
312
+ const isPac = await triggerLocator.evaluate((el) => {
313
+ // Heuristic 1: a "Locate me" link/button nearby (Greenhouse-specific)
314
+ const ancestor = el.closest('div, section, fieldset') || el.parentElement;
315
+ const hasLocateMe = ancestor && Array.from(ancestor.querySelectorAll('a, button')).some((b) => /locate\s*me/i.test(b.textContent || ''));
316
+ // Heuristic 2: a pac-container is mounted somewhere on the page
317
+ const hasPacContainer = !!document.querySelector('.pac-container, .pac-target-input');
318
+ // Heuristic 3: the input itself is a pac-target-input (Google sets this when initialized)
319
+ const isPacInput = el.classList?.contains('pac-target-input');
320
+ return hasLocateMe || hasPacContainer || isPacInput;
321
+ }).catch(() => false);
322
+ if (isPac) {
323
+ const r = await typeAndPickSuggestion(page, triggerLocator, value);
324
+ // If typeahead failed, fall through to the normal dropdown path
325
+ // (some fields are both, weirdly)
326
+ if (r.ok) return r;
327
+ }
328
+ } catch {}
329
+
200
330
  try {
201
331
  // Snapshot option-list state BEFORE opening so we can identify
202
- // the new options.
203
- const optionSel = '[role="option"], [role="menuitem"], .select__option, li[class*="option"]';
332
+ // the new options. Includes:
333
+ // .pac-item — Google Places (Greenhouse Location)
334
+ // .iti__country — intl-tel-input (Greenhouse Country dial-code)
335
+ // .select__option — React-Select
336
+ // role=option — ARIA-correct dropdowns
337
+ const optionSel = '[role="option"], [role="menuitem"], .select__option, li[class*="option"], .pac-item, .iti__country';
204
338
  const beforeCount = await page.locator(optionSel).count().catch(() => 0);
205
339
 
206
340
  await triggerLocator.click({ timeout: 2500 });
@@ -333,12 +467,35 @@ async function typeAndPickSuggestion(page, locator, value) {
333
467
  await locator.press('Delete').catch(() => {});
334
468
  const firstChunk = String(value).split(/[,;]/)[0].trim();
335
469
  await page.keyboard.type(firstChunk, { delay: 60 });
336
- await page.waitForTimeout(700);
337
- const optionSel = '[role="option"], [role="listbox"] li, .select__option, ul[class*="autocomplete"] li';
470
+ // Wait for suggestions to render. Google Places PAC takes longer than
471
+ // most (~800ms-1.2s), so pause a bit longer before looking for options.
472
+ await page.waitForTimeout(900);
473
+ // Selector ladder — Google Places (.pac-item) uses no role attribute,
474
+ // so we ADD it to the union. Without this, Greenhouse Location always
475
+ // failed because its dropdown is .pac-container > .pac-item.
476
+ const optionSel = [
477
+ '.pac-item', // Google Places (Greenhouse Location)
478
+ '[role="option"]', // ARIA-correct dropdowns
479
+ '[role="listbox"] li', // older listbox conventions
480
+ '[role="listbox"] [role="option"]', // nested
481
+ '.select__option', // React-Select
482
+ 'ul[class*="autocomplete"] li', // generic autocomplete
483
+ 'ul[class*="suggestion"] li',
484
+ 'div[class*="suggestion"]',
485
+ ].join(', ');
486
+ // Wait for ANY option to actually appear (some libs lazy-render), up
487
+ // to 1.5s more. waitFor throws on timeout; we then check whether the
488
+ // field kept the typed text (free text accepted) or report failure.
489
+ try {
490
+ await page.locator(optionSel).first().waitFor({ state: 'visible', timeout: 1500 });
491
+ } catch {
492
+ const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
493
+ if (got && got.trim()) return { ok: true, reason: `typeahead (no suggestion, accepted): "${value.slice(0, 30)}"` };
494
+ return { ok: false, reason: 'typeahead opened no suggestions' };
495
+ }
338
496
  const opts = page.locator(optionSel);
339
497
  const count = await opts.count().catch(() => 0);
340
498
  if (count === 0) {
341
- // Some fields accept the typed value directly. Verify.
342
499
  const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
343
500
  if (got && got.trim()) return { ok: true, reason: `typeahead (no suggestion, accepted): "${value.slice(0, 30)}"` };
344
501
  return { ok: false, reason: 'typeahead opened no suggestions' };
@@ -348,7 +505,10 @@ async function typeAndPickSuggestion(page, locator, value) {
348
505
  let idx = texts.findIndex((t) => t.toLowerCase().trim() === v);
349
506
  if (idx === -1) idx = texts.findIndex((t) => t.toLowerCase().trim().startsWith(v));
350
507
  if (idx === -1) idx = 0;
351
- await opts.nth(idx).click({ timeout: 2000 });
508
+ // Google Places PAC items have a quirky click handler that doesn't fire
509
+ // on Playwright's normal click — use force-click as fallback.
510
+ try { await opts.nth(idx).click({ timeout: 2000 }); }
511
+ catch { await opts.nth(idx).click({ force: true, timeout: 2000 }); }
352
512
  await page.waitForTimeout(200);
353
513
  return { ok: true, reason: `typeahead picked: ${texts[idx]}` };
354
514
  } catch (e) {