halo-agent 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/filler.js +25 -5
- package/orchestrator.js +56 -1
- package/package.json +1 -1
- package/scanPage.js +40 -7
package/filler.js
CHANGED
|
@@ -637,6 +637,8 @@ async function fillFields(page, aep, options = {}) {
|
|
|
637
637
|
|
|
638
638
|
// ── Pass 3: fill each field ───────────────────────────────────────────────
|
|
639
639
|
for (const { field, value, source } of resolved) {
|
|
640
|
+
const labelShort = (field.label || field.selector || '?').slice(0, 50);
|
|
641
|
+
|
|
640
642
|
// Skip if already answered on a previous page
|
|
641
643
|
const trackKey = field.label || field.selector;
|
|
642
644
|
if (ctx && trackKey && ctx.answeredFields.has(trackKey)) {
|
|
@@ -660,8 +662,15 @@ async function fillFields(page, aep, options = {}) {
|
|
|
660
662
|
continue;
|
|
661
663
|
}
|
|
662
664
|
|
|
663
|
-
// No value for a profile field — missing from profile, skip
|
|
664
|
-
|
|
665
|
+
// No value for a profile field — missing from profile, skip.
|
|
666
|
+
// Log this loudly: it's almost always the actual cause of "the agent
|
|
667
|
+
// didn't fill anything." profile_fill missing a value the user definitely
|
|
668
|
+
// entered means the AEP builder isn't sending it from strategic_profile.
|
|
669
|
+
if (!value) {
|
|
670
|
+
console.warn(`[filler] no-value ${source}: "${labelShort}" (category=${field.category}) — value missing from AEP`);
|
|
671
|
+
skipped++;
|
|
672
|
+
continue;
|
|
673
|
+
}
|
|
665
674
|
|
|
666
675
|
// Locate the element — use the scanner's selector first, then fall back to semantic finder
|
|
667
676
|
let locator = null;
|
|
@@ -690,7 +699,10 @@ async function fillFields(page, aep, options = {}) {
|
|
|
690
699
|
locator = await semanticFindField(page, { field_id: field.id || field.name, label: field.label }).catch(() => null);
|
|
691
700
|
}
|
|
692
701
|
|
|
693
|
-
if (!locator) {
|
|
702
|
+
if (!locator) {
|
|
703
|
+
console.warn(`[filler] no-locator: "${labelShort}" (selector="${(field.selector || '').slice(0,60)}") — element not visible / selector dead`);
|
|
704
|
+
skipped++; continue;
|
|
705
|
+
}
|
|
694
706
|
|
|
695
707
|
// Special case: cover letter — type character by character
|
|
696
708
|
if (field.category === 'cover_letter' && field.tag === 'textarea') {
|
|
@@ -739,11 +751,19 @@ async function fillFields(page, aep, options = {}) {
|
|
|
739
751
|
try {
|
|
740
752
|
const ok = await fillLocator(page, locator, value, field.label);
|
|
741
753
|
if (ok) {
|
|
754
|
+
const valShort = String(value).slice(0, 40).replace(/\n/g, ' ');
|
|
755
|
+
console.log(`[filler] filled (${source}): "${labelShort}" = "${valShort}${String(value).length > 40 ? '...' : ''}"`);
|
|
742
756
|
filled++;
|
|
743
757
|
if (ctx && trackKey) ctx.answeredFields.set(trackKey, { value, pageIndex: ctx.currentPageIndex, source });
|
|
744
758
|
await delay();
|
|
745
|
-
} else {
|
|
746
|
-
|
|
759
|
+
} else {
|
|
760
|
+
console.warn(`[filler] fill-failed: "${labelShort}" — fillLocator returned false`);
|
|
761
|
+
failed++;
|
|
762
|
+
}
|
|
763
|
+
} catch (e) {
|
|
764
|
+
console.warn(`[filler] fill-threw: "${labelShort}" — ${e.message}`);
|
|
765
|
+
failed++;
|
|
766
|
+
}
|
|
747
767
|
}
|
|
748
768
|
|
|
749
769
|
// ── Legacy fallback: also run old profile map + semantic matcher ──────────
|
package/orchestrator.js
CHANGED
|
@@ -381,13 +381,68 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
|
|
|
381
381
|
const confirmScreenshot = await page.screenshot({ type: 'jpeg', quality: 70 });
|
|
382
382
|
const confirmKey = await uploadScreenshot(config, confirmScreenshot, `confirm_${queueId}.jpg`);
|
|
383
383
|
|
|
384
|
+
// Verify-then-DONE: trusting waitForURL alone was wrong (the Chalk bug —
|
|
385
|
+
// Ashby rendered "Missing entry for required field: Name, Email, ..."
|
|
386
|
+
// inline without a URL change, and we marked DONE on a failed submit).
|
|
387
|
+
// The backend re-fetches the page through Firecrawl + extract and tells
|
|
388
|
+
// us if the submit actually went through. On Firecrawl failure or no
|
|
389
|
+
// key, the endpoint returns submitted:true so we don't deadlock — we're
|
|
390
|
+
// strictly more correct than before, not less.
|
|
391
|
+
const verdictUrl = page.url();
|
|
392
|
+
console.log(`[orchestrator] Verifying submission at ${verdictUrl}...`);
|
|
393
|
+
let verdict = { submitted: true, error_message: null, confirmation_text: null, source: 'unavailable' };
|
|
394
|
+
try {
|
|
395
|
+
const vRes = await fetch(`${config.apiUrl}/agent/verify-submit`, {
|
|
396
|
+
method: 'POST',
|
|
397
|
+
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
|
|
398
|
+
body: JSON.stringify({ queue_id: queueId, page_url: verdictUrl }),
|
|
399
|
+
});
|
|
400
|
+
if (vRes.ok) verdict = await vRes.json();
|
|
401
|
+
} catch (e) {
|
|
402
|
+
console.warn(`[orchestrator] verify-submit unavailable: ${e.message}`);
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
// Three-way verdict:
|
|
406
|
+
// submitted: true → real success, mark DONE
|
|
407
|
+
// submitted: false → real failure, NEEDS_ATTENTION with the error
|
|
408
|
+
// submitted: null → could not verify (Firecrawl down/missing).
|
|
409
|
+
// Bounce to REVIEWING so the user eyeballs the
|
|
410
|
+
// screenshot — never silently mark DONE on
|
|
411
|
+
// unverified submits (that was the Chalk bug).
|
|
412
|
+
if (verdict.submitted === false) {
|
|
413
|
+
const reason = verdict.error_message || 'Submission did not confirm — form may still have errors';
|
|
414
|
+
console.warn(`[orchestrator] Submission NOT verified. Reason: ${reason}`);
|
|
415
|
+
await reportStatus('NEEDS_ATTENTION', {
|
|
416
|
+
review_screenshot_r2_key: confirmKey || null,
|
|
417
|
+
needs_attention_reason: `Submit clicked but Ashby/ATS rejected it: ${reason}`,
|
|
418
|
+
intervention_type: 'submit_failed',
|
|
419
|
+
step: 'VERIFY',
|
|
420
|
+
step_detail: reason.slice(0, 200),
|
|
421
|
+
fields_filled: cumulativeFilled,
|
|
422
|
+
});
|
|
423
|
+
throw new Error(`Submission failed verification: ${reason}`);
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
if (verdict.submitted === null) {
|
|
427
|
+
console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). Sending to REVIEWING for your eyeball.`);
|
|
428
|
+
await reportStatus('REVIEWING', {
|
|
429
|
+
review_screenshot_r2_key: confirmKey || null,
|
|
430
|
+
step: 'REVIEWING',
|
|
431
|
+
step_detail: 'Could not auto-verify — please confirm the submit',
|
|
432
|
+
fields_filled: cumulativeFilled,
|
|
433
|
+
});
|
|
434
|
+
// Stop here; user clicks Submit on dashboard → /apply-queue/submit/:id
|
|
435
|
+
// will flip to DONE. Don't return — let the function return naturally.
|
|
436
|
+
return;
|
|
437
|
+
}
|
|
438
|
+
|
|
384
439
|
await reportStatus('DONE', {
|
|
385
440
|
confirmation_screenshot_r2_key: confirmKey || null,
|
|
386
441
|
fields_filled: cumulativeFilled,
|
|
387
442
|
});
|
|
388
443
|
await clearCheckpoint(config, queueId);
|
|
389
444
|
|
|
390
|
-
console.log(`[orchestrator] Done: ${queueItem.company} - ${queueItem.title}`);
|
|
445
|
+
console.log(`[orchestrator] Done (verified): ${queueItem.company} - ${queueItem.title} · firecrawl-verified`);
|
|
391
446
|
|
|
392
447
|
// Post fill session data to backend for learning loop
|
|
393
448
|
await postFillSession(config, {
|
package/package.json
CHANGED
package/scanPage.js
CHANGED
|
@@ -207,8 +207,27 @@ const CONSENT_PATTERNS = [
|
|
|
207
207
|
/privacy\s*policy/i,
|
|
208
208
|
];
|
|
209
209
|
|
|
210
|
+
/**
 * Normalize a label so trailing decorations don't break strict regex matches.
 *
 * Ashby/Greenhouse often render labels like "Name *", "Email (required)",
 * "LinkedIn Profile — Required", "Phone *Required". Without this, the
 * PROFILE_PATTERNS (which use ^anchored regexes for short fields like name)
 * silently miss and the field falls through to 'custom', which means the
 * agent skips filling it with profile data.
 *
 * @param {*} raw - Label text as scraped from the page; any falsy value is
 *   treated as "no label". Non-string values are stringified.
 * @returns {string} Lower-cased label with requirement markers and trailing
 *   punctuation removed and whitespace collapsed; '' when there is no label.
 */
function normalizeLabel(raw) {
  if (!raw) return '';
  return String(raw)
    .toLowerCase()
    .replace(/[*†‡]/g, ' ')                                           // markers
    .replace(/\((required|optional|mandatory)\)/g, ' ')               // "(required)"
    // Must run BEFORE the bare-word rule: once "required" is gone this
    // pattern can no longer match and the dash would be left dangling.
    .replace(/\s*[—–-]+\s*(required|optional|mandatory)\s*$/g, ' ')   // "— required"
    .replace(/\b(required|optional|mandatory)\b/g, ' ')               // "required"
    // Strip trailing separators together with whitespace so "name: " and a
    // dash orphaned by an earlier rule ("name — (required)") are both cleaned.
    .replace(/[\s:?—–-]+$/g, '')                                      // trailing : ? —
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
228
|
+
|
|
210
229
|
function classifyField(field) {
|
|
211
|
-
const label = field.label
|
|
230
|
+
const label = normalizeLabel(field.label);
|
|
212
231
|
|
|
213
232
|
// Consent checkboxes
|
|
214
233
|
if (field.inputType === 'checkbox' && CONSENT_PATTERNS.some(r => r.test(label))) {
|
|
@@ -222,7 +241,7 @@ function classifyField(field) {
|
|
|
222
241
|
if (regex.test(label)) return 'profile:' + name;
|
|
223
242
|
}
|
|
224
243
|
|
|
225
|
-
// Ashby system fields by name attribute
|
|
244
|
+
// Ashby system fields by name attribute (legacy form schema)
|
|
226
245
|
if (field.name && field.name.startsWith('_systemfield_')) {
|
|
227
246
|
const sfName = field.name.replace('_systemfield_', '');
|
|
228
247
|
if (['name', 'email', 'phone', 'resume', 'linkedin', 'website'].includes(sfName)) {
|
|
@@ -320,14 +339,22 @@ async function scanAshby(page) {
|
|
|
320
339
|
if (f.name && f.name.startsWith('_systemfield_')) {
|
|
321
340
|
const sfField = f.name.replace('_systemfield_', '');
|
|
322
341
|
const profileMap = { name: 'full_name', email: 'email', phone: 'phone', linkedin: 'linkedin', website: 'portfolio', resume: null };
|
|
323
|
-
if (sfField === 'resume'
|
|
342
|
+
if (sfField === 'resume') return { ...f, category: 'file:resume' };
|
|
324
343
|
if (profileMap[sfField] !== undefined) return { ...f, category: 'profile:' + profileMap[sfField] };
|
|
325
344
|
}
|
|
326
|
-
//
|
|
327
|
-
|
|
328
|
-
|
|
345
|
+
// File inputs: distinguish resume vs cover-letter vs other by the visible
|
|
346
|
+
// label. Without this, Ashby's separate cover-letter file input was being
|
|
347
|
+
// shadowed by 'file:resume' and uploadResume() only handles one resume.
|
|
348
|
+
if (f.inputType === 'file') {
|
|
349
|
+
const lbl = (f.label || '').toLowerCase();
|
|
350
|
+
if (/cover\s*letter/.test(lbl)) return { ...f, category: 'file:cover_letter' };
|
|
351
|
+
return { ...f, category: 'file:resume' };
|
|
329
352
|
}
|
|
330
|
-
|
|
353
|
+
// Modern Ashby uses UUID names for ALL fields (Name, Email, Phone, custom).
|
|
354
|
+
// Classify by LABEL first — that's the only signal that distinguishes
|
|
355
|
+
// a profile field from a custom question when the name attr is opaque.
|
|
356
|
+
// Don't shortcut UUID-named fields to 'custom' — that's exactly the
|
|
357
|
+
// Chalk bug where Name/Email/Phone/LinkedIn all fell through unfilled.
|
|
331
358
|
return { ...f, category: classifyField(f) };
|
|
332
359
|
}).filter(f => f.category !== 'ignore');
|
|
333
360
|
}
|
|
@@ -505,6 +532,12 @@ async function scanPage(page, ats) {
|
|
|
505
532
|
});
|
|
506
533
|
|
|
507
534
|
console.log(`[scanPage] ${platform}: found ${out.length} fields (${out.filter(f => f.category.startsWith('profile')).length} profile, ${out.filter(f => f.category === 'custom').length} custom, ${out.filter(f => f.category === 'eeo').length} eeo)`);
|
|
535
|
+
// Per-field breakdown so we can SEE why a "Name" field ended up classified
|
|
536
|
+
// as 'custom' instead of 'profile:full_name' — without this, every fill
|
|
537
|
+
// failure is a guessing game.
|
|
538
|
+
for (const f of out) {
|
|
539
|
+
console.log(`[scanPage] - ${f.category.padEnd(22)} | ${(f.label || '(no label)').slice(0, 60)} | ${f.tag}/${f.inputType}`);
|
|
540
|
+
}
|
|
508
541
|
return out;
|
|
509
542
|
}
|
|
510
543
|
|