halo-agent 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/orchestrator.js +39 -1
- package/package.json +1 -1
- package/scanPage.js +26 -6
package/orchestrator.js
CHANGED
|
@@ -381,13 +381,51 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
|
|
|
381
381
|
const confirmScreenshot = await page.screenshot({ type: 'jpeg', quality: 70 });
|
|
382
382
|
const confirmKey = await uploadScreenshot(config, confirmScreenshot, `confirm_${queueId}.jpg`);
|
|
383
383
|
|
|
384
|
+
// Verify-then-DONE: trusting waitForURL alone was wrong (the Chalk bug —
|
|
385
|
+
// Ashby rendered "Missing entry for required field: Name, Email, ..."
|
|
386
|
+
// inline without a URL change, and we marked DONE on a failed submit).
|
|
387
|
+
// The backend re-fetches the page through Firecrawl + extract and tells
|
|
388
|
+
// us if the submit actually went through. On Firecrawl failure or no
|
|
389
|
+
// key, the endpoint returns submitted:true so we don't deadlock — we're
|
|
390
|
+
// strictly more correct than before, not less.
|
|
391
|
+
const verdictUrl = page.url();
|
|
392
|
+
console.log(`[orchestrator] Verifying submission at ${verdictUrl}...`);
|
|
393
|
+
let verdict = { submitted: true, error_message: null, confirmation_text: null, source: 'unavailable' };
|
|
394
|
+
try {
|
|
395
|
+
const vRes = await fetch(`${config.apiUrl}/agent/verify-submit`, {
|
|
396
|
+
method: 'POST',
|
|
397
|
+
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
|
|
398
|
+
body: JSON.stringify({ queue_id: queueId, page_url: verdictUrl }),
|
|
399
|
+
});
|
|
400
|
+
if (vRes.ok) verdict = await vRes.json();
|
|
401
|
+
} catch (e) {
|
|
402
|
+
console.warn(`[orchestrator] verify-submit unavailable: ${e.message}`);
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
if (!verdict.submitted) {
|
|
406
|
+
const reason = verdict.error_message || 'Submission did not confirm — form may still have errors';
|
|
407
|
+
console.warn(`[orchestrator] Submission NOT verified. Reason: ${reason}`);
|
|
408
|
+
await reportStatus('NEEDS_ATTENTION', {
|
|
409
|
+
review_screenshot_r2_key: confirmKey || null,
|
|
410
|
+
needs_attention_reason: `Submit clicked but not confirmed: ${reason}`,
|
|
411
|
+
intervention_type: 'submit_failed',
|
|
412
|
+
step: 'VERIFY',
|
|
413
|
+
step_detail: reason.slice(0, 200),
|
|
414
|
+
fields_filled: cumulativeFilled,
|
|
415
|
+
});
|
|
416
|
+
// Do NOT clearCheckpoint — user may dismiss + re-queue, and a stale
|
|
417
|
+
// checkpoint would resume into the same failed state. The dismiss flow
|
|
418
|
+
// clears it (DELETE /apply-queue/:id sets form_checkpoint_json = NULL).
|
|
419
|
+
throw new Error(`Submission failed verification: ${reason}`);
|
|
420
|
+
}
|
|
421
|
+
|
|
384
422
|
await reportStatus('DONE', {
|
|
385
423
|
confirmation_screenshot_r2_key: confirmKey || null,
|
|
386
424
|
fields_filled: cumulativeFilled,
|
|
387
425
|
});
|
|
388
426
|
await clearCheckpoint(config, queueId);
|
|
389
427
|
|
|
390
|
-
console.log(`[orchestrator] Done: ${queueItem.company} - ${queueItem.title}`);
|
|
428
|
+
console.log(`[orchestrator] Done (verified): ${queueItem.company} - ${queueItem.title}${verdict.source === 'firecrawl' ? ' · firecrawl-verified' : ' · unverified'}`);
|
|
391
429
|
|
|
392
430
|
// Post fill session data to backend for learning loop
|
|
393
431
|
await postFillSession(config, {
|
package/package.json
CHANGED
package/scanPage.js
CHANGED
|
@@ -207,8 +207,27 @@ const CONSENT_PATTERNS = [
|
|
|
207
207
|
/privacy\s*policy/i,
|
|
208
208
|
];
|
|
209
209
|
|
|
210
|
+
// Normalize a label so trailing decorations don't break strict regex matches.
|
|
211
|
+
// Ashby/Greenhouse often render labels like "Name *", "Email (required)",
|
|
212
|
+
// "LinkedIn Profile — Required", "Phone *Required". Without this, the
|
|
213
|
+
// PROFILE_PATTERNS (which use ^anchored regexes for short fields like name)
|
|
214
|
+
// silently miss and the field falls through to 'custom', which means the
|
|
215
|
+
// agent skips filling it with profile data.
|
|
216
|
+
/**
 * Normalize a form-field label for pattern matching.
 *
 * Lower-cases the label and strips decorations that carry no meaning for
 * classification: marker glyphs (*, †, ‡), "(required)"-style parentheticals,
 * a trailing "— required" suffix, bare required/optional/mandatory words,
 * and trailing ':' / '?' punctuation. Runs of whitespace are collapsed to a
 * single space and the result is trimmed.
 *
 * @param {string} raw - Label text as scraped from the page.
 * @returns {string} The normalized label; '' for empty or non-string input.
 */
function normalizeLabel(raw) {
  // Non-string input (null, undefined, numbers, ...) has no label text.
  if (typeof raw !== 'string' || raw === '') return '';

  return raw
    .toLowerCase()
    // Marker glyphs: "Name *", "Email †".
    .replace(/[*†‡]/g, ' ')
    // Parenthesised qualifiers: "Email (required)".
    .replace(/\((required|optional|mandatory)\)/g, ' ')
    // Dash-separated trailing qualifier: "LinkedIn Profile — Required".
    // This must run BEFORE the bare-word strip below; otherwise the word is
    // already gone, this pattern cannot match, and the dash is left dangling.
    .replace(/[—–-]+\s*(required|optional|mandatory)\s*$/, ' ')
    // Bare qualifier words: "Phone *Required" (the '*' is already a space).
    .replace(/\b(required|optional|mandatory)\b/g, ' ')
    // Trailing punctuation, tolerating the whitespace left behind by the
    // replacements above: "Name: *" -> "name:  " -> "name".
    .replace(/[\s:?]+$/, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
228
|
+
|
|
210
229
|
function classifyField(field) {
|
|
211
|
-
const label = field.label
|
|
230
|
+
const label = normalizeLabel(field.label);
|
|
212
231
|
|
|
213
232
|
// Consent checkboxes
|
|
214
233
|
if (field.inputType === 'checkbox' && CONSENT_PATTERNS.some(r => r.test(label))) {
|
|
@@ -222,7 +241,7 @@ function classifyField(field) {
|
|
|
222
241
|
if (regex.test(label)) return 'profile:' + name;
|
|
223
242
|
}
|
|
224
243
|
|
|
225
|
-
// Ashby system fields by name attribute
|
|
244
|
+
// Ashby system fields by name attribute (legacy form schema)
|
|
226
245
|
if (field.name && field.name.startsWith('_systemfield_')) {
|
|
227
246
|
const sfName = field.name.replace('_systemfield_', '');
|
|
228
247
|
if (['name', 'email', 'phone', 'resume', 'linkedin', 'website'].includes(sfName)) {
|
|
@@ -323,11 +342,12 @@ async function scanAshby(page) {
|
|
|
323
342
|
if (sfField === 'resume' || f.inputType === 'file') return { ...f, category: 'file:resume' };
|
|
324
343
|
if (profileMap[sfField] !== undefined) return { ...f, category: 'profile:' + profileMap[sfField] };
|
|
325
344
|
}
|
|
326
|
-
// UUID-named fields are custom questions
|
|
327
|
-
if (f.name && /^[0-9a-f-]{36}$/.test(f.name)) {
|
|
328
|
-
return { ...f, category: 'custom' };
|
|
329
|
-
}
|
|
330
345
|
if (f.inputType === 'file') return { ...f, category: 'file:resume' };
|
|
346
|
+
// Modern Ashby uses UUID names for ALL fields (Name, Email, Phone, custom).
|
|
347
|
+
// Classify by LABEL first — that's the only signal that distinguishes
|
|
348
|
+
// a profile field from a custom question when the name attr is opaque.
|
|
349
|
+
// Don't shortcut UUID-named fields to 'custom' — that's exactly the
|
|
350
|
+
// Chalk bug where Name/Email/Phone/LinkedIn all fell through unfilled.
|
|
331
351
|
return { ...f, category: classifyField(f) };
|
|
332
352
|
}).filter(f => f.category !== 'ignore');
|
|
333
353
|
}
|