halo-agent 1.3.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/filler.js CHANGED
@@ -259,7 +259,7 @@ async function uploadFile(page, triggerSelector, filePath) {
259
259
  *
260
260
  * Returns true if fill succeeded.
261
261
  */
262
- async function fillLocator(page, locator, value, label) {
262
+ async function fillLocator(page, locator, value, label, dropdownCtx = null) {
263
263
  if (!value) return false;
264
264
 
265
265
  try {
@@ -321,13 +321,50 @@ async function fillLocator(page, locator, value, label) {
321
321
  );
322
322
  if (fuzzy) ok = await locator.selectOption(fuzzy.v).then(() => true).catch(() => false);
323
323
  }
324
+ // 5. AI synonym pick — "Straight" → "Heterosexual", "Asian" →
325
+ // "South Asian" etc. We hand the LLM the user's literal answer
326
+ // plus the EXACT option list and let it pick the equivalent.
327
+ if (!ok && dropdownCtx && opts.length > 0) {
328
+ try {
329
+ const optionTexts = opts.map(o => o.t).filter(Boolean);
330
+ const res = await fetch(`${dropdownCtx.config.apiUrl}/smartfill/field-answer`, {
331
+ method: 'POST',
332
+ headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${dropdownCtx.config.token}` },
333
+ body: JSON.stringify({
334
+ job_id: dropdownCtx.jobId,
335
+ field_label: label || '',
336
+ field_type: 'select',
337
+ options: optionTexts,
338
+ previously_answered: [{ label: 'user intent', value: String(value) }],
339
+ }),
340
+ });
341
+ if (res.ok) {
342
+ const { value: picked } = await res.json();
343
+ if (picked && picked !== 'SKIP') {
344
+ const pickedV = picked.toLowerCase().trim();
345
+ const match = opts.find(o => o.t.toLowerCase().trim() === pickedV)
346
+ || opts.find(o => o.t.toLowerCase().includes(pickedV) || pickedV.includes(o.t.toLowerCase()));
347
+ if (match) {
348
+ ok = await locator.selectOption(match.v).then(() => true).catch(() => false);
349
+ if (ok) console.log(`[filler] AI-picked select: "${label}" "${value}" → "${match.t}"`);
350
+ }
351
+ }
352
+ }
353
+ } catch (e) {
354
+ console.warn(`[filler] AI native-select pick failed for "${label}": ${e.message}`);
355
+ }
356
+ }
324
357
  return ok;
325
358
  }
326
359
 
327
360
  // ── Custom dropdown / combobox (React Select, Ashby, Lever custom selects) ──
328
361
  if (meta.role === 'combobox' || meta.role === 'listbox' ||
329
362
  meta.tag === 'div' || meta.tag === 'button') {
330
- return await fillCustomDropdown(page, locator, value);
363
+ return await fillCustomDropdown(page, locator, value, dropdownCtx ? {
364
+ config: dropdownCtx.config,
365
+ jobId: dropdownCtx.jobId,
366
+ label,
367
+ } : {});
331
368
  }
332
369
 
333
370
  // ── Radio group (native) ──
@@ -449,49 +486,85 @@ async function fillTypeahead(page, locator, value) {
449
486
  }
450
487
  }
451
488
 
452
- async function fillCustomDropdown(page, triggerLocator, value) {
489
+ async function fillCustomDropdown(page, triggerLocator, value, options = {}) {
453
490
  try {
454
491
  await triggerLocator.click();
455
492
  await page.waitForTimeout(350);
456
493
 
457
- // Look for an open listbox, menu, or option list
458
- const optionSelectors = [
459
- `[role="option"]:has-text("${value}")`,
460
- `[role="menuitem"]:has-text("${value}")`,
461
- `li:has-text("${value}")`,
462
- `.Select-option:has-text("${value}")`,
463
- `[class*="option"]:has-text("${value}")`,
464
- ];
465
-
466
- for (const sel of optionSelectors) {
467
- try {
468
- const opt = page.locator(sel).first();
469
- if (await opt.isVisible({ timeout: 800 })) {
470
- await opt.click();
471
- return true;
472
- }
473
- } catch {}
494
+ // STEP 1: Discover ALL available options (visible after opening). We
495
+ // need this for two reasons:
496
+ // a) substring/exact match locally (cheap, no LLM round trip)
497
+ // b) if no local match, ask the LLM to pick from this exact list —
498
+ // that's how "Straight" maps to "Heterosexual" without us having
499
+ // to hardcode a synonym table for every form
500
+ // We collect texts BEFORE any click attempts so the LLM has the same
501
+ // option universe the user would see.
502
+ const optionLocators = page.locator('[role="option"], [role="menuitem"], .select__option, li[class*="option"]');
503
+ const optionCount = await optionLocators.count().catch(() => 0);
504
+ const optionTexts = [];
505
+ for (let i = 0; i < Math.min(optionCount, 40); i++) {
506
+ const t = (await optionLocators.nth(i).textContent().catch(() => '')).trim();
507
+ if (t) optionTexts.push(t);
474
508
  }
475
509
 
476
- // Fuzzy: get all visible options and find closest match
477
- const allOptions = await page.locator('[role="option"], [role="menuitem"]').all();
478
- let bestMatch = null;
479
- let bestScore = 0;
480
- for (const opt of allOptions) {
481
- const text = (await opt.textContent().catch(() => '')).trim().toLowerCase();
482
- const target = value.toLowerCase();
483
- const score = text === target ? 1 :
484
- text.includes(target) ? 0.8 :
485
- target.includes(text) ? 0.7 : 0;
486
- if (score > bestScore) { bestScore = score; bestMatch = opt; }
510
+ // STEP 2: Local exact match (case-insensitive)
511
+ const v = String(value).toLowerCase().trim();
512
+ let pickIdx = optionTexts.findIndex(t => t.toLowerCase().trim() === v);
513
+
514
+ // STEP 3: Local substring match (either direction)
515
+ if (pickIdx === -1) {
516
+ pickIdx = optionTexts.findIndex(t => {
517
+ const tt = t.toLowerCase().trim();
518
+ return tt.includes(v) || v.includes(tt);
519
+ });
487
520
  }
488
- if (bestMatch && bestScore >= 0.7) {
489
- await bestMatch.click();
521
+
522
+ if (pickIdx !== -1) {
523
+ await optionLocators.nth(pickIdx).click({ timeout: 2000 });
490
524
  return true;
491
525
  }
492
526
 
493
- // Nothing found press Escape to close the dropdown
494
- await page.keyboard.press('Escape');
527
+ // STEP 4: LLM picks from the option universe — the smart-fill path.
528
+ // Sends user's intent ("Straight") + question text + the EXACT list
529
+ // of options Greenhouse is rendering ("Heterosexual", "Gay or
530
+ // Lesbian", "Bisexual", "Prefer not to say"). Backend already has
531
+ // the /smartfill/field-answer endpoint with options[] support.
532
+ if (optionTexts.length > 0 && options.config && options.jobId && options.label) {
533
+ try {
534
+ const res = await fetch(`${options.config.apiUrl}/smartfill/field-answer`, {
535
+ method: 'POST',
536
+ headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${options.config.token}` },
537
+ body: JSON.stringify({
538
+ job_id: options.jobId,
539
+ field_label: options.label,
540
+ field_type: 'select',
541
+ options: optionTexts,
542
+ // Pass user's stated intent so the LLM picks the closest
543
+ // option that REPRESENTS that intent, not just text similarity.
544
+ previously_answered: [{ label: 'user intent', value: String(value) }],
545
+ }),
546
+ });
547
+ if (res.ok) {
548
+ const { value: picked } = await res.json();
549
+ if (picked && picked !== 'SKIP') {
550
+ // Match the picked text back to an option locator
551
+ const pickedV = picked.toLowerCase().trim();
552
+ const aiIdx = optionTexts.findIndex(t => t.toLowerCase().trim() === pickedV)
553
+ ?? optionTexts.findIndex(t => t.toLowerCase().includes(pickedV) || pickedV.includes(t.toLowerCase()));
554
+ if (aiIdx !== -1 && aiIdx >= 0) {
555
+ await optionLocators.nth(aiIdx).click({ timeout: 2000 });
556
+ console.log(`[filler] AI-picked dropdown: "${options.label}" "${value}" → "${optionTexts[aiIdx]}"`);
557
+ return true;
558
+ }
559
+ }
560
+ }
561
+ } catch (e) {
562
+ console.warn(`[filler] AI dropdown pick failed for "${options.label}": ${e.message}`);
563
+ }
564
+ }
565
+
566
+ // Nothing matched — close dropdown gracefully so the next field can fill
567
+ await page.keyboard.press('Escape').catch(() => {});
495
568
  return false;
496
569
  } catch {
497
570
  return false;
@@ -729,6 +802,12 @@ async function fillFields(page, aep, options = {}) {
729
802
  const speed = options.speed || 'normal';
730
803
  const ctx = options.ctx || null;
731
804
  const ats = options.ats || 'generic';
805
+ // dropdownCtx flows to fillLocator → fillCustomDropdown so custom selects
806
+ // can call the backend's /smartfill/field-answer with the question text +
807
+ // available options. That's the "Straight" → "Heterosexual" synonym fix.
808
+ const dropdownCtx = options.config && options.jobId
809
+ ? { config: options.config, jobId: options.jobId }
810
+ : null;
732
811
  const delay = () => new Promise(r => setTimeout(r, jitter(100 + Math.random() * 150)));
733
812
 
734
813
  let filled = 0, skipped = 0, failed = 0;
@@ -862,7 +941,7 @@ async function fillFields(page, aep, options = {}) {
862
941
  await delay();
863
942
  } else {
864
943
  // Try clicking the locator directly (custom styled radio)
865
- const ok2 = await fillLocator(page, locator, value, field.label);
944
+ const ok2 = await fillLocator(page, locator, value, field.label, dropdownCtx);
866
945
  if (ok2) { filled++; if (ctx && trackKey) ctx.answeredFields.set(trackKey, { value, pageIndex: ctx.currentPageIndex, source }); await delay(); }
867
946
  else { skipped++; }
868
947
  }
@@ -884,7 +963,7 @@ async function fillFields(page, aep, options = {}) {
884
963
 
885
964
  // All other types — use universal fillLocator
886
965
  try {
887
- const ok = await fillLocator(page, locator, value, field.label);
966
+ const ok = await fillLocator(page, locator, value, field.label, dropdownCtx);
888
967
  if (ok) {
889
968
  const valShort = String(value).slice(0, 40).replace(/\n/g, ' ');
890
969
  console.log(`[filler] filled (${source}): "${labelShort}" = "${valShort}${String(value).length > 40 ? '...' : ''}"`);
package/orchestrator.js CHANGED
@@ -11,7 +11,43 @@
11
11
  const os = require('os');
12
12
  const path = require('path');
13
13
  const fs = require('fs');
14
- const { fillFields, uploadFile, findNextButton, findSubmitButton, waitForStableDOM, snapshotFieldLabels } = require('./filler');
14
+ const { fillFields: legacyFillFields, uploadFile, findNextButton, findSubmitButton, waitForStableDOM, snapshotFieldLabels } = require('./filler');
15
+ const { smartFillPage } = require('./smartFill');
16
+
17
+ // Switchable filler — smart by default, can be killed via config.useSmartFill=false.
18
+ // smartFill.js internally falls back to legacyFillFields if /smartfill/plan-fill
19
+ // is unavailable, so a planner outage doesn't break us — but this flag is the
20
+ // hard kill switch if smart mode is misbehaving on a specific user / form.
21
/**
 * Fill the current page, using the smart planner unless it is switched off.
 *
 * When `config.useSmartFill === false` the legacy filler is called and its
 * result is returned untouched. Otherwise `smartFillPage` runs; if it throws,
 * the legacy filler runs instead and the result is flagged as a fallback.
 *
 * @param {object} page - Page handle, forwarded as-is to the underlying filler.
 * @param {object} aep - Application payload; `__resumeLocalPath` and
 *   `__coverLetterLocalPath` are read from it and handed to the smart filler.
 * @param {object} [opts] - `{ config, jobId, ctx, ats, ... }`; may be omitted.
 * @returns {Promise<object>} The raw legacy result when smart fill is disabled,
 *   otherwise `{ filled, skipped, failed, needsAI, askUserReasons,
 *   plannedActions, fellBackToLegacy }`.
 */
async function fillFields(page, aep, opts) {
  // Tolerate a missing options bag: `opts?.config` was guarded but `opts.jobId`
  // was not, so calling without options used to throw a TypeError.
  const options = opts ?? {};
  const { config } = options;
  const smartEnabled = config?.useSmartFill !== false; // default true
  if (!smartEnabled) {
    return await legacyFillFields(page, aep, options);
  }

  let result;
  try {
    result = await smartFillPage(page, aep, {
      config,
      jobId: options.jobId,
      resumePath: aep.__resumeLocalPath || null,
      coverLetterPath: aep.__coverLetterLocalPath || null,
      ctx: options.ctx,
      ats: options.ats,
    });
  } catch (e) {
    console.warn(`[orchestrator] smartFillPage threw: ${e?.message ?? e} — falling back to legacy`);
    const legacyResult = await legacyFillFields(page, aep, options);
    // Mark the result so `fellBackToLegacy` below reflects what actually ran.
    result = { ...legacyResult, fallback: true };
  }

  // Normalize the result shape (smart returns askUserReasons, planned, fallback;
  // legacy returns needsAI) so downstream code can read either uniformly.
  const r = result ?? {};
  return {
    filled: r.filled || 0,
    skipped: r.skipped || 0,
    failed: r.failed || 0,
    needsAI: r.needsAI || [],
    askUserReasons: r.askUserReasons || [],
    plannedActions: r.planned || 0,
    fellBackToLegacy: !!r.fallback,
  };
}
15
51
  const { detectCaptcha, solveCaptcha, injectCaptchaToken } = require('./captcha');
16
52
  const { visionFill, visionNavigateAndSubmit, visionFillSkipped } = require('./vision');
17
53
 
@@ -67,6 +103,8 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
67
103
  if (aep.recommended_resume?.pdf_presigned_url) {
68
104
  tempResumeFile = await downloadResume(aep.recommended_resume.pdf_presigned_url);
69
105
  }
106
+ // Expose resume path on aep so smartFill's upload_file action can route it.
107
+ aep.__resumeLocalPath = tempResumeFile;
70
108
 
71
109
  // Download cover-letter PDF too — separate file because Greenhouse/Ashby
72
110
  // have separate file inputs for each. Only present when the user
@@ -76,8 +114,8 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
76
114
  tempCoverLetterFile = await downloadResume(aep.cover_letter_pdf.pdf_presigned_url);
77
115
  if (tempCoverLetterFile) console.log('[orchestrator] Cover letter PDF downloaded');
78
116
  }
79
- // Expose to fillFields via aep so the file:cover_letter category resolver
80
- // can hand it to the uploader.
117
+ // Expose to fillFields via aep so smartFill's upload_file action can
118
+ // route it (legacy filler also reads this for file:cover_letter).
81
119
  aep.__coverLetterLocalPath = tempCoverLetterFile;
82
120
 
83
121
  // Check for an existing checkpoint — if a previous run got past page 1
@@ -130,7 +168,7 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
130
168
  console.log(`[orchestrator] Using vision fallback for ${ats_type}`);
131
169
  fillResult = await visionFill(page, aep, anthropicKey, { alreadyFilled: ctx.answeredFields });
132
170
  } else {
133
- fillResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
171
+ fillResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
134
172
  }
135
173
 
136
174
  // Fetch AI answers for any fields that scanPage identified as needing AI
@@ -157,7 +195,7 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
157
195
  }
158
196
  }
159
197
  // Second fill pass with newly fetched answers
160
- const retryResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
198
+ const retryResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
161
199
  fillResult.filled += retryResult.filled;
162
200
  fillResult.skipped = retryResult.skipped;
163
201
  console.log(`[orchestrator] Retry fill: +${retryResult.filled} fields filled`);
@@ -280,7 +318,7 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
280
318
  ctx.currentPageIndex++;
281
319
 
282
320
  // Fill any new fields that appeared on this page
283
- const pageResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
321
+ const pageResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
284
322
  cumulativeFilled += pageResult.filled || 0;
285
323
 
286
324
  // Fetch AI answers for custom fields scanPage identified on this page
@@ -297,7 +335,7 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
297
335
  }
298
336
  } catch {}
299
337
  }
300
- const retryPage = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
338
+ const retryPage = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
301
339
  cumulativeFilled += retryPage.filled || 0;
302
340
  }
303
341
 
@@ -436,6 +474,22 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
436
474
  }
437
475
 
438
476
  if (verdict.submitted === null) {
477
+ // Auto-submit mode means "don't ask me, just submit." If we can't
478
+ // verify but the user opted into hands-off, trust the click and
479
+ // mark DONE (the screenshot is the receipt; user can audit later).
480
+ // Without this, autoSubmit was silently being ignored every time
481
+ // Firecrawl was slow/down — exactly the case user hit.
482
+ const autoSubmit = config.autoSubmit || aep.agent_config?.auto_submit;
483
+ if (autoSubmit) {
484
+ console.log(`[orchestrator] Could not verify (source: ${verdict.source}) — auto-submit ON, trusting click.`);
485
+ await reportStatus('DONE', {
486
+ confirmation_screenshot_r2_key: confirmKey || null,
487
+ fields_filled: cumulativeFilled,
488
+ });
489
+ await clearCheckpoint(config, queueId);
490
+ console.log(`[orchestrator] Done (auto-submit, unverified): ${queueItem.company} - ${queueItem.title}`);
491
+ return;
492
+ }
439
493
  console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). Sending to REVIEWING for your eyeball.`);
440
494
  await reportStatus('REVIEWING', {
441
495
  review_screenshot_r2_key: confirmKey || null,
@@ -1004,6 +1058,8 @@ async function runExtensionFill({
1004
1058
  if (aep.recommended_resume?.pdf_presigned_url) {
1005
1059
  tempResumeFile = await downloadResume(aep.recommended_resume.pdf_presigned_url);
1006
1060
  }
1061
+ // Expose for smartFill upload_file action (and legacy gate uploader)
1062
+ aep.__resumeLocalPath = tempResumeFile;
1007
1063
 
1008
1064
  const ctx = createFormContext();
1009
1065
  const useVisionForThis = useVision && VISION_ATS.has((ats_type || '').toLowerCase());
@@ -1030,7 +1086,7 @@ async function runExtensionFill({
1030
1086
  log(`Vision fill for ${ats_type}`);
1031
1087
  fillResult = await visionFill(page, aep, anthropicKey, { alreadyFilled: ctx.answeredFields });
1032
1088
  } else {
1033
- fillResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
1089
+ fillResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
1034
1090
  }
1035
1091
 
1036
1092
  reportFillStatus('FILLING', { fieldsFilled: fillResult.filled || 0, message: `Filled ${fillResult.filled || 0} fields...` });
@@ -1055,7 +1111,7 @@ async function runExtensionFill({
1055
1111
  }
1056
1112
  } catch (e) { log(`AI answer failed for "${f.label}": ${e.message}`); }
1057
1113
  }
1058
- const retry = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
1114
+ const retry = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
1059
1115
  fillResult.filled += retry.filled;
1060
1116
  fillResult.skipped = retry.skipped;
1061
1117
  }
@@ -1119,7 +1175,7 @@ async function runExtensionFill({
1119
1175
  }
1120
1176
 
1121
1177
  ctx.currentPageIndex++;
1122
- const pageResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type });
1178
+ const pageResult = await fillFields(page, aep, { speed: typingSpeed, ctx, ats: ats_type, config, jobId });
1123
1179
  cumulativeFilled += pageResult.filled || 0;
1124
1180
 
1125
1181
  if (anthropicKey && !useVisionForThis && (pageResult.skipped > 2 || pageResult.failed > 0)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "halo-agent",
3
- "version": "1.3.5",
3
+ "version": "2.0.0",
4
4
  "description": "HALO local apply agent — auto-fills job applications using your real Chrome session",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -22,6 +22,8 @@
22
22
  "localServer.js",
23
23
  "filler.js",
24
24
  "scanPage.js",
25
+ "scanAccessibility.js",
26
+ "smartFill.js",
25
27
  "captcha.js",
26
28
  "vision.js",
27
29
  "manusAutomate.js",
@@ -0,0 +1,447 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Accessibility-tree-based form scanner.
5
+ *
6
+ * Replaces the per-ATS regex scanners (scanGreenhouse, scanLever, scanAshby,
7
+ * scanWorkday, scanICIMS, scanGeneric) with a single uniform pass that works
8
+ * on any form because it reads what the browser tells screen readers.
9
+ *
10
+ * How:
11
+ * 1. INJECT mmid="N" attribute on every interactive DOM element (input,
12
+ * textarea, select, [contenteditable], [role=combobox|listbox|radio|
13
+ * checkbox|button|option], <a>). The mmid is a stable handle the LLM
14
+ * planner refers to when it returns an action plan.
15
+ * 2. FETCH the accessibility tree via Chrome DevTools Protocol's
16
+ * Accessibility.getFullAXTree. This returns the same tree
17
+ * screen readers see — already deduped, already labeled, already
18
+ * grouped by semantic role. Way cleaner than walking the DOM.
19
+ * 3. RECONCILE: walk the AX tree, find nodes carrying our injected mmid
20
+ * (via aria-keyshortcuts — we hijack that attribute because it surfaces
21
+ * verbatim as the AX node's `keyshortcuts` property and is rarely used).
22
+ * For each AX node, read its name/role/description/options/required
23
+ * flags, then enrich with DOM-only signals (typeahead heuristics, the
24
+ * raw input type, the parent label text for grouped fields).
25
+ * 4. PRUNE: drop hidden, decorative, or already-filled fields. Return
26
+ * a flat list the LLM planner can reason over in one prompt.
27
+ *
28
+ * Why this beats per-ATS scanners:
29
+ * - Works on any ATS (Greenhouse, Lever, Ashby, Workday, iCIMS) AND any
30
+ * unknown ATS without code changes — the AX tree is the same shape.
31
+ * - Picks up labels the DOM scanner missed (aria-labelledby chains,
32
+ * fieldset/legend grouping, parent-walk heuristics) because the
33
+ * browser already did all that work for screen readers.
34
+ * - Deduplicates radio/checkbox groups into a single fieldset entry
35
+ * (the AX tree has 'radiogroup' role).
36
+ *
37
+ * Output shape (one entry per fillable field):
38
+ * {
39
+ * mmid: "12", // stable handle for the planner
40
+ * role: "textbox" | "combobox" | "checkbox" | "radio" | "button" |
41
+ * "listbox" | "menu" | "file_upload" | "textarea",
42
+ * label: "First Name", // primary name from AX tree
43
+ * description: "Required", // helper / placeholder / hint
44
+ * required: true,
45
+ * options: ["Male","Female",...] | null,
46
+ * selectorHint: "#first_name", // best CSS selector we could build
47
+ * inputType: "text", // HTML type= for inputs
48
+ * value: "" | "Male", // current filled value
49
+ * isTypeahead: false, // pure DOM heuristic
50
+ * groupLabel: null | "Pronouns", // fieldset/legend grouping
51
+ * filledAlready: false,
52
+ * }
53
+ */
54
+
55
// DOM attribute that carries the numeric handle written by injectMmid; the
// DOM-side enrichment looks elements up again through this attribute.
+ const MMID_ATTR = 'mmid';
56
// ARIA attribute overwritten with the same handle so it can be read back from
// the accessibility tree (reconcile reads the AX `keyshortcuts` property).
+ const HANDLE_ATTR = 'aria-keyshortcuts'; // we hijack this — surfaces in AX tree
57
+
58
+ // ── Step 1: Inject mmid into every interactive element ───────────────────────
59
+
60
/**
 * Tag every interactive element in the page with a numeric handle.
 *
 * Each matched element gets the handle in both the `MMID_ATTR` attribute and
 * the hijacked `HANDLE_ATTR` attribute. Handles are assigned 1..n in document
 * query order on every call, so a re-scan may renumber elements.
 *
 * A pre-existing `HANDLE_ATTR` value is stashed in
 * `data-orig-aria-keyshortcuts` the first time an element is tagged. Elements
 * that already carry `MMID_ATTR` are never stashed again: on a re-scan their
 * `HANDLE_ATTR` holds our own previous handle, not page data.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)` that runs `fn`
 *   inside the page.
 * @returns {Promise<number>} Number of elements tagged, so callers can
 *   sanity-check that injection happened.
 */
async function injectMmid(page) {
  return await page.evaluate(({ mmidAttr, handleAttr }) => {
    // Declared inside the callback: it runs in the page, not in this module.
    const ORIG_ATTR = 'data-orig-aria-keyshortcuts';
    const sel = [
      'input',
      'textarea',
      'select',
      '[contenteditable="true"]',
      '[role="textbox"]',
      '[role="combobox"]',
      '[role="listbox"]',
      '[role="radiogroup"]',
      '[role="radio"]',
      '[role="checkbox"]',
      '[role="switch"]',
      '[role="button"]',
      '[role="option"]',
      '[role="menuitem"]',
      'button[type="submit"]',
      'a[href]',
    ].join(',');
    const all = document.querySelectorAll(sel);
    let n = 0;
    all.forEach((el) => {
      n += 1;
      const id = String(n);
      // Must be read BEFORE we write the handle: once tagged, the hijacked
      // attribute holds our id and must not be mistaken for original a11y data.
      const alreadyTagged = el.hasAttribute(mmidAttr);
      const prev = el.getAttribute(handleAttr);
      if (prev && !alreadyTagged && !el.hasAttribute(ORIG_ATTR)) {
        el.setAttribute(ORIG_ATTR, prev);
      }
      el.setAttribute(mmidAttr, id);
      el.setAttribute(handleAttr, id);
    });
    return n;
  }, { mmidAttr: MMID_ATTR, handleAttr: HANDLE_ATTR });
}
97
+
98
+ // ── Step 2: Fetch the full AX tree via CDP ───────────────────────────────────
99
+
100
/**
 * Fetch the page's full accessibility tree over a Chrome DevTools Protocol
 * session.
 *
 * Opens a CDP session for `page`, sends `Accessibility.enable` followed by
 * `Accessibility.getFullAXTree`, and returns the `nodes` field of that reply.
 * The session is detached in `finally`, whether or not the calls succeeded.
 *
 * @param {object} page - Page whose `context()` exposes `newCDPSession(page)`.
 * @returns {Promise<Array<object>>} The AX nodes reported by the browser.
 */
+ async function fetchAxTree(page) {
101
+ // Open a CDP session — Playwright exposes this via context.newCDPSession.
102
+ // The session is created from the page's browser context, scoped to this page.
103
+ const session = await page.context().newCDPSession(page);
104
+ try {
105
+ await session.send('Accessibility.enable');
106
+ const { nodes } = await session.send('Accessibility.getFullAXTree');
107
+ return nodes;
108
+ } finally {
// Best-effort cleanup: a failing detach is deliberately ignored so it cannot
// mask the result (or the error) of the calls above.
109
+ try { await session.detach(); } catch {}
110
+ }
111
+ }
112
+
113
+ // ── Step 3: Reconcile AX tree → flat list keyed by mmid ──────────────────────
114
+
115
+ /**
116
+ * Pull a flat value out of an AX node. AX values come as
117
+ * { type, value } objects; we just want the inner value.
+ *
+ * Lookup order: the node's own `key` field first, then the entry named
+ * `key` in its `properties` array. A candidate whose inner value is
+ * `undefined` is skipped; `null` or a missing node yields `undefined`.
+ *
+ * @param {object} node - AX node (may be null/undefined).
+ * @param {string} key - Field or property name, e.g. 'name', 'role', 'keyshortcuts'.
+ * @returns {*} The unwrapped value, or undefined when neither place has one.
118
+ */
119
+ function axProp(node, key) {
120
+ // Top-level fields like 'name' / 'role' have { type, value } shape.
121
+ if (node?.[key]?.value !== undefined) return node[key].value;
122
+ // 'properties' is an array of { name, value: {type, value} }
123
+ if (Array.isArray(node?.properties)) {
124
+ const p = node.properties.find((x) => x.name === key);
125
+ if (p?.value?.value !== undefined) return p.value.value;
126
+ }
127
+ return undefined;
128
+ }
129
+
130
+ /**
131
+ * Some labels arrive as the literal mmid we injected (when an element's
132
+ * name resolves to its own aria-keyshortcuts via labelledby chains).
133
+ * Strip those — they're our injection, not real labels.
+ *
+ * Returns true only for strings that, once trimmed, consist entirely of
+ * digits; non-string input returns false.
+ *
+ * NOTE(review): a genuinely numeric label (e.g. "10") also matches, because
+ * the check does not compare against the element's actual handle.
+ *
+ * @param {*} s - Candidate label.
+ * @returns {boolean} Whether `s` looks like an injected numeric handle.
134
+ */
135
+ function isMmidLiteral(s) {
136
+ return typeof s === 'string' && /^\d+$/.test(s.trim());
137
+ }
138
+
139
/**
 * Turn the flat AX node list into one record per element tagged by injectMmid.
 *
 * A node is kept when its `keyshortcuts` AX property is a digit-only handle
 * and its role is not 'generic', 'none' or 'presentation'. For each kept node
 * the nearest named ancestor with role 'radiogroup', 'group' or 'form'
 * (searched at most 10 levels up) supplies `groupLabel`.
 *
 * @param {Array<object>} axNodes - AX nodes carrying `nodeId` / `parentId`.
 * @returns {Array<object>} Records with mmid, role, rawName, description,
 *   required, disabled, focused, checked, selected, axValue and groupLabel.
 */
+ function reconcile(axNodes) {
140
+ // Index every AX node by its nodeId so parents can be looked up cheaply.
141
+ // The AX tree is a flat list of nodes with parentId/childIds, so we
142
+ // first build that index, then walk parentId links to find an ancestor
143
+ // radiogroup / group / form name for grouped fields.
144
+ const byId = new Map();
145
+ for (const n of axNodes) byId.set(n.nodeId, n);
146
+
147
+ // For each node, find the mmid (lives in keyshortcuts because we hijacked it)
148
+ // and the closest ancestor whose role is 'radiogroup' / 'group' / 'form'.
149
+ const out = [];
150
+ for (const n of axNodes) {
151
+ const mmid = axProp(n, 'keyshortcuts');
// Nodes without a digit-only handle were not tagged by us — ignore them.
152
+ if (!mmid || !/^\d+$/.test(String(mmid))) continue;
153
+
154
+ const role = (axProp(n, 'role') || '').toString();
155
+ // Skip nodes that are containers/decorative — we want fillable leaves.
156
+ // Buttons / links we keep so the planner can decide to click them
157
+ // (Submit, Next, etc.).
158
+ if (['generic', 'none', 'presentation'].includes(role)) continue;
159
+
160
+ // Walk up to find a grouping label
161
+ let groupLabel = null;
162
+ let cursor = n;
// Bounded climb: give up after 10 ancestors.
163
+ let hops = 0;
164
+ while (cursor && hops < 10) {
165
+ const parentId = cursor.parentId;
166
+ if (!parentId) break;
167
+ const parent = byId.get(parentId);
168
+ if (!parent) break;
169
+ const parentRole = (axProp(parent, 'role') || '').toString();
170
+ if (['radiogroup', 'group', 'form'].includes(parentRole)) {
171
+ const pname = axProp(parent, 'name');
// An unnamed (or handle-named) group is skipped; the climb continues upward.
172
+ if (pname && !isMmidLiteral(pname)) {
173
+ groupLabel = String(pname).trim();
174
+ break;
175
+ }
176
+ }
177
+ cursor = parent;
178
+ hops += 1;
179
+ }
180
+
181
+ out.push({
182
+ mmid: String(mmid),
183
+ role,
184
+ // Strip mmid-literal names — that happens when the element has no real
185
+ // accessible name and the AX tree falls back to our hijacked attr.
186
+ rawName: (() => {
187
+ const v = axProp(n, 'name');
188
+ return v && !isMmidLiteral(v) ? String(v).trim() : '';
189
+ })(),
190
+ description: (() => {
191
+ const v = axProp(n, 'description');
192
+ return v && !isMmidLiteral(v) ? String(v).trim() : '';
193
+ })(),
194
+ required: !!axProp(n, 'required'),
195
+ disabled: !!axProp(n, 'disabled'),
196
+ focused: !!axProp(n, 'focused'),
// `checked` is passed through uncoerced, unlike the boolean flags around it.
197
+ checked: axProp(n, 'checked'),
198
+ selected: !!axProp(n, 'selected'),
199
+ // Only the AX `value` is read here; `valuetext` is not consulted.
200
+ // It is passed through as-is (undefined when the node reports none).
201
+ axValue: axProp(n, 'value'),
202
+ groupLabel,
203
+ });
204
+ }
205
+ return out;
206
+ }
207
+
208
+ // ── Step 4: Enrich each reconciled field with DOM-only signals ───────────────
209
+
210
/**
 * Enrich reconciled AX fields with DOM-only signals.
 *
 * The AX tree tells us what a field IS; the DOM tells us what we need to
 * INTERACT with it. For every mmid we pull, in a single `page.evaluate`:
 *   - tag, inputType, role attribute, name, id, aria-label, placeholder
 *   - selectorHint: `#id` or `tag[name="…"]` when that selector resolves to
 *     exactly this element, otherwise the `[mmid="N"]` selector (shared radio
 *     names and duplicated ids would otherwise point at the wrong element)
 *   - currentValue: the element's string `value`, else its text when it is
 *     contenteditable
 *   - options[] for native <select>
 *   - isTypeahead heuristics (aria-autocomplete, aria-haspopup, combobox
 *     ancestor, a nearby "Locate me" button)
 *   - iframeSrc when the evaluating window is itself framed
 *
 * Fields whose element can no longer be found, or which are not visible, are
 * dropped. DOM properties overwrite AX properties of the same name in the
 * merged record — note this includes `role`, which becomes the DOM `role`
 * attribute (often '').
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {Array<object>} axFields - Records carrying an `mmid` string.
 * @returns {Promise<Array<object>>} Merged AX + DOM records for visible fields.
 */
async function enrichFromDom(page, axFields) {
  const mmids = axFields.map((f) => f.mmid);
  if (mmids.length === 0) return [];

  const enriched = await page.evaluate(({ mmids, mmidAttr }) => {
    function safeText(t) { return (t || '').replace(/\s+/g, ' ').trim(); }
    function cssEscape(s) {
      // CSS.escape isn't available in some older contexts; polyfill.
      try { return CSS.escape(s); } catch { return s.replace(/([!"#$%&'()*+,.\/:;<=>?@\[\\\]^`{|}~])/g, '\\$1'); }
    }
    // True only when `selector` is valid and matches `el` and nothing else.
    function resolvesOnlyTo(selector, el) {
      try {
        const hits = document.querySelectorAll(selector);
        return hits.length === 1 && hits[0] === el;
      } catch {
        return false;
      }
    }

    return mmids.map((mmid) => {
      const el = document.querySelector(`[${mmidAttr}="${mmid}"]`);
      if (!el) return { mmid, missing: true };

      const tag = el.tagName.toLowerCase();
      const type = (el.type || '').toLowerCase();
      const role = (el.getAttribute('role') || '').toLowerCase();
      const isContentEditable = el.isContentEditable || false;

      // Skip non-visible elements UNLESS they're radio/checkbox/file (often
      // styled-hidden but functional) or option lists in a dropdown.
      let isVisible = true;
      if (!['radio', 'checkbox', 'file'].includes(type) && role !== 'option') {
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0) isVisible = false;
        if (el.offsetParent === null && !el.closest('[role="dialog"]')) isVisible = false;
      }

      // Best selector — used by the executor to re-locate the element.
      // Prefer id, then name, but only when the selector is unambiguous;
      // otherwise fall back to the mmid attribute, which we control.
      const candidates = [];
      if (typeof el.id === 'string' && el.id) {
        candidates.push(`#${cssEscape(el.id)}`);
      }
      if (typeof el.name === 'string' && el.name) {
        // Escape both quote and backslash so the attribute string stays valid.
        candidates.push(`${tag}[name="${el.name.replace(/["\\]/g, '\\$&')}"]`);
      }
      const selectorHint = candidates.find((c) => resolvesOnlyTo(c, el))
        ?? `[${mmidAttr}="${mmid}"]`;

      // Current value (skip already-filled). `value` is numeric on some
      // elements (e.g. <li value="3">); only string values are meaningful here
      // and calling .trim() on a number would abort the whole scan.
      const rawValue = typeof el.value === 'string' ? el.value : '';
      const currentValue = (rawValue || (isContentEditable ? safeText(el.innerText) : '') || '').trim();

      // Native <select> options
      let options = null;
      if (tag === 'select' && el.options) {
        options = Array.from(el.options)
          .map((o) => ({ value: o.value, label: safeText(o.text) }))
          .filter((o) => o.label);
      }

      // Typeahead heuristic (Greenhouse Location, Lever City, Workday locations).
      // Kept here so the planner can reason about HOW to fill, not just WHAT.
      const isTypeahead = !!(
        el.getAttribute('aria-autocomplete') === 'list' ||
        el.getAttribute('aria-autocomplete') === 'both' ||
        el.getAttribute('aria-haspopup') === 'listbox' ||
        el.getAttribute('aria-haspopup') === 'true' ||
        (el.getAttribute('autocomplete') === 'off' && el.getAttribute('aria-expanded') !== null) ||
        el.closest('[role="combobox"]') ||
        Array.from((el.closest('div') || el.parentElement || document).querySelectorAll('button, a')).some((b) => /locate\s*me/i.test(b.textContent || ''))
      );

      // Iframe context — we can't serialize a frame from here, so report the
      // owning iframe's src (or name) for matching later. Empty when this
      // window is not framed.
      const ownerFrame = window.frameElement;
      const iframeSrc = ownerFrame ? ownerFrame.src || ownerFrame.name || '' : '';

      return {
        mmid,
        tag,
        inputType: type,
        role,
        name: el.name || '',
        id: el.id || '',
        ariaLabel: el.getAttribute('aria-label') || '',
        placeholder: el.placeholder || '',
        selectorHint,
        currentValue,
        options,
        isTypeahead,
        iframeSrc,
        isVisible,
        isContentEditable,
        // The element's own text content — for <button> / <a> the AX name
        // sometimes misses the visible label. Keep as a fallback.
        textContent: safeText(el.innerText || el.textContent || '').slice(0, 120),
      };
    });
  }, { mmids, mmidAttr: MMID_ATTR });

  // Merge AX side and DOM side by mmid (DOM keys win on collision).
  const domByMmid = new Map(enriched.map((e) => [e.mmid, e]));
  return axFields
    .map((ax) => ({ ...ax, ...(domByMmid.get(ax.mmid) || {}) }))
    .filter((f) => !f.missing && f.isVisible !== false);
}
317
+
318
+ // ── Step 5: Normalize into the agent-facing schema ───────────────────────────
319
+
320
/**
 * Map a scanned field (AX role + HTML tag/type) onto one canonical
 * interaction category for the planner.
 *
 * Possible results, checked in this priority order: 'file_upload',
 * 'checkbox', 'radio', 'combobox', 'textarea', 'button', 'option', 'link',
 * and finally 'textbox' as the catch-all. Typeahead inputs stay 'textbox';
 * the planner is expected to look at `isTypeahead` separately.
 *
 * @param {object} f - merged AX/DOM field record (reads inputType, role, tag, isContentEditable)
 * @returns {string} canonical interaction category
 */
function normalizeRole(f) {
  const { inputType, role, tag, isContentEditable } = f;

  // Ordered rule table: the first rule whose condition holds wins.
  const rules = [
    ['file_upload', inputType === 'file'],
    ['checkbox', inputType === 'checkbox' || role === 'checkbox' || role === 'switch'],
    ['radio', inputType === 'radio' || role === 'radio'],
    ['combobox', tag === 'select' || role === 'combobox' || role === 'listbox'],
    ['textarea', tag === 'textarea' || Boolean(isContentEditable)],
    ['button', tag === 'button' || role === 'button'],
    ['option', role === 'option' || role === 'menuitem'],
    ['link', tag === 'a'],
  ];

  const winner = rules.find(([, applies]) => applies);
  // Anything unmatched is treated as a free-text input.
  return winner ? winner[0] : 'textbox';
}
338
+
339
/**
 * Choose the human-readable label for a scanned field.
 *
 * Priority: AX name → aria-label → placeholder → text content → id → name.
 * Each candidate is trimmed BEFORE it is accepted, so a whitespace-only
 * value (e.g. an AX name of "  ") no longer hides a usable fallback.
 *
 * @param {object} f - merged AX/DOM field record
 * @returns {string} trimmed label, or '' when no candidate has visible text
 */
function pickLabel(f) {
  const candidates = [f.rawName, f.ariaLabel, f.placeholder, f.textContent, f.id, f.name];
  for (const candidate of candidates) {
    if (!candidate) continue;
    const text = String(candidate).trim();
    if (text) return text;
  }
  return '';
}
345
+
346
/**
 * Choose the helper/description text for a scanned field.
 *
 * The AX description wins when it has visible text. Otherwise the
 * placeholder is used, but only when the field also has an AX name — when
 * there is no AX name the placeholder is left for the label instead.
 * A whitespace-only description is treated as absent so it cannot mask the
 * placeholder fallback.
 *
 * @param {object} f - merged AX/DOM field record
 * @returns {string} trimmed description, or '' when none applies
 */
function pickDescription(f) {
  const description = f.description ? String(f.description).trim() : '';
  if (description) return description;
  return f.rawName && f.placeholder ? String(f.placeholder).trim() : '';
}
350
+
351
+ // ── Public entry point ───────────────────────────────────────────────────────
352
+
353
/**
 * Main scanner. Returns a flat list of fields the LLM planner can plan over.
 *
 * Pipeline (main frame only): inject mmid handles → fetch the AX tree →
 * reconcile AX nodes with mmids → enrich from the DOM → normalize into the
 * planner-facing schema.
 *
 * Child frames are deliberately NOT touched. Earlier code stamped mmid
 * attributes (and overwrote the handle attribute) inside every child frame
 * and then discarded the frame without reading anything back, which mutated
 * third-party DOM for no benefit. Until frame-aware reconciliation exists,
 * fields inside iframes are simply not reported.
 *
 * Any failure in the scan pipeline is logged and yields an empty list
 * rather than throwing.
 *
 * @param {object} page - Playwright page to scan
 * @returns {Promise<Array<object>>} normalized fields: mmid, role, label,
 *   description, required, disabled, options (labels only or null),
 *   selectorHint, inputType, currentValue, isTypeahead, groupLabel,
 *   textContent, filledAlready
 */
async function scanAccessibility(page) {
  let rawFields = [];
  try {
    const injected = await injectMmid(page);
    // Nothing was tagged → nothing to reconcile against the AX tree.
    if (injected) {
      const axNodes = await fetchAxTree(page);
      const axFields = reconcile(axNodes);
      rawFields = await enrichFromDom(page, axFields);
    }
  } catch (e) {
    console.warn(`[scanAx] Frame scan failed: ${e.message}`);
  }

  // Normalize and dedupe by mmid (first occurrence wins).
  const seen = new Set();
  const out = [];
  for (const f of rawFields) {
    if (seen.has(f.mmid)) continue;
    seen.add(f.mmid);

    const label = pickLabel(f);
    const role = normalizeRole(f);
    // Filter noise: unlabeled fields are only useful when they are clickable.
    if (!label && role !== 'button' && role !== 'link') continue;

    out.push({
      mmid: f.mmid,
      role,
      label,
      description: pickDescription(f),
      required: f.required,
      disabled: f.disabled,
      options: f.options ? f.options.map((o) => o.label) : null,
      selectorHint: f.selectorHint,
      inputType: f.inputType,
      currentValue: f.currentValue,
      isTypeahead: f.isTypeahead,
      groupLabel: f.groupLabel,
      // Forward the raw text content for buttons/links the planner might click
      textContent: f.textContent,
      // Already-filled hint so the planner can skip
      filledAlready: !!(f.currentValue && f.currentValue.length > 0),
    });
  }

  const countOf = (role) => out.filter((f) => f.role === role).length;
  console.log(`[scanAx] ${out.length} fields detected (${countOf('textbox')} text, ${countOf('combobox')} dropdowns, ${countOf('file_upload')} files, ${countOf('button')} buttons)`);
  return out;
}
446
+
447
+ module.exports = { scanAccessibility };
package/scanPage.js CHANGED
@@ -268,6 +268,21 @@ async function scanGreenhouse(page) {
268
268
  if (f.id && ghSystemIds[f.id]) {
269
269
  return { ...f, category: f.id === 'resume' || f.id === 'cover_letter' ? 'file:' + f.id : 'profile:' + ghSystemIds[f.id] };
270
270
  }
271
+ // File inputs — Greenhouse's modern job-boards UI labels both file
272
+ // inputs as "Attach" (the visible button text), not "Resume" or
273
+ // "Cover Letter". Distinguish by name= attribute instead.
274
+ // resume: name="resume" or name="job_application[resume]"
275
+ // cover ltr: name="cover_letter" or name="job_application[cover_letter]"
276
+ if (f.inputType === 'file') {
277
+ const n = (f.name || '').toLowerCase();
278
+ if (n.includes('cover_letter') || n.includes('coverletter')) return { ...f, category: 'file:cover_letter' };
279
+ if (n.includes('resume') || n.includes('cv')) return { ...f, category: 'file:resume' };
280
+ // Order on the page: first file input is resume, second is cover letter.
281
+ // Without name/id signals, fall back to ordering. We mark unknowns
282
+ // 'file:resume' here and re-tag the second occurrence below after
283
+ // the map (where we have the whole list).
284
+ return { ...f, category: 'file:unknown' };
285
+ }
271
286
  // Custom question fields: id like question_12345678
272
287
  if (f.id && /^question_\d+$/.test(f.id)) {
273
288
  return { ...f, category: 'custom' };
@@ -281,7 +296,17 @@ async function scanGreenhouse(page) {
281
296
  return { ...f, category: 'profile:' + f.id.split('--')[0] };
282
297
  }
283
298
  return { ...f, category: classifyField(f) };
284
- }).filter(f => f.category !== 'ignore' && f.inputType !== 'hidden');
299
+ }).reduce((acc, f) => {
300
+ // Position-based resolution for ambiguous file inputs: first unknown
301
+ // file → resume, second → cover_letter. Greenhouse always renders
302
+ // resume above cover letter; the DOM walk preserves that order.
303
+ if (f.category === 'file:unknown') {
304
+ const sawResume = acc.some(p => p.category === 'file:resume');
305
+ f.category = sawResume ? 'file:cover_letter' : 'file:resume';
306
+ }
307
+ acc.push(f);
308
+ return acc;
309
+ }, []).filter(f => f.category !== 'ignore' && f.inputType !== 'hidden');
285
310
  }
286
311
 
287
312
  /**
package/smartFill.js ADDED
@@ -0,0 +1,362 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * The smart fill loop. One round per page:
5
+ * 1. Scan the page via scanAccessibility (AX tree + mmid injection).
6
+ * 2. POST those fields to /smartfill/plan-fill with canonical facts.
7
+ * 3. Execute the returned action plan deterministically.
8
+ *
9
+ * Replaces the old fillFields/fillLocator stack. The action executor here
10
+ * is the ONLY place that touches the DOM — every "how do I fill X" question
11
+ * lives in one switch statement instead of being spread across filler.js,
12
+ * scanPage.js, and the orchestrator.
13
+ *
14
+ * Falls back to legacy fillFields when /smartfill/plan-fill returns 502
15
+ * (LLM down, parser bug, etc.) — so a planner outage degrades to the old
16
+ * agent rather than freezing.
17
+ */
18
+
19
+ const { scanAccessibility } = require('./scanAccessibility');
20
+ const { fillFields: legacyFillFields } = require('./filler');
21
+
22
+ // ── Plan executor ───────────────────────────────────────────────────────────
23
+
24
/**
 * Execute ONE plan entry against the live page.
 *
 * Returns:
 *   { ok: true, reason }        on success (including planner skips)
 *   { ok: false, reason }       on failure (caller continues with the next item)
 *   { ok: 'ask_user', reason }  when the planner asked for human input
 *
 * Notes on behavior:
 *   - `item.mmid` is looked up both as given and as a string, because the
 *     caller keys `fieldByMmid` by String(mmid) while the planner may echo
 *     the id back as a number.
 *   - 'skip' and 'ask_user' never touch the DOM, so they are answered before
 *     the element is located.
 *   - 'upload_file' only requires the input to be attached, not visible:
 *     file inputs are commonly hidden behind a styled button, and this case
 *     un-hides the element itself before calling setInputFiles.
 *
 * @param {object} page - Playwright page
 * @param {object} item - plan entry: { mmid, action, value?, reasoning? }
 * @param {Map<string, object>} fieldByMmid - scanned fields keyed by mmid
 * @param {object} ctx - { resumePath, coverLetterPath }
 * @returns {Promise<{ok: boolean|string, reason: string}>}
 */
async function executePlanItem(page, item, fieldByMmid, ctx) {
  const field = fieldByMmid.get(item.mmid) ?? fieldByMmid.get(String(item.mmid));
  if (!field) return { ok: false, reason: 'mmid vanished' };

  // Actions that need no element.
  if (item.action === 'skip') {
    return { ok: true, reason: `skip: ${item.reasoning || 'planner skipped'}` };
  }
  if (item.action === 'ask_user') {
    return { ok: 'ask_user', reason: item.reasoning || 'planner requested human input' };
  }

  // Escape a value for use inside a double-quoted CSS string.
  const cssString = (raw) => String(raw).replace(/\\/g, '\\\\').replace(/"/g, '\\"');

  const isUpload = item.action === 'upload_file';
  const isUsable = async (candidate) => {
    try {
      return isUpload
        ? (await candidate.count()) > 0
        : await candidate.isVisible({ timeout: 800 });
    } catch {
      return false;
    }
  };

  // Re-locate the element via the mmid attribute (the executor's anchor).
  // Falls back to selectorHint if mmid was wiped (e.g. a full re-render
  // between scan and execute).
  let locator = page.locator(`[mmid="${cssString(item.mmid)}"]`).first();
  let usable = await isUsable(locator);
  if (!usable && field.selectorHint) {
    locator = page.locator(field.selectorHint).first();
    usable = await isUsable(locator);
  }
  if (!usable) {
    const state = isUpload ? 'attached' : 'visible';
    return { ok: false, reason: `element not ${state} (mmid=${item.mmid})` };
  }

  switch (item.action) {
    case 'type': {
      if (!item.value) return { ok: true, reason: 'skip: empty value' };
      // Typeahead path: open suggestion list, pick first match.
      if (field.isTypeahead) {
        return await typeAndPickSuggestion(page, locator, item.value);
      }
      return await reactSafeType(page, locator, item.value);
    }

    case 'select_option': {
      // Native <select>. value is expected to be the option LABEL; if no
      // option has that label, retry treating it as the option value.
      try {
        try {
          await locator.selectOption({ label: item.value });
        } catch {
          await locator.selectOption({ value: item.value });
        }
        return { ok: true, reason: `select: ${item.value}` };
      } catch (e) {
        return { ok: false, reason: `select failed: ${e.message}` };
      }
    }

    case 'click_option': {
      // Custom dropdown: click trigger, find option by text, click it.
      const wanted = String(item.value ?? '').toLowerCase().trim();
      if (!wanted) return { ok: false, reason: 'click_option failed: empty value' };
      try {
        await locator.click({ timeout: 2500 });
        await page.waitForTimeout(300);
        const optionSel = '[role="option"], [role="menuitem"], .select__option, li[class*="option"]';
        const opts = page.locator(optionSel);
        const count = await opts.count().catch(() => 0);
        if (count === 0) return { ok: false, reason: 'dropdown opened but no options' };
        const texts = await opts.allTextContents().catch(() => []);
        const normalized = texts.map((t) => t.toLowerCase().trim());
        // Exact match first, then containment in either direction. Blank
        // option texts are excluded: every string "contains" the empty string.
        let idx = normalized.indexOf(wanted);
        if (idx === -1) {
          idx = normalized.findIndex((t) => t !== '' && (t.includes(wanted) || wanted.includes(t)));
        }
        if (idx === -1) {
          await page.keyboard.press('Escape').catch(() => {});
          return { ok: false, reason: `option "${item.value}" not in dropdown (have: ${texts.slice(0, 5).join(', ')})` };
        }
        await opts.nth(idx).click({ timeout: 2000 });
        return { ok: true, reason: `clicked option: ${texts[idx]}` };
      } catch (e) {
        await page.keyboard.press('Escape').catch(() => {});
        return { ok: false, reason: `click_option failed: ${e.message}` };
      }
    }

    case 'set_checkbox': {
      try {
        const wantChecked = String(item.value).toLowerCase().trim() === 'true';
        const isChecked = await locator.isChecked().catch(() => false);
        if (wantChecked !== isChecked) await locator.click({ timeout: 2000 });
        return { ok: true, reason: `checkbox: ${wantChecked}` };
      } catch (e) {
        return { ok: false, reason: `checkbox failed: ${e.message}` };
      }
    }

    case 'set_radio': {
      // The mmid points at ONE radio; the planner's value is the label of
      // the option to choose. Walk the group and click the best match.
      try {
        const wanted = String(item.value ?? '').toLowerCase().trim();
        if (!wanted) return { ok: false, reason: 'radio failed: empty value' };

        // Strategy 1: native radios sharing name= — match on label text.
        const name = field.name;
        if (name) {
          const group = page.locator(`input[type="radio"][name="${cssString(name)}"]`);
          const groupSize = await group.count();
          const labels = [];
          for (let i = 0; i < groupSize; i++) {
            const rad = group.nth(i);
            const id = await rad.getAttribute('id').catch(() => null);
            let lbl = '';
            if (id) {
              lbl = (await page.locator(`label[for="${cssString(id)}"]`).first().textContent().catch(() => '') || '').trim();
            }
            if (!lbl) lbl = (await rad.evaluate((el) => el.value || '').catch(() => '') || '');
            labels.push(lbl);
          }
          // Prefer an exact label match so "No" is not captured by "None";
          // only then fall back to substring matching.
          const lowered = labels.map((l) => l.toLowerCase().trim());
          let hit = lowered.indexOf(wanted);
          if (hit === -1) hit = lowered.findIndex((l) => l.includes(wanted));
          if (hit !== -1) {
            const rad = group.nth(hit);
            await rad.check({ force: true, timeout: 1500 }).catch(() => rad.click({ force: true }));
            return { ok: true, reason: `radio: ${labels[hit]}` };
          }
        }

        // Strategy 2: just click the labeled option directly (custom radios).
        const v = String(item.value);
        const quoted = cssString(v);
        const opt = page.locator(`[role="radio"]:has-text("${quoted}"), label:has-text("${quoted}")`).first();
        if (await opt.isVisible({ timeout: 800 }).catch(() => false)) {
          await opt.click({ timeout: 1500 });
          return { ok: true, reason: `radio (label): ${v}` };
        }
        return { ok: false, reason: `radio option "${item.value}" not found` };
      } catch (e) {
        return { ok: false, reason: `radio failed: ${e.message}` };
      }
    }

    case 'upload_file': {
      const which = String(item.value).toLowerCase().trim();
      // Anything other than 'cover_letter' is treated as the resume.
      const path = which === 'cover_letter'
        ? ctx?.coverLetterPath
        : ctx?.resumePath;
      if (!path) return { ok: false, reason: `no ${which} file available` };
      try {
        // Unhide so setInputFiles doesn't reject a 0x0 element
        await locator.evaluate((el) => {
          el.style.display = 'block';
          el.style.opacity = '1';
          el.style.visibility = 'visible';
          el.style.width = '1px';
          el.style.height = '1px';
        }).catch(() => {});
        await locator.setInputFiles(path, { timeout: 6000 });
        return { ok: true, reason: `uploaded ${which}` };
      } catch (e) {
        return { ok: false, reason: `upload failed: ${e.message}` };
      }
    }

    default:
      return { ok: false, reason: `unknown action: ${item.action}` };
  }
}
177
+
178
+ // ── Strategy helpers (shared by type + typeahead) ────────────────────────────
179
+
180
/**
 * React-safe text fill. Tries three strategies in order and stops at the
 * first one after which the element reads back as non-empty:
 *   1. locator.fill
 *   2. click + clear + page.keyboard.type
 *   3. native value setter plus synthetic input/change events
 *
 * Read-back uses inputValue() and, when that fails (it throws for elements
 * that are not input/textarea/select, e.g. contenteditable editors), falls
 * back to reading the text of a contenteditable element. Without that
 * fallback a successful fill into a rich-text editor was reported as a
 * failure after needlessly running all three strategies.
 *
 * The check is intentionally "non-empty" rather than "equals value".
 * NOTE(review): presumably so fields that reformat input (masks) still count
 * as filled — confirm.
 *
 * @param {object} page - Playwright page (used for keyboard typing)
 * @param {object} locator - Playwright locator of the target element
 * @param {*} value - value to type; coerced with String()
 * @returns {Promise<{ok: boolean, reason: string}>}
 */
async function reactSafeType(page, locator, value) {
  const v = String(value);
  const preview = v.slice(0, 30);
  let lastError = null;

  // Current text of the element, or null when it cannot be read.
  const readBack = async () => {
    const viaInput = await locator.inputValue({ timeout: 800 }).catch(() => null);
    if (typeof viaInput === 'string') return viaInput;
    try {
      return await locator.evaluate((el) => (el.isContentEditable ? el.innerText || el.textContent || '' : null));
    } catch {
      return null;
    }
  };

  const strategies = [
    ['fill', async () => {
      await locator.fill(v, { timeout: 4000 });
    }],
    ['keyboard', async () => {
      await locator.click({ timeout: 1500 });
      // Best-effort clear; typing proceeds even if the clear is rejected.
      await locator.fill('', { timeout: 1500 }).catch(() => {});
      await page.keyboard.type(v, { delay: 20 });
    }],
    ['native', async () => {
      await locator.evaluate((el, val) => {
        el.focus();
        if (el.isContentEditable) {
          el.textContent = val;
        } else {
          // Use the prototype's setter so React's value tracker sees the change.
          const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
          const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
          if (setter) setter.call(el, val); else el.value = val;
        }
        el.dispatchEvent(new Event('input', { bubbles: true }));
        el.dispatchEvent(new Event('change', { bubbles: true }));
        el.blur();
      }, v);
    }],
  ];

  for (const [name, run] of strategies) {
    try {
      await run();
      const got = await readBack();
      if (got && got.trim()) return { ok: true, reason: `typed (${name}): "${preview}"` };
    } catch (e) {
      // Remember why this rung failed, then fall through to the next one.
      lastError = e;
    }
  }

  const base = 'all type strategies returned empty value';
  return { ok: false, reason: lastError ? `${base} (last error: ${lastError.message})` : base };
}
214
+
215
/**
 * Type-and-pick for typeahead inputs (Greenhouse Location etc).
 *
 * Clears the input, types only the first comma/semicolon-separated chunk of
 * the value, waits for suggestions, then clicks the best one: exact text
 * match, else a prefix match, else the first suggestion. When no suggestion
 * list appears, the typed text is accepted if the input reads back non-empty.
 *
 * Fixes over the previous version:
 *   - The select-all chord is chosen by platform. press('Meta+A') does not
 *     reject on non-mac systems, so the old `.catch` fallback to Control+A
 *     never ran and stale text was left in the field.
 *   - `value` is coerced to a string once; a numeric value used to throw in
 *     the "accepted" branch and be reported as a failure.
 *   - A value with an empty first chunk is rejected up front instead of
 *     typing nothing and clicking whatever option happens to be on the page.
 *
 * @param {object} page - Playwright page
 * @param {object} locator - Playwright locator of the typeahead input
 * @param {*} value - desired value; coerced with String()
 * @returns {Promise<{ok: boolean, reason: string}>}
 */
async function typeAndPickSuggestion(page, locator, value) {
  const text = String(value ?? '');
  const firstChunk = text.split(/[,;]/)[0].trim();
  if (!firstChunk) return { ok: false, reason: 'typeahead failed: empty value' };

  try {
    await locator.click({ timeout: 2000 });

    // NOTE(review): assumes the browser runs on the same OS as this process.
    const selectAllModifier = typeof process !== 'undefined' && process.platform === 'darwin' ? 'Meta' : 'Control';
    await locator.press(`${selectAllModifier}+A`).catch(() => {});
    await locator.press('Delete').catch(() => {});

    await page.keyboard.type(firstChunk, { delay: 60 });
    await page.waitForTimeout(700);

    const optionSel = '[role="option"], [role="listbox"] li, .select__option, ul[class*="autocomplete"] li';
    const opts = page.locator(optionSel);
    const count = await opts.count().catch(() => 0);
    if (count === 0) {
      // Some fields accept the typed value directly. Verify.
      const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
      if (got && got.trim()) return { ok: true, reason: `typeahead (no suggestion, accepted): "${text.slice(0, 30)}"` };
      return { ok: false, reason: 'typeahead opened no suggestions' };
    }

    const texts = await opts.allTextContents().catch(() => []);
    const needle = firstChunk.toLowerCase();
    const normalized = texts.map((t) => t.toLowerCase().trim());
    let idx = normalized.indexOf(needle);
    if (idx === -1) idx = normalized.findIndex((t) => t.startsWith(needle));
    // No textual match: fall back to the top suggestion.
    if (idx === -1) idx = 0;

    await opts.nth(idx).click({ timeout: 2000 });
    await page.waitForTimeout(200);
    return { ok: true, reason: `typeahead picked: ${texts[idx]}` };
  } catch (e) {
    return { ok: false, reason: `typeahead failed: ${e.message}` };
  }
}
247
+
248
+ // ── Main entry: scan + plan + execute one round on the current page ──────────
249
+
250
/**
 * Run one scan → plan → execute round on the current page.
 *
 * Steps:
 *   1. Scan the page with scanAccessibility (a scan failure is logged and
 *      treated as "no fields").
 *   2. POST the fields, plus already-answered labels from ctx, to
 *      /smartfill/plan-fill.
 *   3. Execute the returned plan in field-scan order via executePlanItem.
 *
 * If no usable plan comes back (non-OK response, fetch error, or a body
 * without a `plan` array) the legacy filler is used instead and the result
 * is flagged with `fallback: true`.
 *
 * Counting rules: planner skips AND executor-reported skips (reason starting
 * with "skip:", e.g. a 'type' action with an empty value) count as skipped
 * and are not recorded in ctx.answeredFields; 'ask_user' results count as
 * failed and are also collected in askUserReasons.
 *
 * @param {Page} page
 * @param {object} aep — the agent execution packet (forwarded to the legacy filler)
 * @param {object} options
 * @param {object} options.config — agent config (apiUrl, token)
 * @param {string} options.jobId
 * @param {string} options.resumePath
 * @param {string|null} options.coverLetterPath
 * @param {object} options.ctx — fill context; `answeredFields` is read as a
 *   Map of label → { value, ... } and updated with successful fills
 * @param {string} [options.ats] — ATS name for the legacy filler (default 'generic')
 * @returns {Promise<{filled, skipped, failed, askUserReasons, planned, fallback}>}
 */
async function smartFillPage(page, aep, options) {
  const { config, jobId, resumePath, coverLetterPath, ctx } = options;

  // 1. Scan via AX tree
  const fields = await scanAccessibility(page).catch((e) => {
    console.warn(`[smartFill] scanAccessibility failed: ${e.message}`);
    return [];
  });
  if (fields.length === 0) {
    console.warn('[smartFill] no fields detected on page');
    return { filled: 0, skipped: 0, failed: 0, askUserReasons: [], planned: 0, fallback: false };
  }

  // 2. Build already_filled snapshot from ctx for the planner
  const alreadyFilled = [];
  if (ctx && ctx.answeredFields) {
    for (const [label, info] of ctx.answeredFields) {
      alreadyFilled.push({ label, value: String(info.value || '').slice(0, 80) });
    }
  }

  // 3. Request the plan
  let plan = null;
  try {
    const res = await fetch(`${config.apiUrl}/smartfill/plan-fill`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
      body: JSON.stringify({ job_id: jobId, fields, already_filled: alreadyFilled }),
    });
    if (res.ok) {
      const j = await res.json();
      plan = Array.isArray(j.plan) ? j.plan : null;
    } else {
      console.warn(`[smartFill] plan-fill ${res.status}; falling back to legacy filler`);
    }
  } catch (e) {
    console.warn(`[smartFill] plan-fill threw: ${e.message}; falling back`);
  }

  if (!plan) {
    // Planner unavailable — degrade gracefully. A failing legacy filler is
    // logged (not thrown) so the caller still gets a well-formed result.
    const legacyResult = await legacyFillFields(page, aep, {
      ats: options.ats || 'generic',
      ctx,
      config,
      jobId,
    }).catch((e) => {
      console.warn(`[smartFill] legacy filler failed: ${e.message}`);
      return { filled: 0, skipped: 0, failed: 0 };
    });
    return { ...legacyResult, askUserReasons: [], planned: 0, fallback: true };
  }

  console.log(`[smartFill] planner returned ${plan.length} actions for ${fields.length} fields`);

  // 4. Execute the plan in DOM order (planner doesn't dictate order; we
  //    mirror the field scan order so dependent fields fill after their parents).
  const fieldByMmid = new Map(fields.map((f) => [String(f.mmid), f]));
  const planByMmid = new Map(plan.map((p) => [String(p.mmid), p]));
  let filled = 0;
  let skipped = 0;
  let failed = 0;
  const askUserReasons = [];

  for (const f of fields) {
    const key = String(f.mmid);
    const item = planByMmid.get(key);
    if (!item) {
      // Planner didn't include this field — treat as a silent skip.
      continue;
    }
    const labelShort = (f.label || f.selectorHint || '?').slice(0, 50);
    // Hand the executor the string-normalized mmid: fieldByMmid is keyed by
    // String(mmid), and the planner may have echoed the id back as a number.
    const result = await executePlanItem(page, { ...item, mmid: key }, fieldByMmid, { resumePath, coverLetterPath });

    if (result.ok === 'ask_user') {
      askUserReasons.push(`${labelShort}: ${result.reason}`);
      console.warn(`[smartFill] ASK USER: "${labelShort}" — ${result.reason}`);
      failed += 1;
      continue;
    }

    if (!result.ok) {
      console.warn(`[smartFill] FAIL "${labelShort}" — ${result.reason}`);
      failed += 1;
      continue;
    }

    const wasSkipped = item.action === 'skip'
      || (typeof result.reason === 'string' && result.reason.startsWith('skip:'));
    if (wasSkipped) {
      console.log(`[smartFill] skip "${labelShort}" — ${result.reason}`);
      skipped += 1;
    } else {
      const why = item.reasoning ? ` (why: ${String(item.reasoning).slice(0, 80)})` : '';
      console.log(`[smartFill] ${item.action} "${labelShort}" — ${result.reason}${why}`);
      filled += 1;
      // Track in ctx for cross-page dedup
      if (ctx && ctx.answeredFields) {
        ctx.answeredFields.set(f.label || f.selectorHint, {
          value: item.value,
          pageIndex: ctx.currentPageIndex || 0,
          source: `planner:${item.action}`,
        });
      }
    }
    // Small jitter between actions so we don't slam the DOM
    await page.waitForTimeout(80 + Math.random() * 120);
  }

  return { filled, skipped, failed, askUserReasons, planned: plan.length, fallback: false };
}
361
+
362
+ module.exports = { smartFillPage };