npm - halo-agent - Versions diffs - 1.3.6 → 2.0.1 - Mend

halo-agent 1.3.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/orchestrator.js CHANGED Viewed

@@ -11,7 +11,43 @@
 const os = require('os');
 const path = require('path');
 const fs = require('fs');
-const { fillFields, uploadFile, findNextButton, findSubmitButton, waitForStableDOM, snapshotFieldLabels } = require('./filler');
+const { fillFields: legacyFillFields, uploadFile, findNextButton, findSubmitButton, waitForStableDOM, snapshotFieldLabels } = require('./filler');
+const { smartFillPage } = require('./smartFill');
+/**
+ * Switchable field filler — smart (planner-driven) by default.
+ *
+ * `opts.config.useSmartFill === false` is the hard kill switch: the call is
+ * routed straight to legacyFillFields and its result is returned untouched.
+ * Any other value (including a missing opts/config) takes the smart path.
+ * If smartFillPage throws or rejects, the legacy filler is used for this
+ * call instead, so a planner failure degrades rather than aborts the fill.
+ *
+ * @param {object} page - browser page handle, forwarded to both fillers.
+ * @param {object} aep - application payload; `__resumeLocalPath` and
+ *   `__coverLetterLocalPath` are read from it for the smart path.
+ * @param {object} [opts] - `{ config, jobId, ctx, ats }`; forwarded as-is to
+ *   the legacy filler.
+ * @returns {Promise<object>} On the smart path (and its fallback):
+ *   `{ filled, skipped, failed, needsAI, askUserReasons, plannedActions,
+ *   fellBackToLegacy }`, with missing counters defaulting to 0 / [].
+ */
+async function fillFields(page, aep, opts) {
+  // opts is treated as optional: read through a safe view, but hand the
+  // caller's original value to the legacy filler unchanged.
+  const { config, jobId, ctx, ats } = opts ?? {};
+  if (config?.useSmartFill === false) {
+    return await legacyFillFields(page, aep, opts);
+  }
+  const smartOpts = {
+    config,
+    jobId,
+    resumePath: aep.__resumeLocalPath || null,
+    coverLetterPath: aep.__coverLetterLocalPath || null,
+    ctx,
+    ats,
+  };
+  let result;
+  try {
+    result = await smartFillPage(page, aep, smartOpts);
+  } catch (e) {
+    console.warn(`[orchestrator] smartFillPage threw: ${e?.message ?? e} — falling back to legacy`);
+    result = await legacyFillFields(page, aep, opts);
+  }
+  // Normalize the result shape (smart returns askUserReasons, planned, fallback;
+  // legacy returns needsAI). A filler that resolves with nothing is treated
+  // as "nothing filled" instead of crashing here.
+  const r = result ?? {};
+  return {
+    filled: r.filled || 0,
+    skipped: r.skipped || 0,
+    failed: r.failed || 0,
+    needsAI: r.needsAI || [],
+    askUserReasons: r.askUserReasons || [],
+    plannedActions: r.planned || 0,
+    fellBackToLegacy: !!r.fallback,
+  };
+}
 const { detectCaptcha, solveCaptcha, injectCaptchaToken } = require('./captcha');
 const { visionFill, visionNavigateAndSubmit, visionFillSkipped } = require('./vision');
@@ -67,6 +103,8 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
     if (aep.recommended_resume?.pdf_presigned_url) {
       tempResumeFile = await downloadResume(aep.recommended_resume.pdf_presigned_url);
     }
+    // Expose resume path on aep so smartFill's upload_file action can route it.
+    aep.__resumeLocalPath = tempResumeFile;
     // Download cover-letter PDF too — separate file because Greenhouse/Ashby
     // have separate file inputs for each. Only present when the user
@@ -76,8 +114,8 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
       tempCoverLetterFile = await downloadResume(aep.cover_letter_pdf.pdf_presigned_url);
       if (tempCoverLetterFile) console.log('[orchestrator] Cover letter PDF downloaded');
     }
-    // Expose to fillFields via aep so the file:cover_letter category resolver
-    // can hand it to the uploader.
+    // Expose to fillFields via aep so smartFill's upload_file action can
+    // route it (legacy filler also reads this for file:cover_letter).
     aep.__coverLetterLocalPath = tempCoverLetterFile;
     // Check for an existing checkpoint — if a previous run got past page 1
@@ -331,15 +369,46 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
       });
       if (visionResult.submitted) {
-        // Vision already submitted — we're done
+        // Vision THINKS it submitted, but we shouldn't trust that without
+        // verifying — vision can confuse "review page rendered" with
+        // "application accepted." Route through the same verify-submit
+        // gate everything else uses. Worst case → REVIEWING, user clicks
+        // Submit. Better than a false-positive DONE.
         const confirmShot = await page.screenshot({ type: 'jpeg', quality: 70 }).catch(() => null);
         const confirmKey = confirmShot ? await uploadScreenshot(config, confirmShot, `confirm_${queueId}.jpg`) : null;
-        await reportStatus('DONE', {
-          confirmation_screenshot_r2_key: confirmKey || null,
+        const verdictUrl = page.url();
+        let vVerdict = { submitted: null, error_message: null, source: 'unavailable' };
+        try {
+          const vRes = await fetch(`${config.apiUrl}/agent/verify-submit`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
+            body: JSON.stringify({ queue_id: queueId, page_url: verdictUrl }),
+          });
+          if (vRes.ok) vVerdict = await vRes.json();
+        } catch {}
+        if (vVerdict.submitted === true) {
+          await reportStatus('DONE', { confirmation_screenshot_r2_key: confirmKey || null, fields_filled: cumulativeFilled });
+          await clearCheckpoint(config, queueId);
+          console.log(`[orchestrator] Done via vision (verified): ${queueItem.company} - ${queueItem.title}`);
+          return;
+        }
+        if (vVerdict.submitted === false) {
+          await reportStatus('NEEDS_ATTENTION', {
+            review_screenshot_r2_key: confirmKey || null,
+            needs_attention_reason: `Vision submitted but ATS rejected: ${vVerdict.error_message || 'unknown'}`,
+            intervention_type: 'submit_failed',
+            step: 'VERIFY',
+            step_detail: (vVerdict.error_message || '').slice(0, 200),
+            fields_filled: cumulativeFilled,
+          });
+          throw new Error(`Vision-submit failed verification: ${vVerdict.error_message || 'unknown'}`);
+        }
+        await reportStatus('REVIEWING', {
+          review_screenshot_r2_key: confirmKey || null,
+          step: 'REVIEWING',
+          step_detail: 'Vision attempted submit — verifier unavailable, please eyeball',
           fields_filled: cumulativeFilled,
         });
-        await clearCheckpoint(config, queueId);
-        console.log(`[orchestrator] Done via vision: ${queueItem.company} - ${queueItem.title}`);
         return;
       }
@@ -436,31 +505,22 @@ async function runJob(queueItem, chromeConn, config, reportStatus) {
     }
     if (verdict.submitted === null) {
-      // Auto-submit mode means "don't ask me, just submit." If we can't
-      // verify but the user opted into hands-off, trust the click and
-      // mark DONE (the screenshot is the receipt; user can audit later).
-      // Without this, autoSubmit was silently being ignored every time
-      // Firecrawl was slow/down — exactly the case user hit.
-      const autoSubmit = config.autoSubmit || aep.agent_config?.auto_submit;
-      if (autoSubmit) {
-        console.log(`[orchestrator] Could not verify (source: ${verdict.source}) — auto-submit ON, trusting click.`);
-        await reportStatus('DONE', {
-          confirmation_screenshot_r2_key: confirmKey || null,
-          fields_filled: cumulativeFilled,
-        });
-        await clearCheckpoint(config, queueId);
-        console.log(`[orchestrator] Done (auto-submit, unverified): ${queueItem.company} - ${queueItem.title}`);
-        return;
-      }
-      console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). Sending to REVIEWING for your eyeball.`);
+      // EARLIER VERSION: when auto-submit was ON, we trusted the click and
+      // marked DONE. That was wrong — it produced false-positive submissions
+      // (applied=true in DB, no actual application sent). Auto-submit means
+      // "don't make me click Submit on the dashboard" — it does NOT mean
+      // "lie about delivery."
+      //
+      // Honest behavior: unverified == REVIEWING regardless of auto-submit.
+      // The screenshot is right there in the dashboard, one click confirms.
+      // Better to over-ask than to ghost-apply.
+      console.warn(`[orchestrator] Could not verify submission (source: ${verdict.source}). REVIEWING — please eyeball the screenshot + click Submit.`);
       await reportStatus('REVIEWING', {
         review_screenshot_r2_key: confirmKey || null,
         step: 'REVIEWING',
-        step_detail: 'Could not auto-verify — please confirm the submit',
+        step_detail: `Submit clicked at ${verdictUrl.slice(0, 100)} — verifier unavailable, please confirm`,
         fields_filled: cumulativeFilled,
       });
-      // Stop here; user clicks Submit on dashboard → /apply-queue/submit/:id
-      // will flip to DONE. Don't return — let the function return naturally.
       return;
     }
@@ -1020,6 +1080,8 @@ async function runExtensionFill({
   if (aep.recommended_resume?.pdf_presigned_url) {
     tempResumeFile = await downloadResume(aep.recommended_resume.pdf_presigned_url);
   }
+  // Expose for smartFill upload_file action (and legacy gate uploader)
+  aep.__resumeLocalPath = tempResumeFile;
   const ctx = createFormContext();
   const useVisionForThis = useVision && VISION_ATS.has((ats_type || '').toLowerCase());

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "halo-agent",
-  "version": "1.3.6",
+  "version": "2.0.1",
   "description": "HALO local apply agent — auto-fills job applications using your real Chrome session",
   "main": "index.js",
   "bin": {
@@ -22,6 +22,8 @@
     "localServer.js",
     "filler.js",
     "scanPage.js",
+    "scanAccessibility.js",
+    "smartFill.js",
     "captcha.js",
     "vision.js",
     "manusAutomate.js",

package/scanAccessibility.js ADDED Viewed

@@ -0,0 +1,447 @@
+'use strict';
+/**
+ * Accessibility-tree-based form scanner.
+ *
+ * Replaces the per-ATS regex scanners (scanGreenhouse, scanLever, scanAshby,
+ * scanWorkday, scanICIMS, scanGeneric) with a single uniform pass that works
+ * on any form because it reads what the browser tells screen readers.
+ *
+ * How:
+ *   1. INJECT mmid="N" attribute on every interactive DOM element (input,
+ *      textarea, select, [contenteditable], [role=combobox|listbox|radio|
+ *      checkbox|button|option], <a>). The mmid is a stable handle the LLM
+ *      planner refers to when it returns an action plan.
+ *   2. FETCH the accessibility tree via Chrome DevTools Protocol's
+ *      Accessibility.getFullAXTree. This returns the same tree
+ *      screen readers see — already deduped, already labeled, already
+ *      grouped by semantic role. Way cleaner than walking the DOM.
+ *   3. RECONCILE: walk the AX tree, find nodes carrying our injected mmid
+ *      (via aria-keyshortcuts — we steal that attribute because it surfaces
+ *      as the AX node's `keyshortcuts` property and is otherwise rarely used).
+ *      For each AX node, read its name/role/description/required/disabled
+ *      flags, then enrich with DOM-only signals (typeahead heuristics, the
+ *      raw input type, native <select> options, the current value).
+ *   4. PRUNE: drop hidden, decorative, or unlabeled fields and flag the
+ *      already-filled ones (filledAlready). Return a flat list the LLM
+ *      planner can reason over in one prompt.
+ *
+ * Why this beats per-ATS scanners:
+ *   - Works on any ATS (Greenhouse, Lever, Ashby, Workday, iCIMS) AND any
+ *     unknown ATS without code changes — the AX tree is the same shape.
+ *   - Picks up labels the DOM scanner missed (aria-labelledby chains,
+ *     fieldset/legend grouping, parent-walk heuristics) because the
+ *     browser already did all that work for screen readers.
+ *   - Exposes radio/checkbox grouping: each option carries the name of its
+ *     enclosing 'radiogroup' / 'group' / 'form' AX ancestor as groupLabel.
+ *
+ * Output shape (one entry per fillable field or clickable control):
+ *   {
+ *     mmid: "12",                          // stable handle for the planner
+ *     role: "textbox" | "textarea" | "combobox" | "checkbox" | "radio" |
+ *           "file_upload" | "button" | "option" | "link",
+ *     label: "First Name",                 // primary name from AX tree
+ *     description: "Required",             // helper / placeholder / hint
+ *     required: true,
+ *     disabled: false,
+ *     options: ["Male","Female",...] | null, // native <select> option labels
+ *     selectorHint: "#first_name",         // best CSS selector we could build
+ *     inputType: "text",                   // HTML type= for inputs
+ *     currentValue: "" | "Male",           // current filled value
+ *     isTypeahead: false,                  // pure DOM heuristic
+ *     groupLabel: null | "Pronouns",       // fieldset/legend grouping
+ *     textContent: "Submit",               // visible text, mainly buttons/links
+ *     filledAlready: false,
+ *   }
+ */
+const MMID_ATTR = 'mmid';
+const HANDLE_ATTR = 'aria-keyshortcuts'; // we hijack this — surfaces in AX tree
+// ── Step 1: Inject mmid into every interactive element ───────────────────────
+/**
+ * Tag every interactive element in the page's document with a numeric handle.
+ *
+ * Each match receives the same 1-based id in two attributes: `mmid` (used to
+ * re-locate the element in the DOM) and `aria-keyshortcuts` (so the id shows
+ * up in the accessibility tree). A page-authored `aria-keyshortcuts` value is
+ * preserved once in `data-orig-aria-keyshortcuts`.
+ *
+ * NOTE(review): plain <button> elements without an explicit type="submit" or
+ * role="button" are not matched by this selector list — confirm intended.
+ *
+ * @param {object} page - handle exposing `evaluate(fn, arg)`.
+ * @returns {Promise<number>} number of elements tagged, so the caller can
+ *   sanity-check that injection happened.
+ */
+async function injectMmid(page) {
+  return await page.evaluate(({ mmidAttr, handleAttr }) => {
+    const sel = [
+      'input',
+      'textarea',
+      'select',
+      '[contenteditable="true"]',
+      '[role="textbox"]',
+      '[role="combobox"]',
+      '[role="listbox"]',
+      '[role="radiogroup"]',
+      '[role="radio"]',
+      '[role="checkbox"]',
+      '[role="switch"]',
+      '[role="button"]',
+      '[role="option"]',
+      '[role="menuitem"]',
+      'button[type="submit"]',
+      'a[href]',
+    ].join(',');
+    const all = document.querySelectorAll(sel);
+    let n = 0;
+    all.forEach((el) => {
+      n += 1;
+      const id = String(n);
+      // Read both attributes BEFORE overwriting them: on a re-scan the handle
+      // attribute holds the id we injected last time, which must not be
+      // mistaken for page-authored accessibility data.
+      const prevMmid = el.getAttribute(mmidAttr);
+      const prev = el.getAttribute(handleAttr);
+      const injectedByUs = prevMmid !== null && prev === prevMmid;
+      el.setAttribute(mmidAttr, id);
+      // Stash the original aria-keyshortcuts so we don't destroy real a11y data
+      if (prev && !injectedByUs && !el.hasAttribute('data-orig-aria-keyshortcuts')) {
+        el.setAttribute('data-orig-aria-keyshortcuts', prev);
+      }
+      el.setAttribute(handleAttr, id);
+    });
+    return n;
+  }, { mmidAttr: MMID_ATTR, handleAttr: HANDLE_ATTR });
+}
+// ── Step 2: Fetch the full AX tree via CDP ───────────────────────────────────
+/**
+ * Fetch the full accessibility tree for a page over a short-lived CDP session.
+ *
+ * @param {object} page - page whose browser context can open a CDP session.
+ * @returns {Promise<Array>} the `nodes` array of Accessibility.getFullAXTree.
+ */
+async function fetchAxTree(page) {
+  // The CDP session is created from the page's browser context.
+  const cdp = await page.context().newCDPSession(page);
+  try {
+    await cdp.send('Accessibility.enable');
+    const axTree = await cdp.send('Accessibility.getFullAXTree');
+    return axTree.nodes;
+  } finally {
+    // Best-effort cleanup: a failed detach must not replace the result or
+    // the error already in flight.
+    try { await cdp.detach(); } catch {}
+  }
+}
+// ── Step 3: Reconcile AX tree → flat list keyed by mmid ──────────────────────
+/**
+ * Read a plain value out of an AX node.
+ *
+ * Looks first at a top-level `{ type, value }` field named `key` (e.g.
+ * 'name', 'role'), then at the node's `properties` array of
+ * `{ name, value: { type, value } }` entries.
+ *
+ * @param {object} node - AX node (may be null/undefined).
+ * @param {string} key - field or property name to read.
+ * @returns {*} the unwrapped value, or undefined when absent.
+ */
+function axProp(node, key) {
+  const direct = node?.[key]?.value;
+  if (direct !== undefined) return direct;
+  if (!Array.isArray(node?.properties)) return undefined;
+  const match = node.properties.find((prop) => prop.name === key);
+  return match?.value?.value;
+}
+/**
+ * True when `s` is a string consisting only of digits (after trimming),
+ * i.e. it looks like one of our injected handles rather than a real label.
+ * Used to discard AX names/descriptions that merely echo the injected id.
+ */
+function isMmidLiteral(s) {
+  if (typeof s !== 'string') return false;
+  return /^\d+$/.test(s.trim());
+}
+/**
+ * Turn the flat AX node list into one record per node that carries an
+ * injected handle (a digits-only `keyshortcuts` value).
+ *
+ * Nodes whose role is 'generic', 'none' or 'presentation' are skipped.
+ * Each record gets the name of the nearest ancestor (at most 10 hops up)
+ * whose role is 'radiogroup', 'group' or 'form' as `groupLabel`, or null.
+ *
+ * @param {Array<object>} axNodes - nodes with nodeId / parentId links.
+ * @returns {Array<object>} records: mmid, role, rawName, description,
+ *   required, disabled, focused, checked, selected, axValue, groupLabel.
+ */
+function reconcile(axNodes) {
+  const GROUPING_ROLES = ['radiogroup', 'group', 'form'];
+  const SKIPPED_ROLES = ['generic', 'none', 'presentation'];
+  // Index nodes by id so parent links can be followed.
+  const nodeById = new Map(Array.from(axNodes, (node) => [node.nodeId, node]));
+  const roleOf = (node) => (axProp(node, 'role') || '').toString();
+  // Names/descriptions that merely echo an injected handle are not real text.
+  const cleanText = (value) => (value && !isMmidLiteral(value) ? String(value).trim() : '');
+  const findGroupLabel = (start) => {
+    let current = start;
+    for (let hop = 0; current && hop < 10; hop += 1) {
+      if (!current.parentId) return null;
+      const parent = nodeById.get(current.parentId);
+      if (!parent) return null;
+      if (GROUPING_ROLES.includes(roleOf(parent))) {
+        const parentName = axProp(parent, 'name');
+        if (parentName && !isMmidLiteral(parentName)) return String(parentName).trim();
+      }
+      current = parent;
+    }
+    return null;
+  };
+  const records = [];
+  for (const node of axNodes) {
+    const handle = axProp(node, 'keyshortcuts');
+    if (!handle || !/^\d+$/.test(String(handle))) continue;
+    const role = roleOf(node);
+    // Containers/decorative nodes are dropped; buttons and links are kept so
+    // the planner can decide to click them (Submit, Next, etc.).
+    if (SKIPPED_ROLES.includes(role)) continue;
+    records.push({
+      mmid: String(handle),
+      role,
+      rawName: cleanText(axProp(node, 'name')),
+      description: cleanText(axProp(node, 'description')),
+      required: !!axProp(node, 'required'),
+      disabled: !!axProp(node, 'disabled'),
+      focused: !!axProp(node, 'focused'),
+      checked: axProp(node, 'checked'),
+      selected: !!axProp(node, 'selected'),
+      // Raw AX `value` of the node, passed through untouched.
+      axValue: axProp(node, 'value'),
+      groupLabel: findGroupLabel(node),
+    });
+  }
+  return records;
+}
+// ── Step 4: Enrich each reconciled field with DOM-only signals ───────────────
+/**
+ * Merge DOM-only signals into the reconciled AX records.
+ *
+ * The AX tree tells us what the field IS; the DOM tells us what we need to
+ * INTERACT with it. For every mmid we pull, in one page.evaluate round trip:
+ *   - tag, inputType, role attribute, name, id, aria-label, placeholder
+ *   - selectorHint (#id, else tag[name="…"], else the [mmid="…"] selector)
+ *   - currentValue (so already-filled fields can be flagged)
+ *   - options[] for native <select> only
+ *   - typeahead heuristics (aria-autocomplete, combobox ancestor, "Locate me")
+ *   - iframeSrc: src/name of the frame element hosting this document, if any
+ *   - isVisible, isContentEditable, textContent (first 120 chars)
+ *
+ * DOM keys win over AX keys of the same name when merged (notably `role`,
+ * which becomes the element's role ATTRIBUTE, '' when absent).
+ *
+ * @param {object} page - handle exposing `evaluate(fn, arg)`.
+ * @param {Array<object>} axFields - records produced by reconcile().
+ * @returns {Promise<Array<object>>} merged records, minus elements that are
+ *   no longer in the DOM or are not visible.
+ */
+async function enrichFromDom(page, axFields) {
+  const mmids = axFields.map((f) => f.mmid);
+  if (mmids.length === 0) return [];
+  const enriched = await page.evaluate(({ mmids, mmidAttr }) => {
+    function safeText(t) { return (t || '').replace(/\s+/g, ' ').trim(); }
+    function cssEscape(s) {
+      // CSS.escape isn't available in some older contexts; polyfill.
+      try { return CSS.escape(s); } catch { return s.replace(/([!"#$%&'()*+,.\/:;<=>?@\[\\\]^`{|}~])/g, '\\$1'); }
+    }
+    return mmids.map((mmid) => {
+      const el = document.querySelector(`[${mmidAttr}="${mmid}"]`);
+      if (!el) return { mmid, missing: true };
+      const tag = el.tagName.toLowerCase();
+      // Custom elements may expose non-string `type` / `name` / `value`
+      // properties; only trust strings so one odd element can't abort the scan.
+      const type = typeof el.type === 'string' ? el.type.toLowerCase() : '';
+      const nameAttr = typeof el.name === 'string' ? el.name : '';
+      const role = (el.getAttribute('role') || '').toLowerCase();
+      const isContentEditable = el.isContentEditable || false;
+      // Skip non-visible elements UNLESS they're radio/checkbox/file (often
+      // styled-hidden but functional) or option lists in a dropdown.
+      let isVisible = true;
+      if (!['radio', 'checkbox', 'file'].includes(type) && role !== 'option') {
+        const rect = el.getBoundingClientRect();
+        if (rect.width === 0 && rect.height === 0) isVisible = false;
+        if (el.offsetParent === null && !el.closest('[role="dialog"]')) isVisible = false;
+      }
+      // Selector hint for the executor's fallback lookup: prefer the page's
+      // own id, then name, and only then our injected mmid attribute.
+      let selectorHint = `[${mmidAttr}="${mmid}"]`;
+      if (el.id) selectorHint = `#${cssEscape(el.id)}`;
+      else if (nameAttr) selectorHint = `${tag}[name="${nameAttr.replace(/["\\]/g, '\\$&')}"]`;
+      // Current value (used to flag already-filled fields). An unchecked
+      // checkbox/radio still has a `value` (default "on"), so it only counts
+      // as filled when it is actually checked.
+      const isToggle = type === 'checkbox' || type === 'radio';
+      let rawValue = typeof el.value === 'string' ? el.value : '';
+      if (isToggle && !el.checked) rawValue = '';
+      const currentValue = (rawValue || (isContentEditable ? safeText(el.innerText) : '') || '').trim();
+      // Native <select> options
+      let options = null;
+      if (tag === 'select' && el.options) {
+        options = Array.from(el.options).map((o) => ({ value: o.value, label: safeText(o.text) })).filter((o) => o.label);
+      }
+      // Typeahead heuristic (Greenhouse Location, Lever City, Workday locations).
+      // Kept here so the planner can reason about HOW to fill, not just WHAT.
+      const isTypeahead = !!(
+        el.getAttribute('aria-autocomplete') === 'list' ||
+        el.getAttribute('aria-autocomplete') === 'both' ||
+        el.getAttribute('aria-haspopup') === 'listbox' ||
+        el.getAttribute('aria-haspopup') === 'true' ||
+        (el.getAttribute('autocomplete') === 'off' && el.getAttribute('aria-expanded') !== null) ||
+        el.closest('[role="combobox"]') ||
+        Array.from((el.closest('div') || el.parentElement || document).querySelectorAll('button, a')).some((b) => /locate\s*me/i.test(b.textContent || ''))
+      );
+      // Iframe context — a frame can't be serialized out of evaluate(), so
+      // report the hosting frame element's src (or name) for matching later.
+      // This is '' when the document is not hosted in a frame element.
+      const ownerFrame = window.frameElement;
+      const iframeSrc = ownerFrame ? ownerFrame.src || ownerFrame.name || '' : '';
+      return {
+        mmid,
+        tag,
+        inputType: type,
+        role,
+        name: nameAttr,
+        id: el.id || '',
+        ariaLabel: el.getAttribute('aria-label') || '',
+        placeholder: el.placeholder || '',
+        selectorHint,
+        currentValue,
+        options,
+        isTypeahead,
+        iframeSrc,
+        isVisible,
+        isContentEditable,
+        // The element's own text content — kept as a label fallback for
+        // <button> / <a> when the AX name is empty.
+        textContent: safeText(el.innerText || el.textContent || '').slice(0, 120),
+      };
+    });
+  }, { mmids, mmidAttr: MMID_ATTR });
+  // Merge AX side and DOM side by mmid (DOM keys override AX keys).
+  const domByMmid = new Map(enriched.map((e) => [e.mmid, e]));
+  return axFields.map((ax) => {
+    const dom = domByMmid.get(ax.mmid) || {};
+    return { ...ax, ...dom };
+  }).filter((f) => !f.missing && f.isVisible !== false);
+}
+// ── Step 5: Normalize into the agent-facing schema ───────────────────────────
+/**
+ * Collapse role + HTML tag/type into one canonical interaction category.
+ *
+ * Rules are checked in order and the first match wins: file_upload,
+ * checkbox, radio, combobox, textarea, button, option, link. Anything else
+ * is a free-text 'textbox' (typeaheads included — the planner reads
+ * isTypeahead separately and chooses type-then-pick-suggestion).
+ *
+ * @param {object} f - merged field record (inputType, role, tag,
+ *   isContentEditable).
+ * @returns {string} the canonical category.
+ */
+function normalizeRole(f) {
+  const { inputType, role, tag, isContentEditable } = f;
+  const rules = [
+    ['file_upload', inputType === 'file'],
+    ['checkbox', inputType === 'checkbox' || role === 'checkbox' || role === 'switch'],
+    ['radio', inputType === 'radio' || role === 'radio'],
+    ['combobox', tag === 'select' || role === 'combobox' || role === 'listbox'],
+    ['textarea', tag === 'textarea' || isContentEditable],
+    ['button', tag === 'button' || role === 'button'],
+    ['option', role === 'option' || role === 'menuitem'],
+    ['link', tag === 'a'],
+  ];
+  const hit = rules.find(([, matches]) => matches);
+  return hit ? hit[0] : 'textbox';
+}
+/**
+ * Choose the display label for a field: the first non-empty value of
+ * AX name → aria-label → placeholder → text content → id → name,
+ * trimmed; '' when none is set.
+ */
+function pickLabel(f) {
+  const candidates = [f.rawName, f.ariaLabel, f.placeholder, f.textContent, f.id, f.name];
+  const first = candidates.find(Boolean) || '';
+  return first.trim();
+}
+/**
+ * Choose the helper text for a field: the AX description when present;
+ * otherwise the placeholder, but only when the field also has an AX name
+ * (without one, the placeholder is already used as the label). Trimmed.
+ */
+function pickDescription(f) {
+  if (f.description) return f.description.trim();
+  const fallback = f.rawName && f.placeholder ? f.placeholder : '';
+  return fallback.trim();
+}
+// ── Public entry point ───────────────────────────────────────────────────────
+/**
+ * Main scanner. Returns a flat list of fields the LLM planner can plan over.
+ *
+ * Only the main frame contributes fields in this version: child frames get
+ * mmid handles injected (numbered from 100000 * frame index so they cannot
+ * collide with main-frame ids or with each other), but their fields are NOT
+ * part of the returned list. A failure in any single frame is logged and
+ * does not abort the scan.
+ *
+ * @param {object} page - Playwright-style page (frames(), mainFrame(),
+ *   evaluate(), context()).
+ * @returns {Promise<Array<object>>} normalized field entries, deduplicated by
+ *   mmid; unlabeled entries are dropped unless they are buttons or links.
+ */
+async function scanAccessibility(page) {
+  const frames = [page, ...page.frames().filter((f) => f !== page.mainFrame())];
+  const allFields = [];
+  for (const [frameIndex, ctx] of frames.entries()) {
+    try {
+      // Playwright Page and Frame both expose evaluate(); index 0 is the page.
+      const isMain = ctx === page;
+      const injected = await (isMain
+        ? injectMmid(page)
+        : ctx.evaluate(({ mmidAttr, handleAttr, base }) => {
+            const sel = 'input,textarea,select,[contenteditable="true"],[role="textbox"],[role="combobox"],[role="listbox"],[role="radiogroup"],[role="radio"],[role="checkbox"],[role="switch"],[role="button"],[role="option"],[role="menuitem"],button[type="submit"],a[href]';
+            // Number from a per-frame offset so frame ids never collide with
+            // main-frame ids or with another frame's ids.
+            let n = base;
+            const all = document.querySelectorAll(sel);
+            all.forEach((el) => {
+              n += 1; const id = String(n);
+              // Read before overwriting: on a re-scan the handle attribute
+              // holds our previous id, not page-authored a11y data.
+              const prevMmid = el.getAttribute(mmidAttr);
+              const prev = el.getAttribute(handleAttr);
+              const injectedByUs = prevMmid !== null && prev === prevMmid;
+              el.setAttribute(mmidAttr, id);
+              if (prev && !injectedByUs && !el.hasAttribute('data-orig-aria-keyshortcuts')) {
+                el.setAttribute('data-orig-aria-keyshortcuts', prev);
+              }
+              el.setAttribute(handleAttr, id);
+            });
+            return all.length;
+          }, { mmidAttr: MMID_ATTR, handleAttr: HANDLE_ATTR, base: 100000 * frameIndex }));
+      if (!injected) continue;
+      // v1: child frames are tagged but not scanned — the AX fetch and DOM
+      // enrichment below run against the main page only.
+      if (!isMain) continue;
+      const axNodes = await fetchAxTree(page);
+      const axFields = reconcile(axNodes);
+      const enriched = await enrichFromDom(page, axFields);
+      allFields.push(...enriched);
+    } catch (e) {
+      console.warn(`[scanAx] Frame scan failed: ${e.message}`);
+    }
+  }
+  // Normalize and dedupe by mmid (first occurrence wins).
+  const seen = new Set();
+  const out = [];
+  for (const f of allFields) {
+    if (seen.has(f.mmid)) continue;
+    seen.add(f.mmid);
+    const label = pickLabel(f);
+    const role = normalizeRole(f);
+    // Filter noise: an unlabeled entry is only useful if it is clickable.
+    if (!label && !['button', 'link'].includes(role)) continue;
+    out.push({
+      mmid: f.mmid,
+      role,
+      label,
+      description: pickDescription(f),
+      required: f.required,
+      disabled: f.disabled,
+      options: f.options ? f.options.map((o) => o.label) : null,
+      selectorHint: f.selectorHint,
+      inputType: f.inputType,
+      currentValue: f.currentValue,
+      isTypeahead: f.isTypeahead,
+      groupLabel: f.groupLabel,
+      // Forward the raw text content for buttons/links the planner might click
+      textContent: f.textContent,
+      // Already-filled hint so the planner can skip
+      filledAlready: !!(f.currentValue && f.currentValue.length > 0),
+    });
+  }
+  console.log(`[scanAx] ${out.length} fields detected (${out.filter((f) => f.role === 'textbox').length} text, ${out.filter((f) => f.role === 'combobox').length} dropdowns, ${out.filter((f) => f.role === 'file_upload').length} files, ${out.filter((f) => f.role === 'button').length} buttons)`);
+  return out;
+}
+module.exports = { scanAccessibility };

package/smartFill.js ADDED Viewed

@@ -0,0 +1,362 @@
+'use strict';
+/**
+ * The smart fill loop. One round per page:
+ *   1. Scan the page via scanAccessibility (AX tree + mmid injection).
+ *   2. POST those fields to /smartfill/plan-fill with canonical facts.
+ *   3. Execute the returned action plan deterministically.
+ *
+ * Replaces the old fillFields/fillLocator stack. The action executor here
+ * is the ONLY place that touches the DOM — every "how do I fill X" question
+ * lives in one switch statement instead of being spread across filler.js,
+ * scanPage.js, and the orchestrator.
+ *
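+ * Falls back to legacy fillFields whenever /smartfill/plan-fill does not
+ * yield a usable plan — a non-2xx response (e.g. 502 when the LLM is down),
+ * a network error, or a body without a `plan` array — so a planner outage
+ * degrades to the old agent rather than freezing.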
+ */
+const { scanAccessibility } = require('./scanAccessibility');
+const { fillFields: legacyFillFields } = require('./filler');
+// ── Plan executor ───────────────────────────────────────────────────────────
+/**
+ * Execute ONE plan entry. Returns:
+ *   { ok: true,  reason: '...' }  on success
+ *   { ok: false, reason: '...' }  on failure (executor will continue with next)
+ *   { ok: 'ask_user', reason } when planner asked us to surface
+ */
+async function executePlanItem(page, item, fieldByMmid, ctx) {
+  const field = fieldByMmid.get(item.mmid);
+  if (!field) return { ok: false, reason: 'mmid vanished' };
+  const labelShort = (field.label || field.selectorHint || '?').slice(0, 50);
+  // Re-locate the element via the mmid attribute (the executor's anchor).
+  // Falls back to selectorHint if mmid was wiped (rare; happens on full
+  // re-renders between scan and execute).
+  let locator = page.locator(`[mmid="${item.mmid}"]`).first();
+  let visible = await locator.isVisible({ timeout: 800 }).catch(() => false);
+  if (!visible && field.selectorHint) {
+    locator = page.locator(field.selectorHint).first();
+    visible = await locator.isVisible({ timeout: 800 }).catch(() => false);
+  }
+  if (!visible) {
+    return { ok: false, reason: `element not visible (mmid=${item.mmid})` };
+  }
+  switch (item.action) {
+    case 'skip':
+      return { ok: true, reason: `skip: ${item.reasoning || 'planner skipped'}` };
+    case 'ask_user':
+      return { ok: 'ask_user', reason: item.reasoning || 'planner requested human input' };
+    case 'type': {
+      if (!item.value) return { ok: true, reason: 'skip: empty value' };
+      // Typeahead path: open suggestion list, pick first match.
+      if (field.isTypeahead) {
+        return await typeAndPickSuggestion(page, locator, item.value);
+      }
+      return await reactSafeType(page, locator, item.value);
+    }
+    case 'select_option': {
+      // Native <select>. value is the option LABEL (model was instructed to
+      // pick from the options[] list).
+      try {
+        await locator.selectOption({ label: item.value }).catch(async () => {
+          await locator.selectOption({ value: item.value });
+        });
+        return { ok: true, reason: `select: ${item.value}` };
+      } catch (e) {
+        return { ok: false, reason: `select failed: ${e.message}` };
+      }
+    }
+    case 'click_option': {
+      // Custom dropdown: click trigger, find option by text, click it.
+      try {
+        await locator.click({ timeout: 2500 });
+        await page.waitForTimeout(300);
+        // Look for an option whose visible text equals the planner's pick.
+        const optionSel = '[role="option"], [role="menuitem"], .select__option, li[class*="option"]';
+        const opts = page.locator(optionSel);
+        const count = await opts.count().catch(() => 0);
+        if (count === 0) return { ok: false, reason: 'dropdown opened but no options' };
+        const texts = await opts.allTextContents().catch(() => []);
+        const v = String(item.value).toLowerCase().trim();
+        let idx = texts.findIndex((t) => t.toLowerCase().trim() === v);
+        if (idx === -1) idx = texts.findIndex((t) => t.toLowerCase().includes(v) || v.includes(t.toLowerCase()));
+        if (idx === -1) {
+          await page.keyboard.press('Escape').catch(() => {});
+          return { ok: false, reason: `option "${item.value}" not in dropdown (have: ${texts.slice(0, 5).join(', ')})` };
+        }
+        await opts.nth(idx).click({ timeout: 2000 });
+        return { ok: true, reason: `clicked option: ${texts[idx]}` };
+      } catch (e) {
+        await page.keyboard.press('Escape').catch(() => {});
+        return { ok: false, reason: `click_option failed: ${e.message}` };
+      }
+    }
+    case 'set_checkbox': {
+      try {
+        const wantChecked = String(item.value).toLowerCase() === 'true';
+        const isChecked = await locator.isChecked().catch(() => false);
+        if (wantChecked !== isChecked) await locator.click({ timeout: 2000 });
+        return { ok: true, reason: `checkbox: ${wantChecked}` };
+      } catch (e) {
+        return { ok: false, reason: `checkbox failed: ${e.message}` };
+      }
+    }
+    case 'set_radio': {
+      // Radios identified by mmid point to ONE option; the planner's value is
+      // the label of the right one. Walk the radiogroup and click the match.
+      try {
+        // Strategy 1: native radio with name= — find by name + label match.
+        const name = field.name;
+        if (name) {
+          const group = page.locator(`input[type="radio"][name="${name}"]`);
+          const gc = await group.count();
+          for (let i = 0; i < gc; i++) {
+            const rad = group.nth(i);
+            const id = await rad.getAttribute('id').catch(() => null);
+            let lbl = '';
+            if (id) {
+              lbl = (await page.locator(`label[for="${id}"]`).first().textContent().catch(() => '') || '').trim();
+            }
+            if (!lbl) lbl = (await rad.evaluate(el => el.value || '').catch(() => '') || '');
+            if (lbl.toLowerCase().includes(String(item.value).toLowerCase())) {
+              await rad.check({ force: true, timeout: 1500 }).catch(() => rad.click({ force: true }));
+              return { ok: true, reason: `radio: ${lbl}` };
+            }
+          }
+        }
+        // Strategy 2: just click the labeled option directly (custom radios).
+        const v = String(item.value);
+        const opt = page.locator(`[role="radio"]:has-text("${v}"), label:has-text("${v}")`).first();
+        if (await opt.isVisible({ timeout: 800 }).catch(() => false)) {
+          await opt.click({ timeout: 1500 });
+          return { ok: true, reason: `radio (label): ${v}` };
+        }
+        return { ok: false, reason: `radio option "${item.value}" not found` };
+      } catch (e) {
+        return { ok: false, reason: `radio failed: ${e.message}` };
+      }
+    }
+    case 'upload_file': {
+      const which = String(item.value).toLowerCase().trim();
+      const path = which === 'cover_letter'
+        ? ctx.coverLetterPath
+        : ctx.resumePath;
+      if (!path) return { ok: false, reason: `no ${which} file available` };
+      try {
+        // Unhide so setInputFiles doesn't reject a 0x0 element
+        await locator.evaluate((el) => {
+          el.style.display = 'block';
+          el.style.opacity = '1';
+          el.style.visibility = 'visible';
+          el.style.width = '1px';
+          el.style.height = '1px';
+        }).catch(() => {});
+        await locator.setInputFiles(path, { timeout: 6000 });
+        return { ok: true, reason: `uploaded ${which}` };
+      } catch (e) {
+        return { ok: false, reason: `upload failed: ${e.message}` };
+      }
+    }
+    default:
+      return { ok: false, reason: `unknown action: ${item.action}` };
+  }
+}
+// ── Strategy helpers (shared by type + typeahead) ────────────────────────────
+/**
+ * React-safe text fill. Same ladder as the legacy filler — fill, then
+ * keyboard.type, then native-setter — but with verify-after-write to catch
+ * React rejecting our value.
+ */
+async function reactSafeType(page, locator, value) {
+  const v = String(value);
+  try {
+    await locator.fill(v, { timeout: 4000 });
+    const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
+    if (got && got.trim()) return { ok: true, reason: `typed (fill): "${v.slice(0, 30)}"` };
+  } catch {}
+  try {
+    await locator.click({ timeout: 1500 });
+    await locator.fill('', { timeout: 1500 }).catch(() => {});
+    await page.keyboard.type(v, { delay: 20 });
+    const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
+    if (got && got.trim()) return { ok: true, reason: `typed (keyboard): "${v.slice(0, 30)}"` };
+  } catch {}
+  try {
+    await locator.evaluate((el, val) => {
+      el.focus();
+      const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
+      const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
+      if (setter) setter.call(el, val); else el.value = val;
+      el.dispatchEvent(new Event('input', { bubbles: true }));
+      el.dispatchEvent(new Event('change', { bubbles: true }));
+      el.blur();
+    }, v);
+    const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
+    if (got && got.trim()) return { ok: true, reason: `typed (native): "${v.slice(0, 30)}"` };
+  } catch {}
+  return { ok: false, reason: 'all type strategies returned empty value' };
+}
+/**
+ * Type-and-pick for typeahead inputs (Greenhouse Location etc).
+ */
+async function typeAndPickSuggestion(page, locator, value) {
+  try {
+    await locator.click({ timeout: 2000 });
+    await locator.press('Meta+A').catch(() => locator.press('Control+A')).catch(() => {});
+    await locator.press('Delete').catch(() => {});
+    const firstChunk = String(value).split(/[,;]/)[0].trim();
+    await page.keyboard.type(firstChunk, { delay: 60 });
+    await page.waitForTimeout(700);
+    const optionSel = '[role="option"], [role="listbox"] li, .select__option, ul[class*="autocomplete"] li';
+    const opts = page.locator(optionSel);
+    const count = await opts.count().catch(() => 0);
+    if (count === 0) {
+      // Some fields accept the typed value directly. Verify.
+      const got = await locator.inputValue({ timeout: 800 }).catch(() => null);
+      if (got && got.trim()) return { ok: true, reason: `typeahead (no suggestion, accepted): "${value.slice(0, 30)}"` };
+      return { ok: false, reason: 'typeahead opened no suggestions' };
+    }
+    const texts = await opts.allTextContents().catch(() => []);
+    const v = firstChunk.toLowerCase();
+    let idx = texts.findIndex((t) => t.toLowerCase().trim() === v);
+    if (idx === -1) idx = texts.findIndex((t) => t.toLowerCase().trim().startsWith(v));
+    if (idx === -1) idx = 0;
+    await opts.nth(idx).click({ timeout: 2000 });
+    await page.waitForTimeout(200);
+    return { ok: true, reason: `typeahead picked: ${texts[idx]}` };
+  } catch (e) {
+    return { ok: false, reason: `typeahead failed: ${e.message}` };
+  }
+}
+// ── Main entry: scan + plan + execute one round on the current page ──────────
+/**
+ * @param {Page} page
+ * @param {object} aep        — the agent execution packet (carries facts)
+ * @param {object} options
+ * @param {object} options.config   — agent config (apiUrl, token)
+ * @param {string} options.jobId
+ * @param {string} options.resumePath
+ * @param {string|null} options.coverLetterPath
+ * @param {object} options.ctx      — fill context (answeredFields map etc)
+ * @returns {Promise<{filled, skipped, failed, askUserReasons, planned}>}
+ */
+async function smartFillPage(page, aep, options) {
+  const { config, jobId, resumePath, coverLetterPath, ctx } = options;
+  // 1. Scan via AX tree
+  const fields = await scanAccessibility(page).catch((e) => {
+    console.warn(`[smartFill] scanAccessibility failed: ${e.message}`);
+    return [];
+  });
+  if (fields.length === 0) {
+    console.warn('[smartFill] no fields detected on page');
+    return { filled: 0, skipped: 0, failed: 0, askUserReasons: [], planned: 0, fallback: false };
+  }
+  // 2. Build already_filled snapshot from ctx for the planner
+  const alreadyFilled = [];
+  if (ctx && ctx.answeredFields) {
+    for (const [label, info] of ctx.answeredFields) {
+      alreadyFilled.push({ label, value: String(info.value || '').slice(0, 80) });
+    }
+  }
+  // 3. Request the plan
+  let plan = null;
+  try {
+    const res = await fetch(`${config.apiUrl}/smartfill/plan-fill`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.token}` },
+      body: JSON.stringify({ job_id: jobId, fields, already_filled: alreadyFilled }),
+    });
+    if (res.ok) {
+      const j = await res.json();
+      plan = Array.isArray(j.plan) ? j.plan : null;
+    } else {
+      console.warn(`[smartFill] plan-fill ${res.status}; falling back to legacy filler`);
+    }
+  } catch (e) {
+    console.warn(`[smartFill] plan-fill threw: ${e.message}; falling back`);
+  }
+  if (!plan) {
+    // Planner unavailable — degrade gracefully.
+    const legacyResult = await legacyFillFields(page, aep, {
+      ats: options.ats || 'generic',
+      ctx,
+      config,
+      jobId,
+    }).catch(() => ({ filled: 0, skipped: 0, failed: 0 }));
+    return { ...legacyResult, askUserReasons: [], planned: 0, fallback: true };
+  }
+  console.log(`[smartFill] planner returned ${plan.length} actions for ${fields.length} fields`);
+  // 4. Execute the plan in DOM order (planner doesn't dictate order; we
+  // mirror the field scan order so dependent fields fill after their parents).
+  const fieldByMmid = new Map(fields.map((f) => [String(f.mmid), f]));
+  const planByMmid = new Map(plan.map((p) => [String(p.mmid), p]));
+  let filled = 0, skipped = 0, failed = 0;
+  const askUserReasons = [];
+  for (const f of fields) {
+    const item = planByMmid.get(String(f.mmid));
+    if (!item) {
+      // Planner didn't include this field — skip silently (planner was told
+      // to return EVERY field, but if it missed one, treat as a skip).
+      continue;
+    }
+    const labelShort = (f.label || f.selectorHint || '?').slice(0, 50);
+    const result = await executePlanItem(page, item, fieldByMmid, { resumePath, coverLetterPath });
+    if (result.ok === 'ask_user') {
+      askUserReasons.push(`${labelShort}: ${result.reason}`);
+      console.warn(`[smartFill] ASK USER: "${labelShort}" — ${result.reason}`);
+      failed += 1;
+      continue;
+    }
+    if (result.ok) {
+      if (item.action === 'skip') {
+        console.log(`[smartFill] skip "${labelShort}" — ${result.reason}`);
+        skipped += 1;
+      } else {
+        console.log(`[smartFill] ${item.action} "${labelShort}" — ${result.reason}${item.reasoning ? ` (why: ${item.reasoning.slice(0, 80)})` : ''}`);
+        filled += 1;
+        // Track in ctx for cross-page dedup
+        if (ctx && ctx.answeredFields) {
+          ctx.answeredFields.set(f.label || f.selectorHint, {
+            value: item.value,
+            pageIndex: ctx.currentPageIndex || 0,
+            source: `planner:${item.action}`,
+          });
+        }
+      }
+      // Small jitter between actions so we don't slam the DOM
+      await page.waitForTimeout(80 + Math.random() * 120);
+    } else {
+      console.warn(`[smartFill] FAIL "${labelShort}" — ${result.reason}`);
+      failed += 1;
+    }
+  }
+  return { filled, skipped, failed, askUserReasons, planned: plan.length, fallback: false };
+}
+module.exports = { smartFillPage };