halo-agent 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/browser.js +157 -0
- package/captcha.js +217 -0
- package/config.js +37 -0
- package/filler.js +987 -0
- package/index.js +360 -0
- package/localServer.js +270 -0
- package/manusAutomate.js +349 -0
- package/orchestrator.js +1122 -0
- package/package.json +49 -0
- package/poller.js +172 -0
- package/scanPage.js +606 -0
- package/vision.js +398 -0
package/scanPage.js
ADDED
|
@@ -0,0 +1,606 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* scanPage.js — Per-ATS page scanner.
|
|
5
|
+
*
|
|
6
|
+
* Returns a normalized field map for every visible fillable field on the page:
|
|
7
|
+
 *   [ { selector, label, tag, inputType, id, name, automationId, currentValue, category } ]
|
|
8
|
+
*
|
|
9
|
+
* category is one of:
|
|
10
|
+
* 'profile' — standard identity field (name, email, phone, resume, etc.)
|
|
11
|
+
* 'custom' — company-specific question (answered via HALO memory or AI)
|
|
12
|
+
* 'eeo' — Equal Employment Opportunity / demographic
|
|
13
|
+
* 'consent' — checkbox consent, marketing opt-in
|
|
14
|
+
* 'ignore' — captcha, hidden, already filled, utility inputs
|
|
15
|
+
*
|
|
16
|
+
* Each ATS has its own scanner because they embed fields differently:
|
|
17
|
+
* greenhouse — id= attrs are stable across ALL companies
|
|
18
|
+
* lever — name= attrs stable, full name in single field
|
|
19
|
+
* ashby — _systemfield_* system names, UUID names for custom fields
|
|
20
|
+
* workday — data-automation-id for nav/buttons, form fields in iframe (vision)
|
|
21
|
+
* icims — everything inside a named iframe
|
|
22
|
+
* generic — label-text walk for any other ATS
|
|
23
|
+
*
|
|
24
|
+
* IMPORTANT: this scanner runs on the live page via Playwright, not on saved HTML.
|
|
25
|
+
* It always returns what is actually visible right now.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
// ─── Shared helpers ───────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Run in-browser DOM scan. Returns raw field list before enrichment.
|
|
32
|
+
* This is the same getFieldLabel logic from extractPageFields but extended
|
|
33
|
+
* to also capture data-automation-id and to distinguish radio/checkbox groups.
|
|
34
|
+
*/
|
|
35
|
+
/**
 * Collect every candidate form control from the live page.
 *
 * The callback passed to `page.evaluate` is executed inside the page, so every
 * helper it needs is declared within the callback itself.
 *
 * @param {{ evaluate: Function }} page - object exposing Playwright's `evaluate(fn)`.
 * @returns {Promise<object[]>} one record per unique control with the keys
 *   tag, inputType, role, id, name, automationId, testId, label, selector,
 *   currentValue and isContentEditable. No category is assigned here.
 */
async function domScan(page) {
  return page.evaluate(() => {
    // Characters backslash-escaped when CSS.escape is not available.
    const LEGACY_ESCAPE = /([!"#$%&'()*+,.\/:;<=>?@\[\\\]^`{|}~])/g;

    // Escape a value for use as a CSS identifier (or inside a quoted CSS string).
    const cssEscape = (value) => (
      typeof CSS !== 'undefined' && typeof CSS.escape === 'function'
        ? CSS.escape(value)
        : String(value).replace(LEGACY_ESCAPE, '\\$1')
    );

    // Escape a value that is placed between double quotes in an attribute selector.
    const attrEscape = (value) => String(value).replace(/["\\]/g, '\\$&');

    // Text of a node with whitespace runs collapsed to single spaces.
    const squash = (node) => node.textContent.replace(/\s+/g, ' ').trim();

    function getLabel(el) {
      // 1. Native label[for]. The id is escaped so that ids containing quotes or
      //    backslashes cannot turn the selector into a syntax error.
      if (el.id) {
        const lbl = document.querySelector('label[for="' + cssEscape(el.id) + '"]');
        if (lbl) return squash(lbl);
      }
      if (el.labels && el.labels[0]) return squash(el.labels[0]);

      // 2. aria-label / aria-labelledby
      const ariaLabel = el.getAttribute('aria-label');
      if (ariaLabel) return ariaLabel.trim();
      const ariaLabelledBy = el.getAttribute('aria-labelledby');
      if (ariaLabelledBy) {
        const text = ariaLabelledBy
          .split(/\s+/)
          .map((refId) => {
            const target = document.getElementById(refId);
            return target ? target.textContent.trim() : '';
          })
          .join(' ')
          .trim();
        if (text) return text;
      }

      // 3. data-label / data-title
      if (el.dataset.label) return el.dataset.label.trim();
      if (el.dataset.title) return el.dataset.title.trim();

      // 4. Preceding siblings: label/legend of any length, headings under 200
      //    characters, generic text containers under 100 characters.
      for (let prev = el.previousElementSibling; prev; prev = prev.previousElementSibling) {
        const ptag = prev.tagName;
        if (/^(LABEL|LEGEND)$/.test(ptag)) {
          const t = squash(prev);
          if (t) return t;
        }
        if (/^H[1-6]$/.test(ptag)) {
          const t = squash(prev);
          if (t && t.length < 200) return t;
        }
        if (/^(SPAN|DIV|P|STRONG|B)$/.test(ptag)) {
          const t = squash(prev);
          if (t && t.length < 100) return t;
        }
      }

      // 5. Walk up at most 12 ancestors; at each level try a label/legend, then a
      //    heading-like element, then an element whose class hints at a label.
      //    Candidates that contain the control itself are skipped.
      let parent = el.parentElement;
      for (let depth = 0; parent && depth < 12; depth++) {
        const labelEl = parent.querySelector('label, legend');
        if (labelEl && !labelEl.contains(el)) {
          const t = squash(labelEl);
          if (t && t.length < 200) return t;
        }
        const headingEl = parent.querySelector('h1, h2, h3, h4, h5, h6, strong, b');
        if (headingEl && !headingEl.contains(el)) {
          const t = squash(headingEl);
          if (t && t.length > 3 && t.length < 300) return t;
        }
        const classEl = parent.querySelector('[class*="label"], [class*="title"], [class*="question"], [class*="heading"], [class*="prompt"]');
        if (classEl && !classEl.contains(el)) {
          const t = squash(classEl);
          if (t && t.length < 200) return t;
        }
        parent = parent.parentElement;
      }

      // 6. Fallback: placeholder > name > id
      return el.placeholder || el.name || el.id || '';
    }

    const results = [];
    const seen = new Set();

    document.querySelectorAll('input, textarea, select, [role="combobox"], [contenteditable="true"]').forEach((el) => {
      const type = (el.type || '').toLowerCase();
      // Skip utility types
      if (['hidden', 'submit', 'button', 'image', 'reset'].includes(type)) return;
      // Skip invisible controls; radio/checkbox/file inputs are exempt from the check.
      if (type !== 'radio' && type !== 'checkbox' && type !== 'file') {
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0) return;
        if (el.offsetParent === null && !el.closest('[role="dialog"]')) return;
      }

      // Strip leading/trailing asterisks from the label text.
      const label = getLabel(el).replace(/^\*+|\*+$/g, '').trim();
      const id = el.id || '';
      const name = el.name || '';
      const automationId = el.getAttribute('data-automation-id') || '';
      const testId = el.getAttribute('data-testid') || '';
      const role = el.getAttribute('role') || '';
      const tag = el.tagName.toLowerCase();
      const currentValue = el.value || '';

      // Dedup key follows the same priority as the selector below, so two controls
      // are only merged when they would end up with the same selector. Controls
      // with no identifying attribute and no label fall back to their markup.
      const dedupKey = id
        || name
        || automationId
        || testId
        || (label ? label + ':' + type : el.outerHTML.slice(0, 60));
      if (seen.has(dedupKey)) return;
      seen.add(dedupKey);

      // Build the most stable selector
      let selector = '';
      if (id) {
        selector = '#' + cssEscape(id);
      } else if (name) {
        selector = `[name="${attrEscape(name)}"]`;
      } else if (automationId) {
        selector = `[data-automation-id="${attrEscape(automationId)}"]`;
      } else if (testId) {
        selector = `[data-testid="${attrEscape(testId)}"]`;
      } else if (label) {
        // Quotes and backslashes are removed (not escaped) from the label here.
        const stripped = label.replace(/['"\\]/g, '');
        selector = `${tag}[aria-label="${stripped}"]`;
      }

      results.push({
        tag,
        inputType: type,
        role,
        id,
        name,
        automationId,
        testId,
        label,
        selector,
        currentValue,
        isContentEditable: el.isContentEditable || false,
      });
    });

    return results;
  });
}
|
|
159
|
+
|
|
160
|
+
// ─── Profile field classifier ─────────────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
// Map from normalized label keywords to profile field name.
|
|
163
|
+
// Order matters — more specific patterns first.
|
|
164
|
+
// Ordered list of label patterns -> profile field name. The first matching
// entry wins, so more specific patterns must come before broader ones.
const PROFILE_PATTERNS = [
  // Lever-style: single full name field
  { field: 'full_name',      regex: /^(full\s*name|your\s*name|name)$/i },
  // Must precede first_name: "Preferred First Name" also matches the first_name pattern.
  { field: 'preferred_name', regex: /preferred[\s_-]?(first\s*)?name|goes\s*by/i },
  // Standard first/last
  { field: 'first_name',     regex: /first[\s_-]?name|given[\s_-]?name|forename/i },
  { field: 'last_name',      regex: /last[\s_-]?name|family[\s_-]?name|surname/i },
  { field: 'email',          regex: /e[\s-]?mail/i },
  { field: 'phone',          regex: /phone|mobile|telephone|cell/i },
  { field: 'linkedin',       regex: /linkedin/i },
  { field: 'github',         regex: /github/i },
  { field: 'twitter',        regex: /twitter|x\.com/i },
  { field: 'portfolio',      regex: /portfolio|personal\s*(site|url|website)|website/i },
  { field: 'location',       regex: /^(location|city|current\s*location)$/i },
  { field: 'address',        regex: /address\s*(line\s*1)?|street/i },
  // NOTE(review): never reached — a label of exactly "city" is already claimed by
  // the location entry above. Left in place to keep existing categories stable.
  { field: 'city',           regex: /^city$/i },
  { field: 'state',          regex: /^(state|province|region)$/i },
  { field: 'zip',            regex: /zip|postal\s*code/i },
  { field: 'country',        regex: /^country$/i },
  { field: 'salary',         regex: /salary|compensation|expected\s*pay/i },
  { field: 'start_date',     regex: /start\s*date|available|earliest.*start/i },
  { field: 'school',         regex: /school|university|college|institution/i },
  { field: 'degree',         regex: /degree|qualification/i },
  { field: 'gpa',            regex: /gpa|grade\s*point/i },
  { field: 'org',            regex: /^(company|employer|organization|current\s*(company|employer))$/i },
];

// EEO / demographic patterns
const EEO_PATTERNS = [
  /gender|sex$/i,
  /race|ethnicity|hispanic/i,
  /veteran/i,
  /disability|disabled/i,
  /pronoun/i,
  /equal\s*opportunity/i,
  /voluntary\s*self/i,
  /demographic/i,
];

// Consent / noise patterns
const CONSENT_PATTERNS = [
  /consent|agree|accept|opt.?in|marketing|newsletter/i,
  /terms\s*(and|&)\s*(conditions|service)/i,
  /privacy\s*policy/i,
];

/**
 * Derive a category string for a scanned field from its label text.
 *
 * Checks run in this order: consent (checkboxes only), EEO, profile patterns,
 * `_systemfield_*` names, and finally the 'custom' default.
 *
 * @param {{ label?: string, inputType?: string, name?: string }} field
 * @returns {string} 'consent' | 'eeo' | 'profile:{field}' | 'file:resume' | 'custom'
 */
function classifyField(field) {
  // A missing label is treated as empty rather than throwing.
  const label = (field.label || '').toLowerCase().trim();

  // Consent checkboxes
  if (field.inputType === 'checkbox' && CONSENT_PATTERNS.some((pattern) => pattern.test(label))) {
    return 'consent';
  }

  // EEO
  if (EEO_PATTERNS.some((pattern) => pattern.test(label))) return 'eeo';

  // Profile
  const profileHit = PROFILE_PATTERNS.find(({ regex }) => regex.test(label));
  if (profileHit) return 'profile:' + profileHit.field;

  // System fields identified by their name attribute
  if (field.name && field.name.startsWith('_systemfield_')) {
    const sfName = field.name.replace('_systemfield_', '');
    // Resume is an upload, so it gets the file category rather than a profile key.
    if (sfName === 'resume') return 'file:resume';
    if (sfName === 'name') return 'profile:full_name';
    if (['email', 'phone', 'linkedin', 'website'].includes(sfName)) {
      return 'profile:' + sfName;
    }
  }

  // Custom (company-specific question)
  return 'custom';
}
|
|
236
|
+
|
|
237
|
+
// ─── ATS-specific scanners ────────────────────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
/**
|
|
240
|
+
* Greenhouse scanner.
|
|
241
|
+
* Fields use id= that matches the field semantic name (first_name, last_name, email, phone, resume).
|
|
242
|
+
* Custom questions: id starts with 'question_', label from adjacent <label>.
|
|
243
|
+
*/
|
|
244
|
+
/**
 * Scan a Greenhouse page and categorize fields primarily by their id attribute.
 *
 * @param {object} page - passed straight to domScan.
 * @returns {Promise<object[]>} domScan records with a `category` added; records
 *   categorized 'ignore' or typed 'hidden' are dropped.
 */
async function scanGreenhouse(page) {
  // Ids whose value is itself the profile key (category 'profile:{id}').
  const profileIds = new Set(['first_name', 'last_name', 'preferred_name', 'email', 'phone']);
  // Ids that are uploads (category 'file:{id}').
  const fileIds = new Set(['resume', 'cover_letter']);

  const categorize = (f) => {
    if (f.id) {
      if (fileIds.has(f.id)) return 'file:' + f.id;
      if (profileIds.has(f.id)) return 'profile:' + f.id;
      // Custom question fields: id like question_12345678
      if (/^question_\d+$/.test(f.id)) return 'custom';
      // EEO/demographic ids
      if (/^(gender|hispanic_ethnicity|veteran_status|disability_status)$/.test(f.id)) return 'eeo';
      // Degree/school fields: keep only the part of the id before the first '--'.
      if (/^(degree|school|major)/.test(f.id)) return 'profile:' + f.id.split('--')[0];
    }
    return classifyField(f);
  };

  const raw = await domScan(page);
  return raw
    .map((f) => ({ ...f, category: categorize(f) }))
    .filter((f) => f.category !== 'ignore' && f.inputType !== 'hidden');
}
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Lever scanner.
|
|
270
|
+
* name= attributes are stable across ALL companies.
|
|
271
|
+
* Key difference: full name is a SINGLE field (name="name"), NOT first+last.
|
|
272
|
+
* Custom questions: name starts with "cards[" pattern.
|
|
273
|
+
*/
|
|
274
|
+
/**
 * Scan a Lever page and categorize fields primarily by their name attribute.
 *
 * Lookup order: fixed name table, resume/file inputs, name prefixes
 * (eeo[, consent[, cards[), then the label-based classifier.
 *
 * @param {object} page - passed straight to domScan.
 * @returns {Promise<object[]>} domScan records with a `category` added; records
 *   categorized 'ignore' are dropped.
 */
async function scanLever(page) {
  const systemCategories = {
    name: 'profile:full_name',
    email: 'profile:email',
    phone: 'profile:phone',
    location: 'profile:location',
    org: 'profile:org',
    'urls[LinkedIn]': 'profile:linkedin',
    'urls[GitHub]': 'profile:github',
    'urls[Twitter]': 'profile:twitter',
    'urls[Portfolio]': 'profile:portfolio',
    'urls[Other]': 'profile:website',
    comments: 'cover_letter',
  };

  // [name prefix, category] pairs, checked in this order.
  const prefixCategories = [
    ['eeo[', 'eeo'],
    ['consent[', 'consent'],
    ['cards[', 'custom'],
  ];

  const pickCategory = (entry) => {
    const fieldName = entry.name;
    const fixed = fieldName && systemCategories[fieldName];
    if (fixed) return fixed;
    if (fieldName === 'resume' || entry.inputType === 'file') return 'file:resume';
    if (fieldName) {
      const hit = prefixCategories.find(([prefix]) => fieldName.startsWith(prefix));
      if (hit) return hit[1];
    }
    return classifyField(entry);
  };

  const scanned = await domScan(page);
  const categorized = [];
  for (const entry of scanned) {
    const category = pickCategory(entry);
    if (category !== 'ignore') categorized.push({ ...entry, category });
  }
  return categorized;
}
|
|
311
|
+
|
|
312
|
+
/**
|
|
313
|
+
* Ashby scanner.
|
|
314
|
+
* System fields: name="_systemfield_{name}" — stable across ALL companies.
|
|
315
|
+
* Custom fields: name="{uuid}" — company-specific, use label text to identify.
|
|
316
|
+
*/
|
|
317
|
+
/**
 * Scan an Ashby page and categorize fields by name: `_systemfield_*` names map
 * to profile/file categories, 36-character hex-and-dash names are custom
 * questions, and everything else goes through the label-based classifier.
 *
 * @param {object} page - passed straight to domScan.
 * @returns {Promise<object[]>} domScan records with a `category` added; records
 *   categorized 'ignore' are dropped.
 */
async function scanAshby(page) {
  const SYSTEM_PREFIX = '_systemfield_';
  const systemProfileKeys = {
    name: 'full_name',
    email: 'email',
    phone: 'phone',
    linkedin: 'linkedin',
    website: 'portfolio',
    resume: null,
  };
  const uuidShape = /^[0-9a-f-]{36}$/;

  const categorize = (entry) => {
    const isFileInput = entry.inputType === 'file';
    const fieldName = entry.name;

    if (fieldName && fieldName.startsWith(SYSTEM_PREFIX)) {
      const key = fieldName.replace(SYSTEM_PREFIX, '');
      if (key === 'resume' || isFileInput) return 'file:resume';
      const mapped = systemProfileKeys[key];
      if (mapped !== undefined) return 'profile:' + mapped;
    }

    // UUID-shaped names are custom questions
    if (fieldName && uuidShape.test(fieldName)) return 'custom';
    if (isFileInput) return 'file:resume';
    return classifyField(entry);
  };

  const scanned = await domScan(page);
  const categorized = [];
  for (const entry of scanned) {
    const category = categorize(entry);
    if (category !== 'ignore') categorized.push({ ...entry, category });
  }
  return categorized;
}
|
|
334
|
+
|
|
335
|
+
/**
|
|
336
|
+
* Workday scanner.
|
|
337
|
+
* Form fields live inside a nested iframe (dynamically injected after auth).
|
|
338
|
+
* We attempt to pierce the iframe and scan its fields.
|
|
339
|
+
* Falls back to outer DOM scan if iframe isn't accessible.
|
|
340
|
+
* Navigation buttons use data-automation-id and are NOT included in the field scan
|
|
341
|
+
* (they're handled by findNextButton/findSubmitButton separately).
|
|
342
|
+
*/
|
|
343
|
+
/**
 * Scan a Workday page.
 *
 * Picks the first non-main frame whose URL is non-empty, is not 'about:blank'
 * and does not contain 'googleapis', and scans the inputs inside it. If no such
 * frame exists, the frame yields no fields, or the frame scan fails, the outer
 * document is scanned with domScan instead.
 *
 * @param {object} page - Playwright page (uses frames(), mainFrame()).
 * @returns {Promise<object[]>} categorized field records; frame-sourced records
 *   also carry `frameUrl`.
 */
async function scanWorkday(page) {
  try {
    const appFrame = page.frames().find((frame) => {
      const url = frame.url();
      return Boolean(url)
        && url !== 'about:blank'
        && !url.includes('googleapis')
        && frame !== page.mainFrame();
    });

    if (appFrame) {
      // The callback runs inside the frame, so it cannot reference domScan or any
      // other outer helper; everything it needs is declared inline.
      const frameFields = await appFrame.evaluate(() => {
        const cssEscape = (value) => (
          typeof CSS !== 'undefined' && typeof CSS.escape === 'function'
            ? CSS.escape(value)
            : String(value).replace(/([!"#$%&'()*+,.\/:;<=>?@\[\\\]^`{|}~])/g, '\\$1')
        );
        const attrEscape = (value) => String(value).replace(/["\\]/g, '\\$&');

        const results = [];
        document.querySelectorAll('input, textarea, select').forEach((el) => {
          const type = (el.type || '').toLowerCase();
          if (['hidden', 'submit', 'button', 'image', 'reset'].includes(type)) return;
          const rect = el.getBoundingClientRect();
          if (rect.width === 0 && rect.height === 0) return;

          const automationId = el.getAttribute('data-automation-id') || '';
          const id = el.id || '';
          const name = el.name || '';
          const label = el.getAttribute('aria-label') || el.placeholder || automationId || name || id || '';

          // Prefer data-automation-id, then id, then name. When none is present the
          // selector is left empty instead of emitting a meaningless [name=""].
          let selector = '';
          if (automationId) selector = `[data-automation-id="${attrEscape(automationId)}"]`;
          else if (id) selector = '#' + cssEscape(id);
          else if (name) selector = `[name="${attrEscape(name)}"]`;

          results.push({
            tag: el.tagName.toLowerCase(),
            inputType: type,
            id,
            name,
            automationId,
            label,
            selector,
            currentValue: el.value || '',
          });
        });
        return results;
      }).catch(() => []);

      if (frameFields.length > 0) {
        const frameUrl = appFrame.url();
        return frameFields.map((f) => ({ ...f, frameUrl, category: classifyField(f) }));
      }
    }
  } catch (err) {
    // Frame inspection is best effort; report the failure and use the outer DOM.
    console.warn(`[scanPage] Workday frame scan failed: ${err?.message ?? err} — falling back to outer DOM`);
  }

  // Outer DOM fallback
  const raw = await domScan(page);
  return raw.map((f) => ({ ...f, category: classifyField(f) }));
}
|
|
389
|
+
|
|
390
|
+
/**
|
|
391
|
+
* iCIMS scanner.
|
|
392
|
+
* All application fields live inside an iframe with a stable title or src pattern.
|
|
393
|
+
* We pierce the iframe using Playwright frameLocator.
|
|
394
|
+
*/
|
|
395
|
+
/**
 * Scan an iCIMS page.
 *
 * Tries a list of iframe selectors in priority order and returns the fields of
 * the first one that yields any. If none does, the outer document is scanned
 * with domScan.
 *
 * @param {object} page - Playwright page (uses locator(), frameLocator()).
 * @returns {Promise<object[]>} categorized field records; iframe-sourced records
 *   also carry `iframeSelector`.
 */
async function scanICIMS(page) {
  // Try named iframe selectors in priority order
  const iframeSelectors = [
    'iframe[title*="iCIMS"]',
    'iframe[src*="icims.com"]',
    'iframe[id*="icims"]',
    'iframe[name*="icims"]',
    'iframe', // fallback: any iframe
  ];

  for (const iframeSel of iframeSelectors) {
    try {
      const frameCount = await page.locator(iframeSel).count();
      if (frameCount === 0) continue;

      const frame = page.frameLocator(iframeSel);
      const innerFields = await frame.locator('input, textarea, select').evaluateAll((elements) => {
        // Runs in the browser: helpers must be declared inside this callback.
        const cssEscape = (value) => (
          typeof CSS !== 'undefined' && typeof CSS.escape === 'function'
            ? CSS.escape(value)
            : String(value).replace(/([!"#$%&'()*+,.\/:;<=>?@\[\\\]^`{|}~])/g, '\\$1')
        );
        const attrEscape = (value) => String(value).replace(/["\\]/g, '\\$&');

        return elements.map((el) => {
          const type = (el.type || '').toLowerCase();
          // 'reset' is skipped as well, matching the other scanners in this file.
          if (['hidden', 'submit', 'button', 'image', 'reset'].includes(type)) return null;

          let label = '';
          if (el.id) {
            // Escaped so an unusual id cannot make the lookup throw and discard
            // the whole frame's results.
            const lbl = document.querySelector('label[for="' + cssEscape(el.id) + '"]');
            if (lbl) label = lbl.textContent.trim();
          }
          if (!label) label = el.getAttribute('aria-label') || el.placeholder || el.name || el.id || '';

          let selector = null;
          if (el.id) selector = '#' + cssEscape(el.id);
          else if (el.name) selector = `[name="${attrEscape(el.name)}"]`;

          return {
            tag: el.tagName.toLowerCase(),
            inputType: type,
            id: el.id || '',
            name: el.name || '',
            label: label.trim(),
            selector,
            currentValue: el.value || '',
          };
        }).filter(Boolean);
      }).catch(() => []);

      if (innerFields.length > 0) {
        return innerFields.map((f) => ({
          ...f,
          iframeSelector: iframeSel,
          category: classifyField(f),
        }));
      }
    } catch {
      // Best effort: a failing candidate selector just means the next one is tried.
    }
  }

  // Fallback to outer DOM
  const raw = await domScan(page);
  return raw.map((f) => ({ ...f, category: classifyField(f) }));
}
|
|
447
|
+
|
|
448
|
+
/**
|
|
449
|
+
* Generic scanner for any ATS not explicitly handled.
|
|
450
|
+
* Uses the full DOM walk with label classification.
|
|
451
|
+
*/
|
|
452
|
+
/**
 * Fallback scanner: run domScan and attach the label-based category to each record.
 *
 * @param {object} page - passed straight to domScan.
 * @returns {Promise<object[]>} domScan records with a `category` added.
 */
async function scanGeneric(page) {
  const scanned = await domScan(page);
  const categorized = [];
  for (const entry of scanned) {
    categorized.push({ ...entry, category: classifyField(entry) });
  }
  return categorized;
}
|
|
456
|
+
|
|
457
|
+
// ─── Main export ──────────────────────────────────────────────────────────────
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* Scan the current page and return a structured field map.
|
|
461
|
+
*
|
|
462
|
+
* @param {import('playwright').Page} page
|
|
463
|
+
* @param {string} ats - 'greenhouse' | 'lever' | 'ashby' | 'workday' | 'icims' | 'taleo' | ...
|
|
464
|
+
* @returns {Promise<ScannedField[]>}
|
|
465
|
+
*
|
|
466
|
+
* ScannedField shape:
|
|
467
|
+
* {
|
|
468
|
+
* selector: string, CSS selector to locate this element
|
|
469
|
+
* label: string, Human-readable label text
|
|
470
|
+
* tag: string, 'input' | 'textarea' | 'select'
|
|
471
|
+
* inputType: string, 'text' | 'email' | 'radio' | 'checkbox' | 'file' | ...
|
|
472
|
+
* name: string, name= attribute
|
|
473
|
+
* id: string, id= attribute
|
|
474
|
+
* automationId: string, data-automation-id= attribute (Workday)
|
|
475
|
+
* currentValue: string, current field value (empty if unfilled)
|
|
476
|
+
* category: string, 'profile:{field}' | 'custom' | 'eeo' | 'consent' | 'file:{field}'
|
|
477
|
+
* iframeSelector?: string, set if field is inside an iframe (iCIMS)
|
|
478
|
+
* }
|
|
479
|
+
*/
|
|
480
|
+
/**
 * Scan the current page with the scanner registered for `ats` and return the
 * usable fields.
 *
 * Unknown or missing platform names use the generic scanner. If the selected
 * scanner throws, a warning is logged and the generic scanner is tried; if that
 * also fails the result is an empty list.
 *
 * @param {object} page - Playwright page.
 * @param {string} [ats] - platform name, matched case-insensitively.
 * @returns {Promise<object[]>} fields that have a label or selector, are not
 *   typed 'hidden', and whose name does not contain 'g-recaptcha'.
 */
async function scanPage(page, ats) {
  const platform = (ats || 'generic').toLowerCase();

  const scanners = {
    greenhouse: scanGreenhouse,
    lever: scanLever,
    ashby: scanAshby,
    workday: scanWorkday,
    icims: scanICIMS,
  };
  const scanner = Object.prototype.hasOwnProperty.call(scanners, platform)
    ? scanners[platform]
    : scanGeneric;

  let fields;
  try {
    fields = await scanner(page);
  } catch (e) {
    console.warn(`[scanPage] Scanner failed for ${platform}: ${e.message} — falling back to generic`);
    fields = await scanGeneric(page).catch(() => []);
  }

  // Drop unaddressable records, hidden inputs and the recaptcha response field.
  const out = [];
  for (const f of fields) {
    const unaddressable = !f.label && !f.selector;
    const isHidden = f.inputType === 'hidden';
    const isRecaptcha = Boolean(f.name && f.name.includes('g-recaptcha'));
    if (!unaddressable && !isHidden && !isRecaptcha) out.push(f);
  }

  // Tally categories in one pass for the summary line.
  let profileCount = 0;
  let customCount = 0;
  let eeoCount = 0;
  for (const f of out) {
    if (f.category.startsWith('profile')) profileCount += 1;
    if (f.category === 'custom') customCount += 1;
    if (f.category === 'eeo') eeoCount += 1;
  }

  console.log(`[scanPage] ${platform}: found ${out.length} fields (${profileCount} profile, ${customCount} custom, ${eeoCount} eeo)`);
  return out;
}
|
|
510
|
+
|
|
511
|
+
/**
|
|
512
|
+
* Given the scanned fields and an AEP profile, return what value to fill for each field.
|
|
513
|
+
* - Profile fields: look up from profile object
|
|
514
|
+
* - Custom fields: look up from field_answers (AI pre-generated or on-demand)
|
|
515
|
+
* - Returns null for fields that have no answer yet (caller fetches AI answer)
|
|
516
|
+
*
|
|
517
|
+
* @param {ScannedField[]} fields
|
|
518
|
+
* @param {object} aep - Agent Execution Packet (profile_fill, field_answers, cover_letter)
|
|
519
|
+
* @returns {{ field: ScannedField, value: string|null, source: string }[]}
|
|
520
|
+
*/
|
|
521
|
+
/**
 * Pair each scanned field with the value to fill and the origin of that value.
 *
 * Sources returned:
 *   'file_upload'  - category starts with 'file:' (value is always null)
 *   'skip'         - category 'eeo' or 'consent' (value is always null)
 *   'profile'      - value taken from aep.profile_fill
 *   'missing'      - profile field with neither a profile value nor an answer
 *   'cover_letter' - value is aep.cover_letter (or null)
 *   '<src>'        - answer found by exact label match ('ai' when unspecified)
 *   '<src>:fuzzy'  - answer found by substring match
 *   'needs_ai'     - custom field without a stored answer
 *   'unknown'      - any other category
 *
 * @param {object[]} fields - scanned fields; `category` and `label` are read.
 * @param {object} [aep] - may contain profile_fill, field_answers, cover_letter.
 * @returns {{ field: object, value: (string|null), source: string }[]}
 */
function resolveFieldValues(fields, aep) {
  const packet = aep || {};
  const profile = packet.profile_fill || {};
  const fieldAnswers = packet.field_answers || [];

  // Lower-cased, trimmed lookup key; tolerates non-string labels and ids.
  const normalize = (text) => String(text ?? '').toLowerCase().trim();

  // Build key -> answer map from the stored answers (by label and by field_id).
  const answerMap = new Map();
  for (const fa of fieldAnswers) {
    const answer = { value: fa.value, source: fa.source || 'ai' };
    for (const rawKey of [fa.label, fa.field_id]) {
      if (!rawKey) continue;
      const key = normalize(rawKey);
      // An all-whitespace key would otherwise match every unlabeled field.
      if (key) answerMap.set(key, answer);
    }
  }

  // Profile field resolver
  const profileValues = {
    full_name:      [profile.first_name, profile.last_name].filter(Boolean).join(' '),
    first_name:     profile.first_name || '',
    last_name:      profile.last_name || '',
    preferred_name: profile.preferred_name || profile.first_name || '',
    email:          profile.email || '',
    phone:          profile.phone || '',
    linkedin:       profile.linkedin || '',
    github:         profile.github || '',
    twitter:        profile.twitter || '',
    portfolio:      profile.portfolio || profile.website || '',
    website:        profile.portfolio || profile.website || '',
    location:       profile.city || profile.location || '',
    address:        profile.address || '',
    city:           profile.city || '',
    state:          profile.state || '',
    zip:            profile.zip || profile.postal || '',
    country:        profile.country || 'United States',
    org:            profile.current_company || profile.org || '',
    school:         profile.school || '',
    degree:         profile.degree || '',
    gpa:            profile.gpa || '',
    salary:         profile.desired_salary || '',
    start_date:     profile.start_date || 'Immediately',
  };

  return fields.map((field) => {
    const cat = field.category || '';
    const labelKey = normalize(field.label);

    // File upload fields get no value here.
    if (cat.startsWith('file:')) {
      return { field, value: null, source: 'file_upload' };
    }

    // EEO/consent fields get no value here.
    if (cat === 'eeo' || cat === 'consent') {
      return { field, value: null, source: 'skip' };
    }

    // Profile fields
    if (cat.startsWith('profile:')) {
      const pfKey = cat.slice('profile:'.length);
      // Own-property check so keys like 'constructor' do not resolve to
      // inherited Object members.
      const val = Object.prototype.hasOwnProperty.call(profileValues, pfKey)
        ? profileValues[pfKey] || ''
        : '';
      // A stored answer for the same label overrides the profile value; in that
      // case the reported source is the answer's source, not 'profile'/'missing'.
      const override = answerMap.get(labelKey);
      if (override?.value) {
        return { field, value: override.value, source: override.source };
      }
      return { field, value: val || null, source: val ? 'profile' : 'missing' };
    }

    // Cover letter special case
    if (cat === 'cover_letter') {
      return { field, value: packet.cover_letter || null, source: 'cover_letter' };
    }

    // Custom question — look up in answer map by label (exact then fuzzy)
    if (cat === 'custom') {
      const exact = answerMap.get(labelKey);
      if (exact) return { field, value: exact.value, source: exact.source };

      // Fuzzy: one label contains the other. Skipped for empty labels, because
      // every string contains '' and the first stored answer would always win.
      if (labelKey) {
        for (const [key, ans] of answerMap) {
          if (key.length > 5 && (labelKey.includes(key) || key.includes(labelKey))) {
            return { field, value: ans.value, source: ans.source + ':fuzzy' };
          }
        }
      }

      // No answer found — needs AI
      return { field, value: null, source: 'needs_ai' };
    }

    return { field, value: null, source: 'unknown' };
  });
}
|
|
605
|
+
|
|
606
|
+
module.exports = { scanPage, resolveFieldValues, classifyField };
|