halo-agent 1.2.2 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. package/browser.js +36 -26
  2. package/captcha.js +46 -27
  3. package/index.js +7 -25
  4. package/package.json +1 -1
package/browser.js CHANGED
@@ -3,10 +3,19 @@
3
3
  const { chromium } = require('playwright');
4
4
  const { execSync, spawnSync, spawn } = require('child_process');
5
5
  const fs = require('fs');
6
+ const path = require('path');
7
+ const os = require('os');
6
8
 
7
9
  const CDP_PORT = 9222;
8
10
  const CDP_URL = `http://localhost:${CDP_PORT}`;
9
11
 
12
+ // Dedicated user-data-dir for the agent's Chrome. Fully isolated from the
13
+ // user's real Chrome so it can run side-by-side without profile-lock
14
+ // collisions (which silently disable --remote-debugging-port). The
15
+ // user logs into Workday / LinkedIn here ONCE on first run and the
16
+ // session persists across runs because this dir is on disk, not /tmp.
17
+ const AGENT_PROFILE_DIR = path.join(os.homedir(), '.halo-agent', 'chrome-profile');
18
+
10
19
  /**
11
20
  * Check if Chrome is already running with remote debugging on CDP_PORT.
12
21
  */
@@ -42,12 +51,22 @@ function findChromeMac() {
42
51
  */
43
52
  function launchChrome() {
44
53
  const platform = process.platform;
45
- const flags = [
54
+
55
+ // Ensure the dedicated profile dir exists. Chrome will populate it on
56
+ // first launch (and persist cookies/logins there across runs).
57
+ try { fs.mkdirSync(AGENT_PROFILE_DIR, { recursive: true }); } catch {}
58
+
59
+ // Note: we use --user-data-dir (a full isolated dir), NOT --profile-directory
60
+ // (which is a sub-profile name inside the default user-data-dir and would
61
+ // still collide with the user's running Chrome and silently drop the
62
+ // --remote-debugging-port flag).
63
+ const baseFlags = [
46
64
  `--remote-debugging-port=${CDP_PORT}`,
47
- '--profile-directory=Default',
65
+ `--user-data-dir=${AGENT_PROFILE_DIR}`,
48
66
  '--no-first-run',
49
67
  '--no-default-browser-check',
50
- ].join(' ');
68
+ '--no-default-browser-check',
69
+ ];
51
70
 
52
71
  if (platform === 'darwin') {
53
72
  const chromePath = findChromeMac();
@@ -55,22 +74,11 @@ function launchChrome() {
55
74
  console.error('[halo-agent] Chrome not found. Install Google Chrome from https://www.google.com/chrome/');
56
75
  return;
57
76
  }
58
- // CRITICAL: on macOS, spawning the Chrome executable directly while
59
- // LaunchServices still has a Chrome instance registered (recent quit,
60
- // dock icon still showing, etc.) causes the new launch to REACTIVATE
61
- // the existing instance and silently DROP all --flags. Symptom: Chrome
62
- // opens but without --remote-debugging-port, so the agent can never
63
- // see it. `open -na` forces a fresh app instance and `--args` passes
64
- // flags through reliably — this is the only correct way to launch
65
- // Chrome with custom flags on macOS.
66
- const splitFlags = [
67
- `--remote-debugging-port=${CDP_PORT}`,
68
- '--profile-directory=Default',
69
- '--no-first-run',
70
- '--no-default-browser-check',
71
- '--restore-last-session', // bring back the windows the user just had
72
- ];
73
- spawn('open', ['-na', chromePath, '--args', ...splitFlags], {
77
+ // `open -na` forces a fresh app instance (LaunchServices would otherwise
78
+ // reactivate the existing Chrome and drop our flags). Because we point
79
+ // at a dedicated --user-data-dir, this instance does NOT collide with
80
+ // the user's everyday Chrome — they can run side-by-side cleanly.
81
+ spawn('open', ['-na', chromePath, '--args', ...baseFlags], {
74
82
  detached: true,
75
83
  stdio: 'ignore',
76
84
  }).unref();
@@ -82,10 +90,12 @@ function launchChrome() {
82
90
  ];
83
91
  const chromePath = chromePaths.find(p => { try { fs.accessSync(p); return true; } catch { return false; } });
84
92
  if (!chromePath) { console.error('[halo-agent] Chrome not found on Windows.'); return; }
85
- spawn(`"${chromePath}" ${flags}`, [], { shell: true, detached: true, stdio: 'ignore' }).unref();
93
+ const winFlags = baseFlags.map(f => f.includes(' ') ? `"${f}"` : f).join(' ');
94
+ spawn(`"${chromePath}" ${winFlags}`, [], { shell: true, detached: true, stdio: 'ignore' }).unref();
86
95
  } else {
87
96
  // Linux
88
- spawn(`google-chrome ${flags}`, [], { shell: true, detached: true, stdio: 'ignore' }).unref();
97
+ const linuxFlags = baseFlags.map(f => f.includes(' ') ? `"${f}"` : f).join(' ');
98
+ spawn(`google-chrome ${linuxFlags}`, [], { shell: true, detached: true, stdio: 'ignore' }).unref();
89
99
  }
90
100
  }
91
101
 
@@ -166,11 +176,11 @@ async function connectToChrome(retries = 10, opts = {}) {
166
176
 
167
177
  console.log(''); // newline after dots
168
178
  throw new Error(
169
- `Could not connect to Chrome after ${retries} attempts.\n` +
170
- `Chrome may have launched without the --remote-debugging-port=9222 flag\n` +
171
- `(macOS reactivates an existing Chrome instance and drops flags).\n` +
172
- `Try: fully quit Chrome (Cmd+Q in every window + check the dock), then run:\n\n` +
173
- ` halo-agent start\n`
179
+ `Could not connect to the agent's Chrome on port ${CDP_PORT} after ${retries} attempts.\n` +
180
+ `The agent uses an isolated Chrome profile at ${AGENT_PROFILE_DIR}.\n` +
181
+ `If a previous run is still alive, kill it:\n\n` +
182
+ ` pkill -f "user-data-dir=${AGENT_PROFILE_DIR}"\n\n` +
183
+ `Then run \`halo-agent start\` again.\n`
174
184
  );
175
185
  }
176
186
 
package/captcha.js CHANGED
@@ -17,49 +17,56 @@ const CAPSOLVER_API = 'https://api.capsolver.com';
17
17
  * Returns { detected, type, sitekey, pageUrl }
18
18
  */
19
19
  async function detectCaptcha(page) {
20
+ // The pageUrl CapSolver wants is the top-level URL the user sees, not an
21
+ // inner iframe URL. Anchor it here so every branch returns the same thing.
22
+ const pageUrl = page.url();
23
+
20
24
  // First try the top-level frame via evaluate
21
25
  const topResult = await page.evaluate(() => {
22
- // reCAPTCHA v2 via iframe src
26
+ // reCAPTCHA v2 via iframe src — also check size=invisible in the URL
23
27
  const rcFrame = document.querySelector('iframe[src*="recaptcha/api2"], iframe[src*="google.com/recaptcha"]');
24
28
  if (rcFrame) {
25
- const match = (rcFrame.src || '').match(/[?&]k=([^&]+)/);
26
- return { detected: true, type: 'recaptcha_v2', sitekey: match?.[1] || null, pageUrl: location.href };
29
+ const src = rcFrame.src || '';
30
+ const match = src.match(/[?&]k=([^&]+)/);
31
+ const isInvisible = /[?&]size=invisible/.test(src);
32
+ return { detected: true, type: 'recaptcha_v2', sitekey: match?.[1] || null, isInvisible };
27
33
  }
28
- // reCAPTCHA via data-sitekey
34
+ // reCAPTCHA via data-sitekey — check data-size="invisible" on the element
29
35
  const rcEl = document.querySelector('.g-recaptcha[data-sitekey], [data-sitekey]:not(.h-captcha)');
30
36
  if (rcEl) {
31
- return { detected: true, type: 'recaptcha_v2', sitekey: rcEl.getAttribute('data-sitekey'), pageUrl: location.href };
37
+ const isInvisible = rcEl.getAttribute('data-size') === 'invisible';
38
+ return { detected: true, type: 'recaptcha_v2', sitekey: rcEl.getAttribute('data-sitekey'), isInvisible };
32
39
  }
33
40
  // hCAPTCHA
34
41
  const hcEl = document.querySelector('.h-captcha[data-sitekey]');
35
42
  if (hcEl) {
36
- return { detected: true, type: 'hcaptcha', sitekey: hcEl.getAttribute('data-sitekey'), pageUrl: location.href };
43
+ return { detected: true, type: 'hcaptcha', sitekey: hcEl.getAttribute('data-sitekey'), isInvisible: false };
37
44
  }
38
45
  const hcFrame = document.querySelector('iframe[src*="hcaptcha.com"]');
39
46
  if (hcFrame) {
40
47
  const match = (hcFrame.src || '').match(/[?&]sitekey=([^&]+)/);
41
- return { detected: true, type: 'hcaptcha', sitekey: match?.[1] || null, pageUrl: location.href };
48
+ return { detected: true, type: 'hcaptcha', sitekey: match?.[1] || null, isInvisible: false };
42
49
  }
43
50
  // Cloudflare
44
51
  if (document.getElementById('cf-challenge-running') || document.querySelector('.cf-browser-verification')) {
45
- return { detected: true, type: 'cloudflare', sitekey: null, pageUrl: location.href };
52
+ return { detected: true, type: 'cloudflare', sitekey: null, isInvisible: false };
46
53
  }
47
54
  return null;
48
55
  });
49
56
 
50
- if (topResult) return topResult;
57
+ if (topResult) return { ...topResult, pageUrl };
51
58
 
52
- // Search all frames for reCAPTCHA (Ashby loads it inside a sandboxed iframe)
53
- const pageUrl = page.url();
59
+ // Search all frames for reCAPTCHA (Ashby/Greenhouse load it inside a sandboxed iframe)
54
60
  for (const frame of page.frames()) {
55
61
  if (frame === page.mainFrame()) continue;
56
62
  const frameSrc = frame.url();
57
63
 
58
- // If the frame itself is a reCAPTCHA anchor frame, extract sitekey from its URL
64
+ // If the frame itself is a reCAPTCHA anchor frame, extract sitekey + invisible from URL
59
65
  if (frameSrc.includes('recaptcha/api2/anchor') || frameSrc.includes('recaptcha/enterprise/anchor')) {
60
66
  const match = frameSrc.match(/[?&]k=([^&]+)/);
67
+ const isInvisible = /[?&]size=invisible/.test(frameSrc);
61
68
  if (match) {
62
- return { detected: true, type: 'recaptcha_v2', sitekey: match[1], pageUrl };
69
+ return { detected: true, type: 'recaptcha_v2', sitekey: match[1], pageUrl, isInvisible };
63
70
  }
64
71
  }
65
72
 
@@ -67,22 +74,29 @@ async function detectCaptcha(page) {
67
74
  try {
68
75
  const frameResult = await frame.evaluate(() => {
69
76
  const rcEl = document.querySelector('.g-recaptcha[data-sitekey], [data-sitekey]:not(.h-captcha)');
70
- if (rcEl) return { sitekey: rcEl.getAttribute('data-sitekey'), type: 'recaptcha_v2' };
77
+ if (rcEl) {
78
+ return {
79
+ sitekey: rcEl.getAttribute('data-sitekey'),
80
+ type: 'recaptcha_v2',
81
+ isInvisible: rcEl.getAttribute('data-size') === 'invisible',
82
+ };
83
+ }
71
84
  const rcFrame = document.querySelector('iframe[src*="recaptcha"]');
72
85
  if (rcFrame) {
73
- const match = (rcFrame.src || '').match(/[?&]k=([^&]+)/);
74
- return match ? { sitekey: match[1], type: 'recaptcha_v2' } : null;
86
+ const src = rcFrame.src || '';
87
+ const match = src.match(/[?&]k=([^&]+)/);
88
+ return match ? { sitekey: match[1], type: 'recaptcha_v2', isInvisible: /[?&]size=invisible/.test(src) } : null;
75
89
  }
76
90
  return null;
77
91
  }).catch(() => null);
78
92
 
79
93
  if (frameResult?.sitekey) {
80
- return { detected: true, type: frameResult.type, sitekey: frameResult.sitekey, pageUrl };
94
+ return { detected: true, type: frameResult.type, sitekey: frameResult.sitekey, pageUrl, isInvisible: !!frameResult.isInvisible };
81
95
  }
82
96
  } catch {}
83
97
  }
84
98
 
85
- return { detected: false, type: null, sitekey: null, pageUrl };
99
+ return { detected: false, type: null, sitekey: null, pageUrl, isInvisible: false };
86
100
  }
87
101
 
88
102
  /**
@@ -101,21 +115,26 @@ async function solveCaptcha(captchaInfo, apiKey) {
101
115
  ? 'HCaptchaTaskProxyless'
102
116
  : 'ReCaptchaV2TaskProxyless';
103
117
 
104
- console.log(`[captcha] Submitting ${taskType} task to CapSolver (sitekey: ${captchaInfo.sitekey.slice(0, 12)}...)`);
118
+ // CapSolver requires `isInvisible: true` for invisible reCAPTCHA — sending
119
+ // a normal v2 task for an invisible sitekey returns
120
+ // "Invalid input, please check captcha type or pageUrl and invisible".
121
+ const task = {
122
+ type: taskType,
123
+ websiteURL: captchaInfo.pageUrl,
124
+ websiteKey: captchaInfo.sitekey,
125
+ };
126
+ if (captchaInfo.type === 'recaptcha_v2' && captchaInfo.isInvisible) {
127
+ task.isInvisible = true;
128
+ }
129
+
130
+ console.log(`[captcha] Submitting ${taskType}${task.isInvisible ? ' (invisible)' : ''} task to CapSolver (sitekey: ${captchaInfo.sitekey.slice(0, 12)}..., url: ${captchaInfo.pageUrl})`);
105
131
 
106
132
  let taskId;
107
133
  try {
108
134
  const createRes = await fetch(`${CAPSOLVER_API}/createTask`, {
109
135
  method: 'POST',
110
136
  headers: { 'Content-Type': 'application/json' },
111
- body: JSON.stringify({
112
- clientKey: apiKey,
113
- task: {
114
- type: taskType,
115
- websiteURL: captchaInfo.pageUrl,
116
- websiteKey: captchaInfo.sitekey,
117
- },
118
- }),
137
+ body: JSON.stringify({ clientKey: apiKey, task }),
119
138
  });
120
139
  const createData = await createRes.json();
121
140
  if (createData.errorId !== 0) {
package/index.js CHANGED
@@ -453,34 +453,16 @@ async function runStart() {
453
453
  console.log('\nHALO Agent starting...');
454
454
  console.log('Connecting to your Chrome browser...\n');
455
455
 
456
- // Pre-check: Chrome must be running with --remote-debugging-port=9222 for
457
- // CDP to work. If it's running WITHOUT that flag, Chrome can't have it
458
- // added retroactively — it's a Chrome architecture limit. We offer to
459
- // restart Chrome gracefully (preserves tabs via Chrome's "Continue where
460
- // you left off" setting) and relaunch with the flag.
461
- const { isChromeDebuggable } = require('./browser');
462
- // weRestartedIt: tracks whether THIS process launched/restarted Chrome.
463
- // When true, connectToChrome must NOT launch again — that's the double-
464
- // launch bug that was spawning extra Chrome windows.
465
- let weRestartedIt = false;
466
- const alreadyDebuggable = await isChromeDebuggable();
467
- if (!alreadyDebuggable) {
468
- const needsRestart = await detectChromeRunningWithoutDebug();
469
- if (needsRestart) {
470
- const ok = await offerChromeRestart();
471
- if (!ok) {
472
- console.error('\nOK — leaving Chrome alone. Run `halo-agent start` again after you’ve quit Chrome.');
473
- process.exit(1);
474
- }
475
- await restartChromeWithDebugFlag();
476
- weRestartedIt = true;
477
- }
478
- }
479
-
456
+ // The agent runs its own isolated Chrome instance (separate --user-data-dir
457
+ // at ~/.halo-agent/chrome-profile) so it never collides with the user's
458
+ // everyday Chrome. First launch will be a blank profile — the user logs
459
+ // into Workday / LinkedIn ONCE in the agent's Chrome and those sessions
460
+ // persist across runs. No need to detect or restart the user's Chrome.
480
461
  let chromeConn;
481
462
  try {
482
- chromeConn = await connectToChrome(10, { skipLaunch: weRestartedIt });
463
+ chromeConn = await connectToChrome(10);
483
464
  console.log('\nConnected to Chrome. Polling for queued jobs...');
465
+ console.log('First time? Log into Workday/LinkedIn in this Chrome window — sessions persist.');
484
466
  console.log('Go to your HALO dashboard and click "Auto-Apply" on any job.\n');
485
467
  } catch (err) {
486
468
  console.error('\nCould not connect to Chrome:', err.message);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "halo-agent",
3
- "version": "1.2.2",
3
+ "version": "1.3.1",
4
4
  "description": "HALO local apply agent — auto-fills job applications using your real Chrome session",
5
5
  "main": "index.js",
6
6
  "bin": {