website-api 1.1.3 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +141 -1
  2. package/dist/bin/cli.js +209 -1
  3. package/dist/src/capabilities/browser.d.ts +16 -2
  4. package/dist/src/capabilities/browser.js +158 -1
  5. package/dist/src/capabilities/cookies.d.ts +7 -1
  6. package/dist/src/capabilities/cookies.js +68 -1
  7. package/dist/src/capabilities/download.js +32 -1
  8. package/dist/src/capabilities/fingerprint.js +62 -1
  9. package/dist/src/capabilities/http.js +101 -1
  10. package/dist/src/capabilities/login/login-helper.js +185 -1
  11. package/dist/src/capabilities/login/login-strategy.js +36 -1
  12. package/dist/src/challenges/perimeterx.d.ts +62 -0
  13. package/dist/src/challenges/perimeterx.js +112 -0
  14. package/dist/src/cli/ext.js +338 -1
  15. package/dist/src/core/context.d.ts +2 -2
  16. package/dist/src/core/context.js +138 -1
  17. package/dist/src/core/define-site.js +74 -1
  18. package/dist/src/core/loader.js +142 -1
  19. package/dist/src/core/registry.js +332 -1
  20. package/dist/src/core/runtime.d.ts +12 -4
  21. package/dist/src/core/runtime.js +98 -1
  22. package/dist/src/env.js +34 -1
  23. package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
  24. package/dist/src/sites/bloomberg.com/index.js +49 -0
  25. package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
  26. package/dist/src/sites/chase.com/download-helper.js +266 -1
  27. package/dist/src/sites/chase.com/index.js +87 -1
  28. package/dist/src/sites/chase.com/openapi.yaml +76 -0
  29. package/dist/src/sites/chatgpt.com/index.js +24 -1
  30. package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
  31. package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
  32. package/dist/src/sites/claude.ai/index.js +42 -1
  33. package/dist/src/sites/claude.ai/openapi.yaml +54 -0
  34. package/dist/src/sites/cursor.com/index.js +12 -1
  35. package/dist/src/sites/cursor.com/openapi.yaml +39 -0
  36. package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
  37. package/dist/src/sites/e-zpassny.com/index.js +344 -0
  38. package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
  39. package/dist/src/sites/gemini.google.com/index.js +80 -1
  40. package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
  41. package/dist/src/sites/google.com/google-helpers.js +255 -1
  42. package/dist/src/sites/google.com/index.js +253 -1
  43. package/dist/src/sites/google.com/openapi.yaml +59 -0
  44. package/dist/src/sites/microcenter.com/openapi.yaml +44 -0
  45. package/dist/src/sites/ollama.com/index.js +43 -1
  46. package/dist/src/sites/ollama.com/openapi.yaml +39 -0
  47. package/dist/src/sites/perplexity.ai/index.js +253 -1
  48. package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
  49. package/dist/src/sites/pseg.com/index.js +243 -1
  50. package/dist/src/sites/pseg.com/openapi.yaml +42 -0
  51. package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
  52. package/dist/src/sites/voice.google.com/index.d.ts +2 -0
  53. package/dist/src/sites/voice.google.com/index.js +122 -0
  54. package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
  55. package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
  56. package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
  57. package/dist/src/sites/zillow.com/index.d.ts +2 -0
  58. package/dist/src/sites/zillow.com/index.js +303 -0
  59. package/dist/src/sites/zillow.com/openapi.yaml +55 -0
  60. package/dist/src/types.d.ts +7 -0
  61. package/dist/src/types.js +1 -1
  62. package/dist/src/util/args-parser.js +150 -1
  63. package/dist/src/util/google-json.js +74 -1
  64. package/dist/src/website-api.d.ts +7 -7
  65. package/dist/src/website-api.js +13 -1
  66. package/package.json +38 -10
@@ -1 +1,32 @@
1
- import t from"node:fs/promises";import e from"node:path";export function createSaver(r,o=process.cwd()){const n=e.resolve(o,r??".");let i=!1;return async function(r,o){i||(await t.mkdir(n,{recursive:!0}),i=!0);const s=e.join(n,e.basename(r));return await t.writeFile(s,o),s}}export function assertNotHtml(t,e){const r=String(t??"").trimStart();if(/^<!doctype html/i.test(r)||/^<html/i.test(r))throw new Error(`Download for ${e} returned HTML instead of data. The session may have expired.`);return t}
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
/**
 * Builds a `save(filename, content)` function bound to a target directory.
 *
 * The directory is created (recursively) before the first write. Only the
 * basename of `filename` is honored, so a caller-supplied name can never
 * escape the target directory.
 *
 * @param {string|null|undefined} outDir Output directory, resolved against
 *   `cwd`; a nullish value means `cwd` itself.
 * @param {string} [cwd=process.cwd()] Base directory used to resolve `outDir`.
 * @returns {(filename: string, content: string|Uint8Array) => Promise<string>}
 *   Async saver that resolves to the absolute path written. It rejects when
 *   `filename` has no usable basename (empty, "." or "..").
 */
export function createSaver(outDir, cwd = process.cwd()) {
  const targetDir = path.resolve(cwd, outDir ?? ".");
  let ensured = false;
  return async function save(filename, content) {
    // Defend against path traversal: only the basename is honored.
    const name = path.basename(String(filename ?? ""));
    // These basenames resolve to the target directory or its parent, which
    // would otherwise surface as an opaque EISDIR from writeFile.
    if (name === "" || name === "." || name === "..") {
      throw new Error(`Invalid download filename: ${JSON.stringify(filename)}`);
    }
    if (!ensured) {
      await fs.mkdir(targetDir, { recursive: true });
      ensured = true;
    }
    const filePath = path.join(targetDir, name);
    await fs.writeFile(filePath, content);
    return filePath;
  };
}
22
/**
 * Rejects a payload that was expected to be data but looks like an HTML page
 * (typically the login/error page served once a session has expired).
 *
 * @param {*} text Downloaded payload; coerced to a string only for sniffing.
 * @param {string} label Name of the download, used in the error message.
 * @returns {*} The original `text`, untouched, when it is not HTML.
 * @throws {Error} When the payload starts with an HTML doctype or `<html` tag.
 */
export function assertNotHtml(text, label) {
  const head = String(text ?? "").trimStart();
  const htmlMarkers = [/^<!doctype html/i, /^<html/i];
  const looksLikeHtml = htmlMarkers.some((marker) => marker.test(head));
  if (looksLikeHtml) {
    throw new Error(`Download for ${label} returned HTML instead of data. The session may have expired.`);
  }
  return text;
}
@@ -1 +1,62 @@
1
- const n={languages:["en-US","en"]};export function buildFingerprintScript(n){return`(() => {\n const cfg = ${JSON.stringify(n)};\n const def = (obj, prop, get) => {\n try { Object.defineProperty(obj, prop, { get, configurable: true }); } catch (_) {}\n };\n // Hide the automation flag.\n def(navigator, "webdriver", () => undefined);\n if (cfg.languages) {\n def(navigator, "languages", () => cfg.languages);\n }\n if (cfg.platform) def(navigator, "platform", () => cfg.platform);\n if (typeof cfg.hardwareConcurrency === "number") {\n def(navigator, "hardwareConcurrency", () => cfg.hardwareConcurrency);\n }\n if (typeof cfg.deviceMemory === "number") {\n def(navigator, "deviceMemory", () => cfg.deviceMemory);\n }\n // Present a non-empty plugins list, as headless/automation often reports none.\n try {\n if (!navigator.plugins || navigator.plugins.length === 0) {\n def(navigator, "plugins", () => [1, 2, 3, 4, 5]);\n }\n } catch (_) {}\n // Ensure window.chrome exists, as some sites probe for it.\n try {\n if (!window.chrome) { window.chrome = { runtime: {} }; }\n } catch (_) {}\n })();`}export function resolveFingerprint(e,r){if(!1===e)return null;const t="stealth"===e?{...n}:{...n,...e};return r&&!t.userAgent&&(t.userAgent=r),t}export async function applyFingerprint(n,e,r){const t=resolveFingerprint(e,r);t&&await n.addInitScript(buildFingerprintScript(t))}
1
/**
 * Baseline "stealth" fingerprint profile. It only pins the reported language
 * list; every other fingerprint field is opt-in through the site's option.
 */
const STEALTH_DEFAULTS = { languages: ["en-US", "en"] };
8
/**
 * Builds the init script that shapes the page's fingerprint. Pure and exported
 * so it can be unit-tested without a real browser.
 *
 * The script always hides `navigator.webdriver`, fills in an empty plugins
 * list and ensures `window.chrome` exists. `languages`, `platform`,
 * `hardwareConcurrency` and `deviceMemory` are overridden only when present in
 * `config`.
 *
 * @param {object|null|undefined} config Plain-data fingerprint config. A
 *   nullish value is treated as an empty config so the generated script never
 *   dereferences `undefined`.
 * @returns {string} Source of a self-invoking function to inject into the page.
 */
export function buildFingerprintScript(config) {
  // Serialized into the page; only plain data from `config` is interpolated.
  // JSON.stringify(undefined) is `undefined`, which would make the script throw
  // at `cfg.languages` and skip the later shims, hence the `?? {}`.
  const cfg = JSON.stringify(config ?? {});
  return `(() => {
  const cfg = ${cfg};
  const def = (obj, prop, get) => {
    try { Object.defineProperty(obj, prop, { get, configurable: true }); } catch (_) {}
  };
  // Hide the automation flag.
  def(navigator, "webdriver", () => undefined);
  if (cfg.languages) {
    def(navigator, "languages", () => cfg.languages);
  }
  if (cfg.platform) def(navigator, "platform", () => cfg.platform);
  if (typeof cfg.hardwareConcurrency === "number") {
    def(navigator, "hardwareConcurrency", () => cfg.hardwareConcurrency);
  }
  if (typeof cfg.deviceMemory === "number") {
    def(navigator, "deviceMemory", () => cfg.deviceMemory);
  }
  // Present a non-empty plugins list, as headless/automation often reports none.
  try {
    if (!navigator.plugins || navigator.plugins.length === 0) {
      def(navigator, "plugins", () => [1, 2, 3, 4, 5]);
    }
  } catch (_) {}
  // Ensure window.chrome exists, as some sites probe for it.
  try {
    if (!window.chrome) { window.chrome = { runtime: {} }; }
  } catch (_) {}
})();`;
}
44
/**
 * Resolves the effective fingerprint config from the site's option.
 *
 * @param {false|"stealth"|object|undefined} option `false` disables
 *   fingerprinting; an object is merged over the stealth defaults; any other
 *   value (including "stealth") yields the stealth defaults alone.
 * @param {string} [userAgent] Used as `userAgent` when the option does not
 *   provide one.
 * @returns {object|null} A fresh config object, or null when disabled.
 */
export function resolveFingerprint(option, userAgent) {
  if (option === false) {
    return null;
  }
  // Only object options carry overrides. Spreading an arbitrary string would
  // leak its characters in as numeric keys.
  const overrides = option !== null && typeof option === "object" ? option : {};
  const base = { ...STEALTH_DEFAULTS, ...overrides };
  // The spread is shallow: copy the array so callers cannot mutate the shared
  // module-level defaults through the returned config.
  if (Array.isArray(base.languages)) {
    base.languages = [...base.languages];
  }
  if (userAgent && !base.userAgent) {
    base.userAgent = userAgent;
  }
  return base;
}
53
/**
 * Installs the fingerprint on a page as an init script. Does nothing when the
 * option resolves to "disabled" (a null config).
 *
 * @param {object} page Object exposing `addInitScript(script)`.
 * @param {false|"stealth"|object} option The site's fingerprint option.
 * @param {string} [userAgent] Fallback user agent for the resolved config.
 * @returns {Promise<void>}
 */
export async function applyFingerprint(page, option, userAgent) {
  const config = resolveFingerprint(option, userAgent);
  if (config) {
    const script = buildFingerprintScript(config);
    await page.addInitScript(script);
  }
}
@@ -1 +1,101 @@
1
- export function parseSSE(t){const e=[],s=t.replace(/\r\n/g,"\n");for(const t of s.split("\n\n")){const s=[];for(const e of t.split("\n"))e.startsWith("data:")&&s.push(e.slice(5).trimStart());if(s.length)try{const t=JSON.parse(s.join("\n"));t&&("object"!=typeof t||Object.keys(t).length>0)&&e.push(t)}catch{}}return e}export function createHttp(t){const e=t.fetchImpl??fetch;function s(e){const s=new Headers(e?.headers);if(!s.has("Cookie")){const e=t.cookieString();e&&s.set("Cookie",e)}return s.has("User-Agent")||s.set("User-Agent",t.userAgent()),s}async function n(n,a){const r={...a,headers:s(a)};t.debug&&console.log("[debug] Request:",{url:n,init:r});const o=await e(n,r),c=await o.text();if(t.debug&&console.log("[debug] Response:",{url:n,status:o.status,statusText:o.statusText,headers:Array.from(o.headers.entries()),body:c}),!o.ok)throw new Error(`HTTP ${o.status}: ${o.statusText}`);return{response:o,text:c}}return{raw:n,text:async(t,e)=>(await n(t,e)).text,async html(t,e){const s=new Headers(e?.headers);return s.has("Accept")||s.set("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),(await n(t,{...e,headers:s})).text},async json(t,e){const s=new Headers(e?.headers);s.has("Accept")||s.set("Accept","application/json, text/plain, */*");const{text:a}=await n(t,{...e,headers:s});try{return JSON.parse(a)}catch{return{response:a}}},async sse(t,e){const s=new Headers(e?.headers);s.has("Accept")||s.set("Accept","text/event-stream");const{response:a,text:r}=await n(t,{...e,headers:s});return{status:a.status,contentType:a.headers.get("content-type")||"",frames:parseSSE(r),raw:r}}}}
1
/**
 * Parses an SSE body into the JSON payloads of its `data:` frames.
 *
 * Events are separated by a blank line; the `data:` lines of one event are
 * joined with "\n" and parsed as JSON. Frames that are not valid JSON
 * (comments, keep-alives, sentinels such as `[DONE]`) are skipped, as are
 * falsy payloads and empty objects/arrays.
 *
 * @param {string|null|undefined} raw Full response body; nullish is treated as
 *   an empty stream.
 * @returns {Array<*>} Parsed payloads in stream order.
 */
export function parseSSE(raw) {
  const frames = [];
  // The event-stream format allows CRLF, LF and bare CR line endings.
  const normalized = String(raw ?? "").replace(/\r\n?/g, "\n");
  for (const block of normalized.split("\n\n")) {
    const dataLines = [];
    for (const line of block.split("\n")) {
      if (line.startsWith("data:")) {
        dataLines.push(line.slice(5).trimStart());
      }
    }
    if (dataLines.length === 0) {
      continue;
    }
    try {
      const data = JSON.parse(dataLines.join("\n"));
      if (data && (typeof data !== "object" || Object.keys(data).length > 0)) {
        frames.push(data);
      }
    } catch {
      // Skip non-JSON frames (comments, keep-alives, partials).
    }
  }
  return frames;
}
25
/**
 * Builds the HTTP capability. Every request merges the resolved cookie string
 * and User-Agent unless the caller already set those headers.
 *
 * @param {object} deps
 * @param {Function} [deps.fetchImpl] fetch-compatible function; defaults to the
 *   global `fetch`.
 * @param {() => string} deps.cookieString Returns the Cookie header value; an
 *   empty value means "send no Cookie header".
 * @param {() => string} deps.userAgent Returns the User-Agent header value; an
 *   empty value means "do not set a User-Agent header here".
 * @param {boolean} [deps.debug] When truthy, logs every request and response
 *   (including headers and body) to the console.
 * @returns {{raw: Function, text: Function, html: Function, json: Function, sse: Function}}
 *   `raw` resolves to `{ response, text }`; `text`/`html` resolve to the body;
 *   `json` resolves to the parsed body, or `{ response: text }` when the body
 *   is not JSON; `sse` resolves to `{ status, contentType, frames, raw }`.
 *   All of them reject with `Error("HTTP <status>: <reason>")` on a non-2xx.
 */
export function createHttp(deps) {
  const fetchImpl = deps.fetchImpl ?? fetch;

  /** Adds the session Cookie / User-Agent unless the caller provided them. */
  function mergeHeaders(init) {
    const headers = new Headers(init?.headers);
    if (!headers.has("Cookie")) {
      const cookie = deps.cookieString();
      if (cookie) {
        headers.set("Cookie", cookie);
      }
    }
    if (!headers.has("User-Agent")) {
      // Guarded like the cookie: Headers.set would otherwise stringify an
      // unresolved value into the literal header "undefined".
      const userAgent = deps.userAgent();
      if (userAgent) {
        headers.set("User-Agent", userAgent);
      }
    }
    return headers;
  }

  /** Returns a copy of `init` whose headers carry `accept` unless already set. */
  function withDefaultAccept(init, accept) {
    const headers = new Headers(init?.headers);
    if (!headers.has("Accept")) {
      headers.set("Accept", accept);
    }
    return { ...init, headers };
  }

  async function raw(url, init) {
    const request = { ...init, headers: mergeHeaders(init) };
    if (deps.debug) {
      console.log("[debug] Request:", { url, init: request });
    }
    const response = await fetchImpl(url, request);
    // The body is always read up front so it is available for debug output and
    // for the error message below.
    const text = await response.text();
    if (deps.debug) {
      console.log("[debug] Response:", {
        url,
        status: response.status,
        statusText: response.statusText,
        headers: Array.from(response.headers.entries()),
        body: text,
      });
    }
    if (!response.ok) {
      const reason = response.statusText || text.slice(0, 200) || "request failed";
      throw new Error(`HTTP ${response.status}: ${reason}`);
    }
    return { response, text };
  }

  return {
    raw,
    async text(url, init) {
      return (await raw(url, init)).text;
    },
    async html(url, init) {
      const request = withDefaultAccept(
        init,
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      );
      return (await raw(url, request)).text;
    },
    async json(url, init) {
      const request = withDefaultAccept(init, "application/json, text/plain, */*");
      const { text } = await raw(url, request);
      try {
        return JSON.parse(text);
      } catch {
        // Non-JSON bodies are handed back wrapped rather than rejected.
        return { response: text };
      }
    },
    async sse(url, init) {
      const request = withDefaultAccept(init, "text/event-stream");
      const { response, text } = await raw(url, request);
      return {
        status: response.status,
        contentType: response.headers.get("content-type") || "",
        frames: parseSSE(text),
        raw: text,
      };
    },
  };
}
@@ -1 +1,185 @@
1
- const t=['input[type="password"]'],e=['button[type="submit"]','input[type="submit"]'];async function i(t,e){for(const i of e)try{if(await t.locator(i).first().isVisible())return i}catch{}return null}function o(t,...e){const i=[t,...e.flatMap(t=>t??[])];return Array.from(new Set(i.filter(Boolean)))}export async function performFormLogin(a){const{page:r,emailSelector:n,passwordSelector:l,submitButtonSelector:s,delayMs:c=1e3,intendedUrl:w,expectedRedirectUrlPattern:f=w,debug:g=!1,pwdSelector:d='input[type="password"]',dashboardSelectors:u=[]}=a;if(w){if(!r.url().includes(w)){g&&console.log(`[LoginHelper] Navigating to intended URL: ${w}`);try{await r.goto(w,{waitUntil:"domcontentloaded"})}catch(t){g&&console.warn(`[LoginHelper] Warning navigating to ${w}:`,t)}}}try{await r.waitForLoadState("domcontentloaded")}catch{}let p=!1,m=r;g&&console.log("[LoginHelper] Waiting for session state to settle...");const b=Date.now();let y=!1;for(;Date.now()-b<15e3;){let t=!1;for(const e of u)try{if(await r.locator(e).first().isVisible()){p=!1,t=!0;break}}catch{}if(t){y=!0;break}try{if(await r.locator(d).first().isVisible()){p=!0,m=r,y=!0;break}}catch{}let e=!1;for(const t of r.frames())try{if(await t.locator(d).first().isVisible()){p=!0,m=t,e=!0;break}}catch{}if(e){y=!0;break}await r.waitForTimeout(500)}if(!y)try{await r.waitForSelector(n,{state:"visible",timeout:1e3}),p=!0,m=r}catch{try{await r.waitForSelector(l,{state:"visible",timeout:1e3}),p=!0,m=r}catch{p=!1}}if(!p)return g&&console.log("[LoginHelper] Already logged in. Skipping login flow."),!1;let L=a.emailValue,h=a.passwordValue;if((!L||!h)&&a.getCredentials){const t=a.getCredentials();L=t.username,h=t.password}if(!L||!h)throw new Error("[LoginHelper] Login required but no credentials were provided");const S=await i(m,o(n,a.usernameSelectors))||n,k=await i(m,o(l,a.passwordSelectors,t))||l,H=await i(m,o(s,a.submitSelectors,e))||s;if(g&&console.log(`[LoginHelper] Login form detected. 
email='${S}', password='${k}', submit='${H}'`),await m.fill(S,L),await m.fill(k,h),c>0&&(g&&console.log(`[LoginHelper] Waiting ${c}ms before submission...`),await m.waitForTimeout(c)),g&&console.log(`[LoginHelper] Clicking submit '${H}'...`),await m.click(H),f){g&&console.log(`[LoginHelper] Waiting for redirection matching '${f}'...`);try{await r.waitForURL(f,{timeout:15e3})}catch{try{await r.waitForNavigation({waitUntil:"networkidle",timeout:8e3})}catch{}}}return!0}
1
+ // IMPORTANT: This helper is universal and shared across every site. Do NOT
2
+ // hardcode site-specific selectors here. Sites that need extra resilience pass
3
+ // their own fallbacks via the *Selectors arrays.
4
+ //
5
+ // SCOPE: single-page username+password forms (both fields visible at once). It
6
+ // does NOT handle identifier-first multi-step flows (email -> Next -> password,
7
+ // e.g. Google/Microsoft/Okta) or 2FA/OTP — those need a dedicated LoginStrategy.
8
/**
 * Fallback selectors that are safe on any site: a visible password input is
 * almost always the password field, and a submit control is rarely ambiguous.
 * No universal username fallback exists (a bare text input may be a search or
 * OTP box), so username detection relies on the selectors the adapter supplies.
 */
const UNIVERSAL_PASSWORD_SELECTORS = [
  'input[type="password"]',
];
const UNIVERSAL_SUBMIT_SELECTORS = [
  'button[type="submit"]',
  'input[type="submit"]',
];
16
/**
 * Walks `candidates` in order and resolves to the first selector whose first
 * match is visible inside `frame`, or null when none is. A selector whose
 * lookup throws is treated as not visible.
 */
async function firstVisibleSelector(frame, candidates) {
  const isShown = async (selector) => {
    try {
      return await frame.locator(selector).first().isVisible();
    } catch {
      return false;
    }
  };
  for (const selector of candidates) {
    if (await isShown(selector)) {
      return selector;
    }
  }
  return null;
}
27
/**
 * Produces the ordered candidate list: `primary` first, then each fallback
 * group in turn. Falsy entries are dropped and duplicates keep their first
 * position. Nullish groups are ignored.
 */
function buildCandidates(primary, ...fallbackGroups) {
  const ordered = new Set();
  const add = (value) => {
    if (value) {
      ordered.add(value);
    }
  };
  add(primary);
  for (const group of fallbackGroups) {
    if (Array.isArray(group)) {
      group.forEach((entry) => add(entry));
    } else {
      add(group);
    }
  }
  return [...ordered];
}
32
/** Best-effort visibility probe; any lookup error counts as "not visible". */
async function isVisibleIn(scope, selector) {
  try {
    return await scope.locator(selector).first().isVisible();
  } catch {
    return false;
  }
}

/**
 * Polls until a dashboard marker or a password field becomes visible.
 * Dashboard markers win over a password field; the main frame is probed
 * before the entries of `page.frames()`. Resolves to
 * `{ needsLogin, activeFrame }`, or null when nothing showed up in time.
 */
async function waitForSessionState(page, pwdSelector, dashboardSelectors, maxWaitMs = 15000) {
  const startTime = Date.now();
  while (Date.now() - startTime < maxWaitMs) {
    if (typeof page.isClosed === "function" && page.isClosed()) {
      throw new Error("[LoginHelper] Page closed while waiting for the session state to settle");
    }
    for (const dashSel of dashboardSelectors) {
      if (await isVisibleIn(page, dashSel)) {
        return { needsLogin: false, activeFrame: page };
      }
    }
    if (await isVisibleIn(page, pwdSelector)) {
      return { needsLogin: true, activeFrame: page };
    }
    for (const frame of page.frames()) {
      if (await isVisibleIn(frame, pwdSelector)) {
        return { needsLogin: true, activeFrame: frame };
      }
    }
    await page.waitForTimeout(500);
  }
  return null;
}

/** Last-resort probe: is either configured field visible in the main frame? */
async function hasConfiguredField(page, selectors) {
  for (const selector of selectors) {
    try {
      await page.waitForSelector(selector, { state: "visible", timeout: 1000 });
      return true;
    } catch {
      // Not visible within the timeout; try the next selector.
    }
  }
  return false;
}

/** Waits for the post-submit redirect; failures are logged (debug) and ignored. */
async function awaitRedirect(page, pattern, debug) {
  if (debug)
    console.log(`[LoginHelper] Waiting for redirection matching '${pattern}'...`);
  try {
    await page.waitForURL(pattern, { timeout: 15000 });
  } catch (urlErr) {
    if (debug)
      console.warn(`[LoginHelper] No redirect to '${pattern}':`, urlErr);
    try {
      await page.waitForNavigation({ waitUntil: "networkidle", timeout: 8000 });
    } catch (navErr) {
      if (debug)
        console.warn(`[LoginHelper] No post-submit navigation either (continuing):`, navErr);
    }
  }
}

/**
 * Automates a form login via Playwright: detects whether login is required,
 * fills credentials, waits, submits, and awaits redirection. When a dashboard
 * marker is already showing (or no login form can be found) it skips the login
 * and resolves to false.
 *
 * @param {object} options
 * @param {object} options.page Playwright page to drive.
 * @param {string} options.emailSelector Primary username/email field selector.
 * @param {string} options.passwordSelector Primary password field selector.
 * @param {string} options.submitButtonSelector Primary submit control selector.
 * @param {number} [options.delayMs=1000] Pause between filling and submitting.
 * @param {string} [options.intendedUrl] Navigated to first unless the current
 *   URL already contains it.
 * @param {string|RegExp} [options.expectedRedirectUrlPattern=intendedUrl]
 *   URL to wait for after submitting; skipped when empty.
 * @param {boolean} [options.debug=false] Enables console diagnostics.
 * @param {string} [options.pwdSelector] Selector used to detect a login form.
 * @param {string[]} [options.dashboardSelectors] Markers of a logged-in page.
 * @param {string[]} [options.usernameSelectors] Extra username candidates.
 * @param {string[]} [options.passwordSelectors] Extra password candidates.
 * @param {string[]} [options.submitSelectors] Extra submit candidates.
 * @param {string} [options.emailValue] Username to fill.
 * @param {string} [options.passwordValue] Password to fill.
 * @param {Function} [options.getCredentials] Sync or async resolver returning
 *   `{ username, password }`; consulted only once login is known to be needed
 *   and an explicit value is missing.
 * @returns {Promise<boolean>} true when the form was submitted, false when the
 *   login was skipped.
 * @throws {Error} When login is required but no credentials are available, or
 *   the page is closed while waiting.
 */
export async function performFormLogin(options) {
  const {
    page,
    emailSelector,
    passwordSelector,
    submitButtonSelector,
    delayMs = 1000,
    intendedUrl,
    expectedRedirectUrlPattern = intendedUrl,
    debug = false,
    pwdSelector = 'input[type="password"]',
    dashboardSelectors = [],
  } = options;

  if (intendedUrl && !page.url().includes(intendedUrl)) {
    if (debug)
      console.log(`[LoginHelper] Navigating to intended URL: ${intendedUrl}`);
    try {
      await page.goto(intendedUrl, { waitUntil: "domcontentloaded" });
    } catch (err) {
      if (debug)
        console.warn(`[LoginHelper] Warning navigating to ${intendedUrl}:`, err);
    }
  }
  try {
    await page.waitForLoadState("domcontentloaded");
  } catch (err) {
    if (debug)
      console.warn(`[LoginHelper] waitForLoadState failed (continuing):`, err);
  }

  if (debug)
    console.log(`[LoginHelper] Waiting for session state to settle...`);
  const state = (await waitForSessionState(page, pwdSelector, dashboardSelectors)) ?? {
    needsLogin: await hasConfiguredField(page, [emailSelector, passwordSelector]),
    activeFrame: page,
  };
  if (!state.needsLogin) {
    if (debug)
      console.log(`[LoginHelper] Already logged in. Skipping login flow.`);
    return false;
  }
  const { activeFrame } = state;

  // Resolve credentials lazily, only now that login is confirmed necessary.
  let emailValue = options.emailValue;
  let passwordValue = options.passwordValue;
  if ((!emailValue || !passwordValue) && options.getCredentials) {
    // Awaited so async resolvers work; a sync return value passes through.
    const credentials = await options.getCredentials();
    emailValue = credentials?.username;
    passwordValue = credentials?.password;
  }
  if (!emailValue || !passwordValue) {
    throw new Error("[LoginHelper] Login required but no credentials were provided");
  }

  // Prefer whichever candidate is actually visible; fall back to the primary.
  const activeEmailSelector =
    (await firstVisibleSelector(activeFrame, buildCandidates(emailSelector, options.usernameSelectors))) ||
    emailSelector;
  const activePasswordSelector =
    (await firstVisibleSelector(
      activeFrame,
      buildCandidates(passwordSelector, options.passwordSelectors, UNIVERSAL_PASSWORD_SELECTORS),
    )) || passwordSelector;
  const activeSubmitSelector =
    (await firstVisibleSelector(
      activeFrame,
      buildCandidates(submitButtonSelector, options.submitSelectors, UNIVERSAL_SUBMIT_SELECTORS),
    )) || submitButtonSelector;
  if (debug) {
    console.log(`[LoginHelper] Login form detected. email='${activeEmailSelector}', password='${activePasswordSelector}', submit='${activeSubmitSelector}'`);
  }

  await activeFrame.fill(activeEmailSelector, emailValue);
  await activeFrame.fill(activePasswordSelector, passwordValue);
  if (delayMs > 0) {
    if (debug)
      console.log(`[LoginHelper] Waiting ${delayMs}ms before submission...`);
    await activeFrame.waitForTimeout(delayMs);
  }
  if (debug)
    console.log(`[LoginHelper] Clicking submit '${activeSubmitSelector}'...`);
  await activeFrame.click(activeSubmitSelector);

  if (expectedRedirectUrlPattern) {
    await awaitRedirect(page, expectedRedirectUrlPattern, debug);
  }
  return true;
}
@@ -1 +1,36 @@
1
- import{performFormLogin as e}from"./login-helper.js";export class FormLoginStrategy{config;constructor(e){this.config=e}ensureLoggedIn(t){return e({...this.config,page:t.page,debug:t.debug,getCredentials:t.getCredentials})}}export function isLoginStrategy(e){return"object"==typeof e&&null!==e&&"function"==typeof e.ensureLoggedIn}export function toLoginStrategy(e){return isLoginStrategy(e)?e:new FormLoginStrategy(e)}
1
+ // Login strategies (Strategy pattern).
2
+ //
3
+ // A LoginStrategy encapsulates *how* a site authenticates. Sites declare a
4
+ // strategy declaratively (selectors / URLs only) — or just pass a plain
5
+ // FormLoginConfig as `auth` and let the framework wrap it. The runtime drives
6
+ // it from the browser capability; sites never resolve credentials themselves.
7
+ import { performFormLogin } from "./login-helper.js";
8
/**
 * Login strategy for a standard username/password form. It holds the site's
 * declarative form config and delegates the actual work to `performFormLogin`,
 * supplying the page, debug flag and credential resolver from the runtime
 * context (these override same-named keys in the config).
 */
export class FormLoginStrategy {
  config;

  constructor(config) {
    this.config = config;
  }

  /** Runs the form login for `ctx` and returns performFormLogin's result. */
  ensureLoggedIn(ctx) {
    const { page, debug, getCredentials } = ctx;
    return performFormLogin({ ...this.config, page, debug, getCredentials });
  }
}
27
/**
 * Type guard: true for any non-null object that exposes an `ensureLoggedIn`
 * function, i.e. a ready-made strategy rather than a plain config object.
 */
export function isLoginStrategy(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return typeof value.ensureLoggedIn === "function";
}
33
/**
 * Normalizes `auth` into a LoginStrategy: a value that already is a strategy
 * is returned as-is, anything else is wrapped in a FormLoginStrategy.
 */
export function toLoginStrategy(auth) {
  if (isLoginStrategy(auth)) {
    return auth;
  }
  return new FormLoginStrategy(auth);
}
@@ -0,0 +1,62 @@
1
+ import type { Page } from "playwright-core";
2
/**
 * PerimeterX / HUMAN bot-challenge handling.
 *
 * Sites fronted by PerimeterX (e.g. bloomberg.com) intermittently interrupt a
 * navigation with one of two interstitials:
 *
 *  • "press & hold" — a `#px-captcha` widget the user must hold for a few
 *    seconds. Solvable from a real (CDP-attached) browser by synthesizing a
 *    human-like press-and-hold.
 *  • "hard block" — a flat deny page carrying a "Block reference ID". Not
 *    solvable by interaction; the IP reputation has to cool down.
 *
 * This module auto-detects which one is showing and solves the press-and-hold.
 */
/** Classification of the current page: no challenge, a holdable widget, or a flat deny page. */
export type ChallengeKind = "none" | "press-and-hold" | "hard-block";
/** Result of inspecting a page for a PerimeterX interstitial. */
export interface ChallengeStatus {
    kind: ChallengeKind;
    /** Block reference ID, when a hard block is shown and the ID could be read from the page text. */
    referenceId?: string;
    /**
     * True when the press-&-hold renders inside the cross-origin "Human
     * verification challenge" iframe — HUMAN's hardened deployment (e.g. Zillow).
     * These analyze pointer-event biometrics and reject CDP-synthesized holds, so
     * `solvePerimeterX` will usually fail and you must fall back to
     * {@link waitForManualSolve} (a real human hold in the attached browser).
     * The lighter inline variant (e.g. Bloomberg) is solvable synthetically.
     * Only reported for the "press-and-hold" kind.
     */
    hardened?: boolean;
}
/** Tuning knobs for {@link solvePerimeterX}. */
export interface SolveOptions {
    /** Total hold duration in ms. PerimeterX fills over a few seconds; ~11s is safe. Defaults to 11000. */
    holdMs?: number;
    /** How many detect→solve cycles to attempt before giving up. Defaults to 2. */
    attempts?: number;
    /** Optional logger (e.g. console.log when ctx.debug). Defaults to a no-op. */
    log?: (msg: string) => void;
}
/** Outcome of {@link solvePerimeterX}: the last observed status plus bookkeeping. */
export interface SolveResult extends ChallengeStatus {
    /** True once no challenge remains on the page. */
    cleared: boolean;
    /** Number of press-and-hold attempts performed. */
    tries: number;
}
/** Inspect the current page and classify any PerimeterX interstitial. */
export declare function detectChallenge(page: Page): Promise<ChallengeStatus>;
/**
 * Detect and solve a PerimeterX challenge on the current page. Returns once the
 * page is clear, after exhausting attempts, or immediately on a hard block
 * (which interaction cannot resolve).
 */
export declare function solvePerimeterX(page: Page, opts?: SolveOptions): Promise<SolveResult>;
/**
 * Fallback for hardened HUMAN challenges that reject synthetic holds: poll until
 * the page is clear because a human completed the press-&-hold in the attached
 * browser (or the cookie otherwise resolved). Returns true once cleared, false
 * when the timeout elapses first.
 */
export declare function waitForManualSolve(page: Page, opts?: {
    /** Maximum time to keep polling, in ms. Defaults to 120000. */
    timeoutMs?: number;
    /** Delay between two probes, in ms. Defaults to 1500. */
    pollMs?: number;
    /** Optional progress logger. Defaults to a no-op. */
    log?: (msg: string) => void;
}): Promise<boolean>;
@@ -0,0 +1,112 @@
1
/** Promise-based sleep: resolves (with no value) after `ms` milliseconds. */
const REL = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
2
/**
 * Classifies the PerimeterX interstitial currently shown on `page`, if any.
 *
 * The probe runs inside the page via `page.evaluate`, so the callback must
 * stay self-contained (no references to outer variables).
 *
 * @returns {Promise<{kind: string, hardened?: boolean, referenceId?: string}>}
 *   "press-and-hold" when `#px-captcha` exists, "hard-block" when the page
 *   text or title matches a block message, otherwise "none".
 */
export async function detectChallenge(page) {
  return page.evaluate(() => {
    const widget = document.querySelector("#px-captcha");
    if (widget) {
      // The hardened HUMAN variant hosts its button in an iframe titled
      // "Human verification challenge"; the lighter variant renders inline.
      const challengeFrame = widget.querySelector('iframe[title*="erification" i], iframe[title*="uman" i]');
      return { kind: "press-and-hold", hardened: !!challengeFrame };
    }
    const bodyText = document.body?.innerText ?? "";
    const blockedByBody = /detected unusual activity|Are you a robot|Block reference ID/i.test(bodyText);
    const blocked = blockedByBody || /Are you a robot/i.test(document.title);
    if (!blocked) {
      return { kind: "none" };
    }
    const reference = bodyText.match(/Block reference ID:\s*([\w-]+)/i);
    return { kind: "hard-block", referenceId: reference?.[1] };
  });
}
22
/**
 * Centers the mouse on `#px-captcha`, presses, holds with small jitter for
 * about `holdMs`, then releases.
 *
 * @param {object} page Playwright page (uses `evaluate` and `mouse`).
 * @param {number} holdMs Minimum hold duration in milliseconds.
 * @returns {Promise<boolean>} false when the widget is not in the DOM (nothing
 *   was pressed), true once a hold was performed and released.
 */
async function pressAndHold(page, holdMs) {
  const box = await page.evaluate(() => {
    const el = document.querySelector("#px-captcha");
    if (!el)
      return null;
    const r = el.getBoundingClientRect();
    return { cx: Math.round(r.x + r.width / 2), cy: Math.round(r.y + r.height / 2) };
  });
  if (!box) {
    return false;
  }
  const { cx, cy } = box;
  // Approach the button along a short, non-linear path so it doesn't read as a teleport.
  await page.mouse.move(cx - 240, cy + 130, { steps: 8 });
  await page.mouse.move(cx - 50, cy + 35, { steps: 6 });
  await page.mouse.move(cx, cy, { steps: 5 });
  await page.mouse.down();
  try {
    const start = Date.now();
    while (Date.now() - start < holdMs) {
      // Jitter by at most one pixel per axis around the center while holding.
      const dx = Math.floor(Math.random() * 3) - 1;
      const dy = Math.floor(Math.random() * 3) - 1;
      await page.mouse.move(cx + dx, cy + dy);
      await REL(450 + Math.floor(Math.random() * 250));
    }
  } finally {
    // Always release: a failure mid-hold must not leave the button pressed
    // for whatever drives the page next.
    await page.mouse.up();
  }
  return true;
}
49
/**
 * Detects and solves a PerimeterX challenge on the current page. Returns once
 * the page is clear, after exhausting attempts, or immediately on a hard block
 * (which interaction cannot resolve).
 *
 * @param {object} page Playwright page.
 * @param {object} [opts]
 * @param {number} [opts.holdMs=11000] Hold duration per attempt, in ms.
 * @param {number} [opts.attempts=2] Maximum number of press-and-hold cycles.
 * @param {(msg: string) => void} [opts.log] Progress logger; no-op by default.
 * @returns {Promise<object>} The last detected status plus `cleared` (no
 *   challenge remains) and `tries` (holds actually performed).
 */
export async function solvePerimeterX(page, opts = {}) {
  const holdMs = opts.holdMs ?? 11_000;
  const attempts = opts.attempts ?? 2;
  const log = opts.log ?? (() => { });
  let status = await detectChallenge(page);
  if (status.kind === "none") {
    return { ...status, cleared: true, tries: 0 };
  }
  if (status.kind === "hard-block") {
    log(`[perimeterx] hard block (reference ${status.referenceId ?? "?"}) — cannot solve by interaction.`);
    return { ...status, cleared: false, tries: 0 };
  }
  let tries = 0;
  for (let i = 0; i < attempts; i++) {
    log(`[perimeterx] press-&-hold challenge detected${status.hardened ? " (hardened iframe variant)" : ""}` +
      ` — solving (attempt ${i + 1}/${attempts})`);
    const pressed = await pressAndHold(page, holdMs);
    if (pressed) {
      // Only holds that actually happened count as tries.
      tries++;
      await REL(3000); // let the page reload/settle after release
    }
    // Re-detect even when the widget was missing at press time: the page may
    // have cleared (or escalated) on its own, and the earlier status is stale.
    status = await detectChallenge(page);
    if (status.kind === "none") {
      log("[perimeterx] cleared.");
      return { ...status, cleared: true, tries };
    }
    if (status.kind === "hard-block") {
      log(`[perimeterx] escalated to hard block (reference ${status.referenceId ?? "?"}).`);
      return { ...status, cleared: false, tries };
    }
    if (!pressed) {
      break;
    }
  }
  if (status.hardened) {
    log("[perimeterx] hardened HUMAN press-&-hold rejected the synthetic hold. " +
      "Fall back to waitForManualSolve() — a real human hold in the attached browser " +
      "sets the _px3/_pxhd clearance cookies for the session.");
  }
  return { ...status, cleared: false, tries };
}
91
/**
 * Fallback for hardened HUMAN challenges that reject synthetic holds: polls
 * until the page is clear because a human completed the press-&-hold in the
 * attached browser (or the challenge otherwise resolved).
 *
 * @param {object} page Playwright page.
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs=120000] Maximum time to keep polling, in ms.
 * @param {number} [opts.pollMs=1500] Delay between probes, in ms.
 * @param {(msg: string) => void} [opts.log] Progress logger; no-op by default.
 * @returns {Promise<boolean>} true once no challenge is detected, false on
 *   timeout.
 * @throws Re-throws a probe failure when the page reports itself closed.
 */
export async function waitForManualSolve(page, opts = {}) {
  const timeoutMs = opts.timeoutMs ?? 120_000;
  const pollMs = opts.pollMs ?? 1500;
  const log = opts.log ?? (() => { });
  log("[perimeterx] waiting for a human to complete the press-&-hold in the browser...");
  const start = Date.now();
  while (Date.now() - start < timeoutMs) {
    let status;
    try {
      status = await detectChallenge(page);
    } catch (err) {
      // A successful hold reloads the page, and probing mid-navigation can
      // throw. That must not abort the wait, so retry on the next poll,
      // unless the page is gone for good.
      if (typeof page?.isClosed === "function" && page.isClosed()) {
        throw err;
      }
      log(`[perimeterx] challenge probe failed (page may be navigating); retrying: ${err?.message ?? err}`);
    }
    if (status?.kind === "none") {
      log("[perimeterx] cleared by human.");
      return true;
    }
    await REL(pollMs);
  }
  log("[perimeterx] timed out waiting for the human hold.");
  return false;
}