website-api 1.1.4 → 1.1.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -62,6 +62,11 @@ function printWebsiteHelp(adapter) {
62
62
  type: "boolean",
63
63
  description: "Show the managed Chrome window (default headless; reuses an already-open session)",
64
64
  },
65
+ {
66
+ name: "proxy",
67
+ type: "string",
68
+ description: 'Route the managed browser through a proxy: "default", a port, host:port, or scheme://host:port',
69
+ },
65
70
  { name: "help", type: "boolean", description: "Show help for this website site", short: "h" },
66
71
  ];
67
72
  for (const param of allParams) {
@@ -8,6 +8,14 @@ export interface BrowserOptions {
8
8
  cdpEndpoint?: string;
9
9
  /** Launch the managed browser headless. Ignored when `cdpEndpoint` is set. */
10
10
  headless?: boolean;
11
+ /**
12
+ * Route the managed browser through a proxy. `true` / "default" → the default
13
+ * SOCKS5 proxy (socks5://127.0.0.1:1080); a port ("1080"), "host:port", or
14
+ * full "scheme://host:port" is accepted. Forwarded to chrome-cdp-manager's
15
+ * `launch({ proxy })`, so it applies only on a fresh launch (an already-running
16
+ * browser or an explicit `cdpEndpoint` is used as-is). Ignored when falsy.
17
+ */
18
+ proxy?: string | boolean;
11
19
  /** Close a tab opened by this session on dispose. Defaults to true. */
12
20
  close?: boolean;
13
21
  debug?: boolean;
@@ -31,20 +31,34 @@ async function loadCdpManager() {
31
31
  * `CDP_ENDPOINT`) wins and is used as-is, so users can still point at a Chrome
32
32
  * they manage themselves. Otherwise chrome-cdp-manager ensures a managed
33
33
  * browser is running (launching it if needed) and returns its endpoint.
34
+ *
35
+ * When a proxy is requested, chrome-cdp-manager probes it: an unreachable proxy
36
+ * is dropped and the browser launches direct (we surface a "connecting directly"
37
+ * notice). A reachable proxy forces a fresh launch so it actually takes effect.
34
38
  */
35
39
  async function resolveEndpoint(options) {
36
40
  const explicit = options.cdpEndpoint || process.env.CDP_ENDPOINT;
37
41
  if (explicit)
38
- return explicit;
42
+ return { endpoint: explicit, managed: false, proxyApplied: false, cdpPort: 0 };
39
43
  const { launch } = await loadCdpManager();
40
- const { endpoint, launched } = await launch({ headless: !!options.headless });
44
+ const { endpoint, launched, config, proxyRequested, proxyReachable } = (await launch({
45
+ headless: !!options.headless,
46
+ proxy: options.proxy || undefined,
47
+ }));
48
+ const proxyApplied = !!config?.proxy;
49
+ // Situation 1: a proxy was asked for but nothing was listening — we fell back
50
+ // to a direct connection. Surface it (stderr, so JSON stdout stays clean).
51
+ if (proxyRequested && !proxyReachable) {
52
+ console.error(`Proxy ${options.proxy} not reachable — connecting directly (no proxy).`);
53
+ }
41
54
  if (options.debug) {
42
55
  const mode = options.headless ? "headless" : "headed";
56
+ const via = proxyApplied ? ` via proxy ${config.proxy}` : "";
43
57
  console.log(launched
44
- ? `Launched managed Chrome (${mode}) at ${endpoint}`
45
- : `Attached to managed Chrome at ${endpoint}`);
58
+ ? `Launched managed Chrome (${mode})${via} at ${endpoint}`
59
+ : `Attached to managed Chrome at ${endpoint} (already running)`);
46
60
  }
47
- return endpoint;
61
+ return { endpoint, managed: true, proxyApplied, cdpPort: config?.cdpPort ?? 9222 };
48
62
  }
49
63
  /**
50
64
  * Connects to an existing Chrome over CDP and reuses (or opens) a tab for the
@@ -53,7 +67,7 @@ async function resolveEndpoint(options) {
53
67
  */
54
68
  export const connectChrome = async (targetUrl, options = {}) => {
55
69
  const debug = !!options.debug;
56
- const endpoint = await resolveEndpoint(options);
70
+ const { endpoint, managed, proxyApplied, cdpPort } = await resolveEndpoint(options);
57
71
  const chromium = await loadChromium();
58
72
  const browser = await chromium.connectOverCDP(endpoint);
59
73
  const context = browser.contexts()[0];
@@ -79,9 +93,33 @@ export const connectChrome = async (targetUrl, options = {}) => {
79
93
  if (debug)
80
94
  console.log(`Opening a new tab for ${targetUrl}`);
81
95
  page = await context.newPage();
82
- await page.goto(targetUrl, { waitUntil: "domcontentloaded" });
83
96
  opened = true;
84
97
  }
98
+ // Headless Chrome leaks "HeadlessChrome" in its network User-Agent header (the
99
+ // JS-level fingerprint init script only changes navigator.userAgent, not the
100
+ // request header). Some sites — e.g. Micro Center — serve a blank/blocked page
101
+ // to it, so the product grid never appears. Mirror what
102
+ // chrome-cdp-manager/playwright's connect() does and override the network UA to
103
+ // a de-headlessed value, before any navigation.
104
+ if (options.headless) {
105
+ try {
106
+ const liveUa = await page.evaluate(() => navigator.userAgent);
107
+ const ua = liveUa.replace(/HeadlessChrome/g, "Chrome");
108
+ if (ua && ua !== liveUa) {
109
+ await context.setExtraHTTPHeaders({ "user-agent": ua });
110
+ const cdp = await context.newCDPSession(page);
111
+ await cdp.send("Network.setUserAgentOverride", { userAgent: ua });
112
+ if (debug)
113
+ console.log(`De-headlessed network User-Agent → ${ua}`);
114
+ }
115
+ }
116
+ catch {
117
+ // best-effort; ignore
118
+ }
119
+ }
120
+ if (opened) {
121
+ await page.goto(targetUrl, { waitUntil: "domcontentloaded" });
122
+ }
85
123
  return {
86
124
  page,
87
125
  browser,
@@ -101,6 +139,20 @@ export const connectChrome = async (targetUrl, options = {}) => {
101
139
  catch {
102
140
  // ignore
103
141
  }
142
+ // Situation 3: a proxy only takes effect at launch, so a proxied run owns
143
+ // an ephemeral browser — fully stop it once the request completes, leaving
144
+ // a clean slate for the next (possibly differently-proxied) run.
145
+ if (managed && proxyApplied) {
146
+ try {
147
+ const { closeBrowser } = await loadCdpManager();
148
+ await closeBrowser(cdpPort);
149
+ if (debug)
150
+ console.log(`Stopped managed Chrome on :${cdpPort} (proxied run complete)`);
151
+ }
152
+ catch {
153
+ // ignore
154
+ }
155
+ }
104
156
  },
105
157
  };
106
158
  };
@@ -74,6 +74,7 @@ export function createContext(site, options = {}, providers = {}) {
74
74
  session = await connect(site.landingUrl, {
75
75
  cdpEndpoint: env.CDP_ENDPOINT,
76
76
  headless,
77
+ proxy: options.proxy ?? env.CDP_PROXY,
77
78
  close: shouldClose,
78
79
  debug,
79
80
  });
@@ -4,7 +4,7 @@ info:
4
4
  title: Bloomberg Billionaires Index
5
5
  description: Extracts the full Bloomberg Billionaires Index (window.top500) via the browser,
6
6
  auto-solving the PerimeterX challenge.
7
- version: 1.1.3
7
+ version: 1.1.4
8
8
  servers:
9
9
  - url: https://bloomberg.com
10
10
  paths: {}
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Chase Bank
5
5
  description: Logs into Chase, lists downloadable accounts, and downloads statement/transaction CSV files.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://chase.com
9
9
  paths: {}
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: ChatGPT / Codex Usage
5
5
  description: Fetches ChatGPT rate limit usage and quota details from the private wham/usage API.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://chatgpt.com
9
9
  paths: {}
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Claude Organizations
5
5
  description: Lists the Claude organizations available to the logged-in browser session.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://claude.ai
9
9
  paths:
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Cursor Usage
5
5
  description: Fetches the active Cursor usage summary from the private usage-summary API.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://cursor.com
9
9
  paths:
@@ -4,7 +4,7 @@ info:
4
4
  title: E-ZPass New York
5
5
  description: Fetches E-ZPass NY toll/payment history, lists account statements, and downloads
6
6
  statement PDFs (browser transport, logs in fresh each run via your saved Chrome password).
7
- version: 1.1.3
7
+ version: 1.1.4
8
8
  servers:
9
9
  - url: https://e-zpassny.com
10
10
  paths: {}
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Gemini Usage
5
5
  description: Fetches Gemini account usage/quota details directly over HTTP (no browser).
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://gemini.google.com
9
9
  paths:
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Google AI Overview
5
5
  description: Fetches Google's AI Overview and AI Mode answers using browser-attached Playwright.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://google.com
9
9
  - url: https://www.google.com
@@ -0,0 +1,44 @@
1
+ # Generated by `pnpm generate:openapi` — do not edit by hand.
2
+ openapi: 3.1.0
3
+ info:
4
+ title: Micro Center
5
+ description: Scrape Micro Center Apple search results — Mac desktops and/or MacBooks — into
6
+ structured JSON (browser transport, parses with cheerio). Defaults to both categories; pick one
7
+ with --mac / --macbook.
8
+ version: 1.1.4
9
+ servers:
10
+ - url: https://microcenter.com
11
+ paths: {}
12
+ components:
13
+ securitySchemes:
14
+ chromeSession:
15
+ type: apiKey
16
+ in: cookie
17
+ name: session
18
+ description: "Authenticated via the user's real Chrome session: website-api injects decrypted Chrome
19
+ cookies for microcenter.com into every request."
20
+ x-website-api:
21
+ id: microcenter
22
+ domain: microcenter.com
23
+ cookieDomain: microcenter.com
24
+ transport: browser
25
+ cookies: optional
26
+ requiresLogin: false
27
+ imperative: true
28
+ cli:
29
+ command: website-api microcenter
30
+ positionals: []
31
+ parameters:
32
+ - flag: --mac
33
+ type: boolean
34
+ description: Scrape Mac desktops (mini, Studio, Pro, iMac)
35
+ required: false
36
+ - flag: --macbook
37
+ type: boolean
38
+ description: Scrape MacBooks (Air, Pro)
39
+ required: false
40
+ - flag: --store
41
+ type: string
42
+ description: Micro Center store id for price/stock (default 075)
43
+ default: "075"
44
+ required: false
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Ollama Usage
5
5
  description: Fetches Ollama plan and usage details from the authenticated settings page.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://ollama.com
9
9
  paths:
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Perplexity AI Ask
5
5
  description: Fetches live streaming answers from Perplexity AI using its private REST/SSE API.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://perplexity.ai
9
9
  paths: {}
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: PSEG Usage
5
5
  description: Downloads PSEG Smart Energy usage data (CSV) or lists available properties.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://mysmartenergy.nj.pseg.com
9
9
  paths: {}
@@ -3,7 +3,7 @@ openapi: 3.1.0
3
3
  info:
4
4
  title: Google Voice
5
5
  description: List threads and read conversations over HTTP (no browser); send SMS via the attached browser.
6
- version: 1.1.3
6
+ version: 1.1.4
7
7
  servers:
8
8
  - url: https://voice.google.com
9
9
  paths:
@@ -4,7 +4,7 @@ info:
4
4
  title: Zillow
5
5
  description: Search Zillow listings, list trending homes, and resolve listing URLs (browser
6
6
  transport, auto-handles the PerimeterX challenge).
7
- version: 1.1.3
7
+ version: 1.1.4
8
8
  servers:
9
9
  - url: https://zillow.com
10
10
  paths: {}
@@ -21,6 +21,11 @@ const GLOBAL_PARAMETERS = [
21
21
  type: "boolean",
22
22
  description: "Show the managed Chrome window (default headless; reuses an already-open session)",
23
23
  },
24
+ {
25
+ name: "proxy",
26
+ type: "string",
27
+ description: 'Route the managed browser through a proxy: "default" (socks5://127.0.0.1:1080), a port, host:port, or scheme://host:port (applies on a fresh launch)',
28
+ },
24
29
  {
25
30
  name: "out",
26
31
  type: "string",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "website-api",
3
- "version": "1.1.4",
3
+ "version": "1.1.6",
4
4
  "description": "CLI and library to query website private APIs with your real logged-in Chrome session",
5
5
  "main": "./dist/src/website-api.js",
6
6
  "types": "./dist/src/website-api.d.ts",
@@ -48,8 +48,9 @@
48
48
  },
49
49
  "dependencies": {
50
50
  "chalk": "^5.6.2",
51
- "chrome-cdp-manager": "link:../chrome-cdp-manager",
52
- "chrome-tools": "link:../chrome_tools",
51
+ "cheerio": "^1.2.0",
52
+ "chrome-cdp-manager": "^1.2.9",
53
+ "chrome-tools": "^1.1.5",
53
54
  "cli-table3": "^0.6.5",
54
55
  "commander": "^14.0.3"
55
56
  },