website-api 1.1.3 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +141 -1
  2. package/dist/bin/cli.js +209 -1
  3. package/dist/src/capabilities/browser.d.ts +16 -2
  4. package/dist/src/capabilities/browser.js +158 -1
  5. package/dist/src/capabilities/cookies.d.ts +7 -1
  6. package/dist/src/capabilities/cookies.js +68 -1
  7. package/dist/src/capabilities/download.js +32 -1
  8. package/dist/src/capabilities/fingerprint.js +62 -1
  9. package/dist/src/capabilities/http.js +101 -1
  10. package/dist/src/capabilities/login/login-helper.js +185 -1
  11. package/dist/src/capabilities/login/login-strategy.js +36 -1
  12. package/dist/src/challenges/perimeterx.d.ts +62 -0
  13. package/dist/src/challenges/perimeterx.js +112 -0
  14. package/dist/src/cli/ext.js +338 -1
  15. package/dist/src/core/context.d.ts +2 -2
  16. package/dist/src/core/context.js +138 -1
  17. package/dist/src/core/define-site.js +74 -1
  18. package/dist/src/core/loader.js +142 -1
  19. package/dist/src/core/registry.js +332 -1
  20. package/dist/src/core/runtime.d.ts +12 -4
  21. package/dist/src/core/runtime.js +98 -1
  22. package/dist/src/env.js +34 -1
  23. package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
  24. package/dist/src/sites/bloomberg.com/index.js +49 -0
  25. package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
  26. package/dist/src/sites/chase.com/download-helper.js +266 -1
  27. package/dist/src/sites/chase.com/index.js +87 -1
  28. package/dist/src/sites/chase.com/openapi.yaml +76 -0
  29. package/dist/src/sites/chatgpt.com/index.js +24 -1
  30. package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
  31. package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
  32. package/dist/src/sites/claude.ai/index.js +42 -1
  33. package/dist/src/sites/claude.ai/openapi.yaml +54 -0
  34. package/dist/src/sites/cursor.com/index.js +12 -1
  35. package/dist/src/sites/cursor.com/openapi.yaml +39 -0
  36. package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
  37. package/dist/src/sites/e-zpassny.com/index.js +344 -0
  38. package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
  39. package/dist/src/sites/gemini.google.com/index.js +80 -1
  40. package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
  41. package/dist/src/sites/google.com/google-helpers.js +255 -1
  42. package/dist/src/sites/google.com/index.js +253 -1
  43. package/dist/src/sites/google.com/openapi.yaml +59 -0
  44. package/dist/src/sites/microcenter.com/openapi.yaml +44 -0
  45. package/dist/src/sites/ollama.com/index.js +43 -1
  46. package/dist/src/sites/ollama.com/openapi.yaml +39 -0
  47. package/dist/src/sites/perplexity.ai/index.js +253 -1
  48. package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
  49. package/dist/src/sites/pseg.com/index.js +243 -1
  50. package/dist/src/sites/pseg.com/openapi.yaml +42 -0
  51. package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
  52. package/dist/src/sites/voice.google.com/index.d.ts +2 -0
  53. package/dist/src/sites/voice.google.com/index.js +122 -0
  54. package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
  55. package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
  56. package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
  57. package/dist/src/sites/zillow.com/index.d.ts +2 -0
  58. package/dist/src/sites/zillow.com/index.js +303 -0
  59. package/dist/src/sites/zillow.com/openapi.yaml +55 -0
  60. package/dist/src/types.d.ts +7 -0
  61. package/dist/src/types.js +1 -1
  62. package/dist/src/util/args-parser.js +150 -1
  63. package/dist/src/util/google-json.js +74 -1
  64. package/dist/src/website-api.d.ts +7 -7
  65. package/dist/src/website-api.js +13 -1
  66. package/package.json +38 -10
package/README.md CHANGED
@@ -1,3 +1,143 @@
1
1
  # website-api
2
2
 
3
- CLI and library to fetch website API data
3
+ Query websites' private APIs with your **real logged-in Chrome session** — as a CLI or a Node.js library.
4
+
5
+ One site definition describes *what* to fetch; the runtime assembles *how*: plain HTTP with your decrypted
6
+ Chrome cookies injected, or a real fingerprinted Chrome tab over CDP, with login, downloads, and in-page
7
+ scripts available as composable capabilities. See [DESIGN.md](DESIGN.md) for the architecture.
8
+
9
+ > macOS-focused: cookie/credential decryption uses [chrome-tools](https://www.npmjs.com/package/chrome-tools),
10
+ > which reads Chrome's local encrypted storage via the macOS keychain.
11
+
12
+ ## Install
13
+
14
+ ```sh
15
+ npm install -g website-api # CLI
16
+ npm install website-api # library
17
+ ```
18
+
19
+ Requires Node ≥ 22. `playwright-core` is an **optional** dependency — HTTP-only sites work without it;
20
+ browser-transport sites (`[p]` in `list`) will tell you to install it if it's missing.
21
+
22
+ Browser-transport sites no longer need you to start Chrome by hand:
23
+ [chrome-cdp-manager](https://www.npmjs.com/package/chrome-cdp-manager) launches (or attaches to) a
24
+ dedicated, isolated CDP browser automatically on first use. It runs **headless by default**, but if a
25
+ CDP session is already open it reuses that one as-is. Pass `--headed` to force a visible window (e.g. to
26
+ solve a captcha that needs a real press-and-hold). To attach to a Chrome you manage yourself, set
27
+ `CDP_ENDPOINT` (e.g. `http://localhost:9222`) and that endpoint is used directly.
28
+
29
+ ## CLI quickstart
30
+
31
+ ```sh
32
+ website-api list # all bundled + installed sites
33
+ website-api codex-usage # ChatGPT/Codex usage via your session
34
+ website-api perplexity "what is pnpm?" # positional args
35
+ website-api claude-usage --org my-org # site-specific flags
36
+ website-api chatgpt.com --help # per-site help
37
+ website-api example.com # no definition? universal cookie-aware GET
38
+ ```
39
+
40
+ Useful global flags: `--profile <name>` (Chrome profile), `--debug` (full request/response dump),
41
+ `--keep-open` (leave the browser tab open), `--headed` (show the managed Chrome window; default headless),
42
+ `--proxy <spec>` (route the managed browser through a proxy), `--out <file>` (write the response to a file).
43
+
44
+ ### Installing more sites
45
+
46
+ Sites can be installed from a public registry (a GitHub repo of prebuilt site modules):
47
+
48
+ ```sh
49
+ website-api ext search zillow
50
+ website-api ext install zillow
51
+ website-api ext list # other subcommands: ext info, ext remove, ext update, ext registry
52
+ ```
53
+
54
+ **Security note:** an installed site is code that runs with access to your Chrome cookies for its domain.
55
+ Installs are SHA256-verified against the registry catalog and require confirmation — only install sites
56
+ from registries you trust.
57
+
58
+ ## Library usage
59
+
60
+ Everything the CLI does is available programmatically:
61
+
62
+ ```ts
63
+ import { queryWebsite } from "website-api";
64
+
65
+ // By site id — same resolution as the CLI
66
+ const usage = await queryWebsite("codex-usage", { profile: "Default" });
67
+ ```
68
+
69
+ Import a bundled site directly and run it — handy for other packages that want
70
+ one site's features without the registry:
71
+
72
+ ```ts
73
+ import { runSite } from "website-api";
74
+ import zillow from "website-api/sites/zillow.com";
75
+
76
+ const homes = await runSite(zillow, { query: "Seattle, WA" });
77
+ ```
78
+
79
+ Or bring your own definition — `runSite` accepts a plain object:
80
+
81
+ ```ts
82
+ import { runSite } from "website-api";
83
+
84
+ const result = await runSite({
85
+ id: "example",
86
+ name: "Example",
87
+ domain: "example.com",
88
+ description: "JSON endpoint with my session cookies",
89
+ endpoints: [{ url: "https://example.com/api/me" }],
90
+ });
91
+ ```
92
+
93
+ Tests and embedders can inject fakes for every capability (fetch, browser, cookie store, fs) via the
94
+ third `providers` argument — see `ContextProviders`.
95
+
96
+ ## Writing your own site
97
+
98
+ Drop a folder in `~/.config/website-api/extensions/` — no imports, no build step:
99
+
100
+ ```js
101
+ // ~/.config/website-api/extensions/example.com/index.mjs
102
+ export default {
103
+ id: "example",
104
+ name: "Example",
105
+ domain: "example.com",
106
+ description: "Example data",
107
+ endpoints: [{ url: "https://example.com/api/data" }],
108
+ };
109
+ ```
110
+
111
+ It shows up in `website-api list` immediately (marked `[x]`). Sites needing a real browser set
112
+ `transport: "browser"` and use `ctx.browser()` / `ctx.eval()` inside a `run(ctx)` function; login flows,
113
+ downloads, and SSE parsing are provided by the context. Develop iteratively with
114
+ `website-api ext test ./my-site` (runs a local file without installing). Full authoring guide:
115
+ [DESIGN.md](DESIGN.md).
116
+
117
+ ## OpenAPI specs
118
+
119
+ Every bundled site ships a generated `openapi.yaml` next to its module (`dist/src/sites/<site>/openapi.yaml`
120
+ in the published package) describing its endpoints and CLI surface, including the `x-website-api` extension
121
+ block. Regenerate with `pnpm generate:openapi` after a build.
122
+
123
+ ## Security model
124
+
125
+ - Cookies and credentials are read from Chrome's local encrypted storage and **only sent to the target
126
+ site's own domain**. They are never written to disk or sent anywhere else.
127
+ - `--debug` prints raw requests/responses (including cookie headers) to your terminal — don't paste that
128
+ output into bug reports.
129
+ - Registry installs run third-party code; they are integrity-checked (SHA256) and gated behind an explicit
130
+ confirmation that names the source repo.
131
+
132
+ ## Development
133
+
134
+ ```sh
135
+ pnpm install # local dev links chrome-tools from ../chrome_tools (pnpm-workspace.yaml)
136
+ pnpm build # tsc → dist (readable, unminified)
137
+ pnpm test # node:test — offline, no Chrome needed
138
+ pnpm lint # biome
139
+ pnpm typecheck
140
+ pnpm generate:openapi
141
+ ```
142
+
143
+ MIT © guocity
package/dist/bin/cli.js CHANGED
@@ -1,2 +1,210 @@
1
1
  #!/usr/bin/env node
2
- import{readFileSync as e,writeFileSync as o}from"node:fs";import{dirname as n,join as s}from"node:path";import{fileURLToPath as t}from"node:url";import{program as r}from"commander";import i from"chalk";import l from"cli-table3";import{getDefaultChromeDir as a}from"chrome-tools";import{queryWebsite as c,sites as p,loadSites as d,getSite as u}from"../src/website-api.js";import{registerExtCommands as g}from"../src/cli/ext.js";import{parseArgsForWebsite as f}from"../src/util/args-parser.js";const m=s(n(t(import.meta.url)),"..","..","package.json"),{version:b}=JSON.parse(e(m,"utf8"));process.on("unhandledRejection",e=>{console.error(e instanceof Error?e.message:"command not found"),process.exit(1)}),async function(){const e=process.argv.slice(2),n=e.find(e=>!e.startsWith("-"));if(n&&"list"!==n){await d();const s=u(n);if(s){const t=e.filter((o,s)=>s!==e.indexOf(n));let r;try{r=f(s.positionals,s.parameters,t)}catch(e){console.error(i.red(`Error: ${e instanceof Error?e.message:String(e)}`)),console.log(`Run ${i.cyan(`npx website-api ${s.id} --help`)} for usage details.`),process.exit(1)}if(r.helpRequested)return void function(e){console.log(i.bold.green(`\n🌐 Website API: ${i.white(e.name)} (${i.yellow(e.id)})\n`)),console.log(` ${i.italic(e.description)}\n`);let o=`npx website-api ${e.id}`;if(e.positionals&&e.positionals.length>0)for(const n of e.positionals)o+=n.required?` <${n.name}>`:` [${n.name}]`;if(o+=" [options]",console.log(`${i.bold("Usage:")} ${i.cyan(o)}\n`),e.positionals&&e.positionals.length>0){console.log(i.bold("Positional Arguments:"));for(const o of e.positionals)console.log(` ${i.cyan(o.name.padEnd(15))} ${o.description}`);console.log()}console.log(i.bold("Options:"));const n=[...e.parameters||[],{name:"profile",type:"string",description:"specific Chrome profile directory (e.g., 'Default')"},{name:"user-agent",type:"string",description:"custom User-Agent header for HTTP requests",short:"u"},{name:"debug",type:"boolean",description:"Print full HTTP 
request and response bodies for debugging"},{name:"keep-open",type:"boolean",description:"Leave the browser tab open after running (preserve the logged-in session)"},{name:"help",type:"boolean",description:"Show help for this website site",short:"h"}];for(const e of n){let o=`--${e.name}`;"boolean"!==e.type&&(o+=" <value>"),o=e.short?`-${e.short}, ${o}`:` ${o}`;const n=void 0!==e.default?` (default: ${e.default})`:"";console.log(` ${i.yellow(o.padEnd(28))} ${e.description}${i.gray(n)}`)}console.log()}(s);try{const e=await c(s.id,r.options);let n;if(r.options.text&&e&&"object"==typeof e){const o=e.answer||e.text;n=void 0!==o?String(o):JSON.stringify(e,null,2)}else n="string"==typeof e?e:JSON.stringify(e,null,2);r.options.out?(o(r.options.out,n+"\n","utf8"),console.log(i.green(`Success! Decoded response written to ${r.options.out}`))):console.log(n)}catch(e){console.error(i.red(e instanceof Error?e.message:"command not found")),process.exit(1)}return}}r.name("website-api").description("CLI to query website APIs using decrypted Chrome cookies on macOS").version(b),r.option("--profile <name>","specific Chrome profile directory (e.g., 'Default', 'Profile 1')").option("--current-profile","Show the currently resolved/selected Chrome profile directory and name").option("-u, --user-agent <string>","custom User-Agent header for HTTP requests"),r.option("--debug","Print full HTTP request and response bodies for debugging"),r.command("list").description("List all supported website API sites").action(async()=>{await d(),console.log(i.bold.green("\n🌐 Supported Website APIs:\n"));const e=new l({head:[i.bold.cyan("ID"),i.bold.cyan("Name"),i.bold.cyan("Domain"),i.bold.cyan("Description")],colWidths:[18,25,20,50],wordWrap:!0,style:{head:[],border:[]}});for(const o of p){const n=[];"browser"===o.transport&&n.push(i.magenta("[p]")),o.auth&&n.push(i.red("[l]")),"extension"===o.origin&&n.push(i.blue("[x]"));const s=n.length?`${o.name} ${n.join(" 
")}`:o.name;e.push([i.yellow(o.id),s,i.underline(o.domain),o.description])}console.log(e.toString()),console.log(`\n${i.magenta("[p]")} requires a running Chrome (Playwright) ${i.red("[l]")} requires login ${i.blue("[x]")} user extension`),console.log(`\nTo run an API query, execute: ${i.bold.cyan("npx website-api <id>")}\n`)}),g(r),r.argument("[website]","website ID or domain to query (e.g. 'chatgpt.com')").action(async e=>{const o=r.opts();if(o.currentProfile){const e=process.env.PROFILE_PATH||process.env.CHROME_PROFILE_PATH||a(),n=o.profile||process.env.PROFILE_NAME||"Default";return console.log(i.bold.green("\n👤 Currently Resolved Profile:\n")),console.log(` ${i.bold("Path:")} ${e}`),void console.log(` ${i.bold("Name:")} ${n}\n`)}e?(console.error(i.red(`Error: website adapter "${e}" not found.`)),console.log(`Run ${i.cyan("npx website-api list")} to see all supported adapters.`),process.exit(1)):r.outputHelp()}),r.parse(process.argv)}().catch(e=>{console.error(e instanceof Error?e.message:"command not found"),process.exit(1)});
2
+ import { readFileSync, writeFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import chalk from "chalk";
6
+ import { getDefaultChromeDir } from "chrome-tools";
7
+ import Table from "cli-table3";
8
+ import { program } from "commander";
9
+ import { registerExtCommands } from "../src/cli/ext.js";
10
+ import { parseArgsForWebsite } from "../src/util/args-parser.js";
11
+ import { getSite, loadSites, queryWebsite, sites } from "../src/website-api.js";
12
// The manifest lives two directories above this file; its version is what
// `--version` reports.
const packageJsonPath = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "package.json");
const { version: packageVersion } = JSON.parse(readFileSync(packageJsonPath, "utf8"));
// Last-resort handler for promise rejections nobody awaited: print a one-line
// message and exit non-zero. Non-Error reasons (strings, plain objects) are
// stringified so the user sees what actually went wrong.
process.on("unhandledRejection", (reason) => {
    console.error(reason instanceof Error ? reason.message : String(reason));
    process.exit(1);
});
19
/**
 * Prints the help page for one site: title, description, usage line,
 * positional arguments, then every option (the site's own parameters followed
 * by the global flags).
 *
 * @param adapter Site definition. Reads `id`, `name`, `description`, and the
 *   optional `positionals` / `parameters` arrays.
 */
function printWebsiteHelp(adapter) {
    const positionals = adapter.positionals ?? [];
    const siteParams = adapter.parameters ?? [];
    console.log(chalk.bold.green(`\n🌐 Website API: ${chalk.white(adapter.name)} (${chalk.yellow(adapter.id)})\n`));
    console.log(` ${chalk.italic(adapter.description)}\n`);
    // Usage line: required positionals render as <name>, optional ones as [name].
    const positionalUsage = positionals
        .map((pos) => (pos.required ? ` <${pos.name}>` : ` [${pos.name}]`))
        .join("");
    const usageStr = `npx website-api ${adapter.id}${positionalUsage} [options]`;
    console.log(`${chalk.bold("Usage:")} ${chalk.cyan(usageStr)}\n`);
    if (positionals.length > 0) {
        console.log(chalk.bold("Positional Arguments:"));
        for (const pos of positionals) {
            console.log(` ${chalk.cyan(pos.name.padEnd(15))} ${pos.description}`);
        }
        console.log();
    }
    console.log(chalk.bold("Options:"));
    const globalParams = [
        { name: "profile", type: "string", description: "specific Chrome profile directory (e.g., 'Default')" },
        {
            name: "user-agent",
            type: "string",
            description: "custom User-Agent header for HTTP requests",
            short: "u",
        },
        {
            name: "debug",
            type: "boolean",
            description: "Print full HTTP request and response bodies for debugging",
        },
        {
            name: "keep-open",
            type: "boolean",
            description: "Leave the browser tab open after running (preserve the logged-in session)",
        },
        {
            name: "headed",
            type: "boolean",
            description: "Show the managed Chrome window (default headless; reuses an already-open session)",
        },
        {
            name: "proxy",
            type: "string",
            description: 'Route the managed browser through a proxy: "default", a port, host:port, or scheme://host:port',
        },
        // `--out` is handled by the CLI for every site, so it belongs in the help too.
        { name: "out", type: "string", description: "Write the response to this file instead of printing it" },
        { name: "help", type: "boolean", description: "Show help for this site", short: "h" },
    ];
    // A site may declare a parameter with the same name as a global flag; list it
    // once, preferring the site's own entry.
    const declaredNames = new Set(siteParams.map((param) => param.name));
    const allParams = [...siteParams, ...globalParams.filter((param) => !declaredNames.has(param.name))];
    for (const param of allParams) {
        let flag = `--${param.name}`;
        if (param.type !== "boolean") {
            flag += ` <value>`;
        }
        flag = param.short ? `-${param.short}, ${flag}` : ` ${flag}`;
        const defStr = param.default !== undefined ? ` (default: ${param.default})` : "";
        console.log(` ${chalk.yellow(flag.padEnd(28))} ${param.description}${chalk.gray(defStr)}`);
    }
    console.log();
}
88
/**
 * CLI entry point.
 *
 * If the first non-flag argument names a known site, the remaining arguments
 * are parsed with that site's own positionals/parameters and the site is
 * queried directly, bypassing commander. Otherwise commander handles the
 * global options, the `list` command, the `ext` commands, and the
 * "unknown website" fallback.
 *
 * Rejections from commander's async action handlers propagate to the caller
 * because the command line is parsed with `parseAsync`.
 */
async function runCli() {
    const argv = process.argv.slice(2);
    // Check if first positional is a website adapter (avoiding commands and global flags)
    const firstPositional = argv.find((arg) => !arg.startsWith("-"));
    if (firstPositional && firstPositional !== "list") {
        await loadSites();
        const adapter = getSite(firstPositional);
        if (adapter) {
            // Bypasses standard commander parser to allow website-specific options.
            // Drop only the site name itself; compute its index once, not per element.
            const adapterIndex = argv.indexOf(firstPositional);
            const websiteArgs = argv.filter((_, i) => i !== adapterIndex);
            let parsed;
            try {
                parsed = parseArgsForWebsite(adapter.positionals, adapter.parameters, websiteArgs);
            }
            catch (err) {
                console.error(chalk.red(`Error: ${err instanceof Error ? err.message : String(err)}`));
                console.log(`Run ${chalk.cyan(`npx website-api ${adapter.id} --help`)} for usage details.`);
                process.exit(1);
            }
            if (parsed.helpRequested) {
                printWebsiteHelp(adapter);
                return;
            }
            try {
                const data = await queryWebsite(adapter.id, parsed.options);
                let output;
                if (parsed.options.text && data && typeof data === "object") {
                    // `--text`: prefer the plain answer/text field over the full JSON.
                    const ans = data.answer || data.text;
                    output = ans !== undefined ? String(ans) : JSON.stringify(data, null, 2);
                }
                else {
                    output = typeof data === "string" ? data : JSON.stringify(data, null, 2);
                }
                if (parsed.options.out) {
                    writeFileSync(parsed.options.out, output + "\n", "utf8");
                    console.log(chalk.green(`Success! Decoded response written to ${parsed.options.out}`));
                }
                else {
                    console.log(output);
                }
            }
            catch (err) {
                // Show the real failure, including non-Error throwables.
                console.error(chalk.red(err instanceof Error ? err.message : String(err)));
                process.exit(1);
            }
            return;
        }
    }
    // Fallback to Commander for global commands and options
    program
        .name("website-api")
        .description("CLI to query website APIs using decrypted Chrome cookies on macOS")
        .version(packageVersion);
    // Global options
    program
        .option("--profile <name>", "specific Chrome profile directory (e.g., 'Default', 'Profile 1')")
        .option("--current-profile", "Show the currently resolved/selected Chrome profile directory and name")
        .option("-u, --user-agent <string>", "custom User-Agent header for HTTP requests");
    program.option("--debug", "Print full HTTP request and response bodies for debugging");
    program.option("--headed", "Show the managed Chrome window (default headless; reuses an already-open session)");
    // List command
    program
        .command("list")
        .description("List all supported website API sites")
        .action(async () => {
        await loadSites();
        console.log(chalk.bold.green("\n🌐 Supported Website APIs:\n"));
        const table = new Table({
            head: [
                chalk.bold.cyan("ID"),
                chalk.bold.cyan("Name"),
                chalk.bold.cyan("Domain"),
                chalk.bold.cyan("Description"),
            ],
            colWidths: [18, 25, 20, 50],
            wordWrap: true,
            style: { head: [], border: [] },
        });
        for (const web of sites) {
            // Markers: [p] browser transport, [l] needs login, [x] user extension.
            const markers = [];
            if (web.transport === "browser")
                markers.push(chalk.magenta("[p]"));
            if (web.auth)
                markers.push(chalk.red("[l]"));
            if (web.origin === "extension")
                markers.push(chalk.blue("[x]"));
            const nameCell = markers.length ? `${web.name} ${markers.join(" ")}` : web.name;
            table.push([chalk.yellow(web.id), nameCell, chalk.underline(web.domain), web.description]);
        }
        console.log(table.toString());
        console.log(`\n${chalk.magenta("[p]")} requires a running Chrome (Playwright) ${chalk.red("[l]")} requires login ${chalk.blue("[x]")} user extension`);
        console.log(`\nTo run an API query, execute: ${chalk.bold.cyan("npx website-api <id>")}\n`);
    });
    // Extension registry commands: `ext search|info|install|list|remove|update|registry`
    registerExtCommands(program);
    // Default command: fallback error or help
    program
        .argument("[website]", "website ID or domain to query (e.g. 'chatgpt.com')")
        .action(async (website) => {
        const globalOpts = program.opts();
        if (globalOpts.currentProfile) {
            const profilePath = process.env.PROFILE_PATH || process.env.CHROME_PROFILE_PATH || getDefaultChromeDir();
            const profileName = globalOpts.profile || process.env.PROFILE_NAME || "Default";
            console.log(chalk.bold.green("\n👤 Currently Resolved Profile:\n"));
            console.log(` ${chalk.bold("Path:")} ${profilePath}`);
            console.log(` ${chalk.bold("Name:")} ${profileName}\n`);
            return;
        }
        if (!website) {
            program.outputHelp();
            return;
        }
        // If website not found
        console.error(chalk.red(`Error: website adapter "${website}" not found.`));
        console.log(`Run ${chalk.cyan("npx website-api list")} to see all supported adapters.`);
        process.exit(1);
    });
    // The action handlers above are async, so parse with parseAsync and await it;
    // otherwise their rejections would escape this function.
    await program.parseAsync(process.argv);
}
207
// Start the CLI. Any failure that escapes runCli is printed as a single line
// (non-Error throwables are stringified) and the process exits non-zero.
runCli().catch((err) => {
    console.error(err instanceof Error ? err.message : String(err));
    process.exit(1);
});
@@ -1,7 +1,21 @@
1
- import { type Browser, type Page } from "playwright-core";
1
+ import type { Browser, Page } from "playwright-core";
2
2
  export interface BrowserOptions {
3
- /** CDP endpoint of a running Chrome. Defaults to env or localhost:9222. */
3
+ /**
4
+ * CDP endpoint of an already-running Chrome. When set (or via the
5
+ * `CDP_ENDPOINT` env var), we attach to it directly and skip launching. When
6
+ * unset, chrome-cdp-manager launches/attaches a managed browser for us.
7
+ */
4
8
  cdpEndpoint?: string;
9
+ /** Launch the managed browser headless. Ignored when `cdpEndpoint` is set. */
10
+ headless?: boolean;
11
+ /**
12
+ * Route the managed browser through a proxy. `true` / "default" → the default
13
+ * SOCKS5 proxy (socks5://127.0.0.1:1080); a port ("1080"), "host:port", or
14
+ * full "scheme://host:port" is accepted. Forwarded to chrome-cdp-manager's
15
+ * `launch({ proxy })`, so it applies only on a fresh launch (an already-running
16
+ * browser or an explicit `cdpEndpoint` is used as-is). Ignored when falsy.
17
+ */
18
+ proxy?: string | boolean;
5
19
  /** Close a tab opened by this session on dispose. Defaults to true. */
6
20
  close?: boolean;
7
21
  debug?: boolean;
@@ -1 +1,158 @@
1
- import{chromium as t}from"playwright-core";export const connectChrome=async(e,o={})=>{const n=o.cdpEndpoint||process.env.CDP_ENDPOINT||"http://localhost:9222",r=!!o.debug,a=await t.connectOverCDP(n),c=a.contexts()[0];if(!c)throw new Error("No active browser context found. Is Chrome running with remote debugging enabled?");let s=!1,i=c.pages().find(t=>{try{const o=new URL(e).hostname.replace("www.","");return new URL(t.url()).hostname.endsWith(o)||t.url().startsWith(e)}catch{return t.url().startsWith(e)}});return i?r&&console.log(`Reusing existing tab for ${e}`):(r&&console.log(`Opening a new tab for ${e}`),i=await c.newPage(),await i.goto(e,{waitUntil:"domcontentloaded"}),s=!0),{page:i,browser:a,opened:s,async dispose(){if(s&&!1!==o.close)try{await i.close()}catch{}try{await a.close()}catch{}}}};
1
/**
 * Lazily imports playwright-core and returns its `chromium` export.
 *
 * playwright-core is an optional dependency: HTTP-only installs work without
 * it, and it's loaded here on the first browser connection.
 *
 * @returns the `chromium` export of playwright-core.
 * @throws {Error} with install instructions when the import fails; the
 *   underlying import error is preserved as `cause` for diagnosis.
 */
async function loadChromium() {
    try {
        const { chromium } = await import("playwright-core");
        return chromium;
    }
    catch (err) {
        throw new Error('This site needs a browser, which requires the optional "playwright-core" dependency. ' +
            "Install it with: npm install playwright-core", { cause: err });
    }
}
14
/**
 * Lazily imports chrome-cdp-manager and returns the module namespace.
 *
 * chrome-cdp-manager launches (or attaches to) a launcher-managed CDP browser
 * so the user never has to start Chrome with `--remote-debugging-port` by hand.
 * It is loaded on demand, so HTTP-only runs never import it.
 *
 * @returns the chrome-cdp-manager module namespace.
 * @throws {Error} with install instructions when the import fails; the
 *   underlying import error is preserved as `cause` for diagnosis.
 */
async function loadCdpManager() {
    try {
        return await import("chrome-cdp-manager");
    }
    catch (err) {
        throw new Error('This site needs a browser, which is managed by the "chrome-cdp-manager" dependency. ' +
            "Install it with: npm install chrome-cdp-manager (macOS/Windows only).", { cause: err });
    }
}
29
/**
 * Resolves the CDP endpoint to connect to.
 *
 * An explicit endpoint (`options.cdpEndpoint`, then the `CDP_ENDPOINT` env var)
 * wins and is returned as-is with `managed: false`, so users can still point at
 * a Chrome they manage themselves. Otherwise chrome-cdp-manager's `launch()` is
 * called with the requested `headless` / `proxy` settings and its endpoint is
 * returned with `managed: true`.
 *
 * When the launcher reports that a proxy was requested but not reachable, a
 * "connecting directly" notice is written to stderr.
 *
 * @param options Browser options; reads `cdpEndpoint`, `headless`, `proxy`, `debug`.
 * @returns `{ endpoint, managed, proxyApplied, cdpPort }`. `cdpPort` is 0 for an
 *   explicit endpoint and falls back to 9222 when the launcher reports none.
 */
async function resolveEndpoint(options) {
    const explicit = options.cdpEndpoint || process.env.CDP_ENDPOINT;
    if (explicit) {
        return { endpoint: explicit, managed: false, proxyApplied: false, cdpPort: 0 };
    }
    const { launch } = await loadCdpManager();
    const launchResult = await launch({
        headless: !!options.headless,
        proxy: options.proxy || undefined,
    });
    const { endpoint, launched, config, proxyRequested, proxyReachable } = launchResult;
    const proxyApplied = !!config?.proxy;
    // Situation 1: a proxy was asked for but nothing was listening — we fell back
    // to a direct connection. Surface it (stderr, so JSON stdout stays clean).
    if (proxyRequested && !proxyReachable) {
        // `proxy: true` selects the default proxy; don't print the literal "true".
        const proxyLabel = options.proxy === true ? "default" : options.proxy;
        console.error(`Proxy ${proxyLabel} not reachable — connecting directly (no proxy).`);
    }
    if (options.debug) {
        const mode = options.headless ? "headless" : "headed";
        const via = proxyApplied ? ` via proxy ${config.proxy}` : "";
        console.log(launched
            ? `Launched managed Chrome (${mode})${via} at ${endpoint}`
            : `Attached to managed Chrome at ${endpoint} (already running)`);
    }
    return { endpoint, managed: true, proxyApplied, cdpPort: config?.cdpPort ?? 9222 };
}
63
/**
 * Connects to Chrome over CDP and reuses (or opens) a tab for the target URL.
 * Returns a session with an explicit `dispose()` the runtime calls during
 * teardown — sites never manage the connection themselves.
 *
 * A tab is reused when its URL starts with `targetUrl`, or when its hostname
 * equals the target hostname (leading "www." ignored) or is a subdomain of it.
 *
 * If anything fails after the CDP connection is established, the same teardown
 * that `dispose()` performs runs before the error is rethrown, so a failed
 * setup does not leave a connection or a freshly opened tab behind.
 *
 * @param targetUrl URL whose tab should be reused or opened.
 * @param options Browser options; reads `debug`, `headless`, `close`, plus
 *   whatever `resolveEndpoint` reads.
 * @returns `{ page, browser, opened, dispose }`; `opened` is true when this call
 *   created the tab.
 * @throws {Error} when the connected browser exposes no context.
 */
export const connectChrome = async (targetUrl, options = {}) => {
    const debug = !!options.debug;
    const { endpoint, managed, proxyApplied, cdpPort } = await resolveEndpoint(options);
    const chromium = await loadChromium();
    const browser = await chromium.connectOverCDP(endpoint);
    let page;
    let opened = false;
    // Shared by dispose() and the failure path below. Every step is best-effort.
    const teardown = async () => {
        if (page && opened && options.close !== false) {
            try {
                await page.close();
            }
            catch {
                // ignore
            }
        }
        try {
            await browser.close();
        }
        catch {
            // ignore
        }
        // Situation 3: a proxy only takes effect at launch, so a proxied run owns
        // an ephemeral browser — fully stop it once the request completes, leaving
        // a clean slate for the next (possibly differently-proxied) run.
        if (managed && proxyApplied) {
            try {
                const { closeBrowser } = await loadCdpManager();
                await closeBrowser(cdpPort);
                if (debug)
                    console.log(`Stopped managed Chrome on :${cdpPort} (proxied run complete)`);
            }
            catch {
                // ignore
            }
        }
    };
    try {
        const context = browser.contexts()[0];
        if (!context) {
            throw new Error("No active browser context found. Is Chrome running with remote debugging enabled?");
        }
        // Parse the target host once. An unparsable target (or one without a host)
        // leaves only the URL-prefix comparison.
        let targetHost = "";
        try {
            targetHost = new URL(targetUrl).hostname.replace(/^www\./, "");
        }
        catch {
            // not an absolute URL; fall back to prefix matching only
        }
        const matchesTarget = (candidateUrl) => {
            if (candidateUrl.startsWith(targetUrl))
                return true;
            if (!targetHost)
                return false;
            try {
                const host = new URL(candidateUrl).hostname;
                // Match on a label boundary so "notexample.com" never matches "example.com".
                return host === targetHost || host.endsWith(`.${targetHost}`);
            }
            catch {
                return false;
            }
        };
        page = context.pages().find((p) => matchesTarget(p.url()));
        if (page) {
            if (debug)
                console.log(`Reusing existing tab for ${targetUrl}`);
        }
        else {
            if (debug)
                console.log(`Opening a new tab for ${targetUrl}`);
            page = await context.newPage();
            opened = true;
        }
        // Headless Chrome leaks "HeadlessChrome" in its network User-Agent header (the
        // JS-level fingerprint init script only changes navigator.userAgent, not the
        // request header). Some sites — e.g. Micro Center — serve a blank/blocked page
        // to it, so the product grid never appears. Override the network UA to a
        // de-headlessed value, before any navigation.
        if (options.headless) {
            try {
                const liveUa = await page.evaluate(() => navigator.userAgent);
                const ua = liveUa.replace(/HeadlessChrome/g, "Chrome");
                if (ua && ua !== liveUa) {
                    await context.setExtraHTTPHeaders({ "user-agent": ua });
                    const cdp = await context.newCDPSession(page);
                    await cdp.send("Network.setUserAgentOverride", { userAgent: ua });
                    if (debug)
                        console.log(`De-headlessed network User-Agent → ${ua}`);
                }
            }
            catch {
                // best-effort; ignore
            }
        }
        if (opened) {
            await page.goto(targetUrl, { waitUntil: "domcontentloaded" });
        }
    }
    catch (err) {
        await teardown();
        throw err;
    }
    return {
        page,
        browser,
        opened,
        dispose: teardown,
    };
};
@@ -1,4 +1,4 @@
1
- import { getCookies as realGetCookies, getPasswords as realGetPasswords, type CookieEntry } from "chrome-tools";
1
+ import { type CookieEntry, getCookies as realGetCookies, getPasswords as realGetPasswords } from "chrome-tools";
2
2
  import type { Credentials, QueryOptions } from "../types.js";
3
3
  export declare const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";
4
4
  /**
@@ -20,5 +20,11 @@ export declare function resolveCredentials(domain: string, options: QueryOptions
20
20
  /**
21
21
  * Resolves decrypted Chrome cookies for a domain. When `required` is false a
22
22
  * missing login yields an empty array instead of throwing.
23
+ *
24
+ * For required sites the two failure modes are reported distinctly:
25
+ * - `getCookies` throws → Chrome's cookie store couldn't be read at all
26
+ * (e.g. keychain access denied, missing/locked profile). → "No login found".
27
+ * - it returns no rows → the store was read fine but holds no cookies for the
28
+ * domain: the user simply isn't signed in there. → "No cookies found".
23
29
  */
24
30
  export declare function resolveCookies(domain: string, options: QueryOptions, required: boolean, providers?: CookieProviders): CookieEntry[];
@@ -1 +1,68 @@
1
- import{getCookies as e,getPasswords as r}from"chrome-tools";export const DEFAULT_USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";function o(e,r){return e.profilePath||r.PROFILE_PATH||r.CHROME_PROFILE_PATH||void 0}function n(e,r){return e.profile||r.PROFILE_NAME||void 0}export function resolveUserAgent(e,r=process.env){return e.userAgent||r.userAgent||r.USER_AGENT||DEFAULT_USER_AGENT}export function buildCookieString(e){return e.map(e=>`${e.name}=${e.value}`).join("; ")}export function resolveCredentials(e,t,s={}){const i=s.env??process.env,a=s.getPasswords??r,c=o(t,i),l=n(t,i);let p=a({chromeDir:c,profile:l,search:e});if(!p||0===p.length){const r=e.split(".");p=a({chromeDir:c,profile:l,search:r[r.length-2]||e})}if(!p||0===p.length)throw new Error(`No saved passwords found in Chrome for '${e}'`);const{username:u,password:f}=p[0];if(!u||!f)throw new Error(`Found credentials for '${e}' but username or password was empty`);return{username:u,password:f}}export function resolveCookies(r,t,s,i={}){const a=i.env??process.env,c=i.getCookies??e,l=o(t,a),p=n(t,a);let u=[];try{u=c({chromeDir:l,profile:p,domain:r,decrypt:!0})}catch{if(s)throw new Error("No login found in browser")}if((!u||0===u.length)&&s)throw new Error("No login found in browser");return u??[]}
1
+ import { getCookies as realGetCookies, getPasswords as realGetPasswords, } from "chrome-tools";
2
+ export const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";
3
+ function resolveChromeDir(options, env) {
4
+ return options.profilePath || env.PROFILE_PATH || env.CHROME_PROFILE_PATH || undefined;
5
+ }
6
+ function resolveProfile(options, env) {
7
+ return options.profile || env.PROFILE_NAME || undefined;
8
+ }
9
+ export function resolveUserAgent(options, env = process.env) {
10
+ return options.userAgent || env.userAgent || env.USER_AGENT || DEFAULT_USER_AGENT;
11
+ }
12
+ export function buildCookieString(cookies) {
13
+ return cookies.map((c) => `${c.name}=${c.value}`).join("; ");
14
+ }
15
+ /**
16
+ * Resolves saved Chrome credentials for a domain. Searches the full domain
17
+ * first, then falls back to the registrable name (e.g. "pseg" from "pseg.com").
18
+ */
19
+ export function resolveCredentials(domain, options, providers = {}) {
20
+ const env = providers.env ?? process.env;
21
+ const getPasswords = providers.getPasswords ?? realGetPasswords;
22
+ const chromeDir = resolveChromeDir(options, env);
23
+ const profile = resolveProfile(options, env);
24
+ let credentials = getPasswords({ chromeDir, profile, search: domain });
25
+ if (!credentials || credentials.length === 0) {
26
+ const parts = domain.split(".");
27
+ const name = parts[parts.length - 2] || domain;
28
+ credentials = getPasswords({ chromeDir, profile, search: name });
29
+ }
30
+ if (!credentials || credentials.length === 0) {
31
+ throw new Error(`No saved passwords found in Chrome for '${domain}'`);
32
+ }
33
+ const { username, password } = credentials[0];
34
+ if (!username || !password) {
35
+ throw new Error(`Found credentials for '${domain}' but username or password was empty`);
36
+ }
37
+ return { username, password };
38
+ }
39
+ /**
40
+ * Resolves decrypted Chrome cookies for a domain. When `required` is false a
41
+ * missing login yields an empty array instead of throwing.
42
+ *
43
+ * For required sites the two failure modes are reported distinctly:
44
+ * - `getCookies` throws → Chrome's cookie store couldn't be read at all
45
+ * (e.g. keychain access denied, missing/locked profile). → "No login found".
46
+ * - it returns no rows → the store was read fine but holds no cookies for the
47
+ * domain: the user simply isn't signed in there. → "No cookies found".
48
+ */
49
+ export function resolveCookies(domain, options, required, providers = {}) {
50
+ const env = providers.env ?? process.env;
51
+ const getCookies = providers.getCookies ?? realGetCookies;
52
+ const chromeDir = resolveChromeDir(options, env);
53
+ const profile = resolveProfile(options, env);
54
+ let cookies;
55
+ try {
56
+ cookies = getCookies({ chromeDir, profile, domain, decrypt: true });
57
+ }
58
+ catch (err) {
59
+ if (!required)
60
+ return [];
61
+ const detail = err instanceof Error ? err.message : String(err);
62
+ throw new Error(`No login found in browser: could not read Chrome cookies for ${domain} (${detail})`);
63
+ }
64
+ if ((!cookies || cookies.length === 0) && required) {
65
+ throw new Error(`No cookies found in browser for ${domain}. Sign in to ${domain} in Chrome and try again.`);
66
+ }
67
+ return cookies ?? [];
68
+ }