website-api 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/README.md +141 -1
  2. package/dist/bin/cli.js +204 -1
  3. package/dist/src/capabilities/browser.d.ts +8 -2
  4. package/dist/src/capabilities/browser.js +106 -1
  5. package/dist/src/capabilities/cookies.d.ts +7 -1
  6. package/dist/src/capabilities/cookies.js +68 -1
  7. package/dist/src/capabilities/download.js +32 -1
  8. package/dist/src/capabilities/fingerprint.js +62 -1
  9. package/dist/src/capabilities/http.js +101 -1
  10. package/dist/src/capabilities/login/login-helper.js +185 -1
  11. package/dist/src/capabilities/login/login-strategy.js +36 -1
  12. package/dist/src/challenges/perimeterx.d.ts +62 -0
  13. package/dist/src/challenges/perimeterx.js +112 -0
  14. package/dist/src/cli/ext.js +338 -1
  15. package/dist/src/core/context.d.ts +2 -2
  16. package/dist/src/core/context.js +137 -1
  17. package/dist/src/core/define-site.js +74 -1
  18. package/dist/src/core/loader.js +142 -1
  19. package/dist/src/core/registry.js +332 -1
  20. package/dist/src/core/runtime.d.ts +12 -4
  21. package/dist/src/core/runtime.js +98 -1
  22. package/dist/src/env.js +34 -1
  23. package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
  24. package/dist/src/sites/bloomberg.com/index.js +49 -0
  25. package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
  26. package/dist/src/sites/chase.com/download-helper.js +266 -1
  27. package/dist/src/sites/chase.com/index.js +87 -1
  28. package/dist/src/sites/chase.com/openapi.yaml +76 -0
  29. package/dist/src/sites/chatgpt.com/index.js +24 -1
  30. package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
  31. package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
  32. package/dist/src/sites/claude.ai/index.js +42 -1
  33. package/dist/src/sites/claude.ai/openapi.yaml +54 -0
  34. package/dist/src/sites/cursor.com/index.js +12 -1
  35. package/dist/src/sites/cursor.com/openapi.yaml +39 -0
  36. package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
  37. package/dist/src/sites/e-zpassny.com/index.js +344 -0
  38. package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
  39. package/dist/src/sites/gemini.google.com/index.js +80 -1
  40. package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
  41. package/dist/src/sites/google.com/google-helpers.js +255 -1
  42. package/dist/src/sites/google.com/index.js +253 -1
  43. package/dist/src/sites/google.com/openapi.yaml +59 -0
  44. package/dist/src/sites/ollama.com/index.js +43 -1
  45. package/dist/src/sites/ollama.com/openapi.yaml +39 -0
  46. package/dist/src/sites/perplexity.ai/index.js +253 -1
  47. package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
  48. package/dist/src/sites/pseg.com/index.js +243 -1
  49. package/dist/src/sites/pseg.com/openapi.yaml +42 -0
  50. package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
  51. package/dist/src/sites/voice.google.com/index.d.ts +2 -0
  52. package/dist/src/sites/voice.google.com/index.js +122 -0
  53. package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
  54. package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
  55. package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
  56. package/dist/src/sites/zillow.com/index.d.ts +2 -0
  57. package/dist/src/sites/zillow.com/index.js +303 -0
  58. package/dist/src/sites/zillow.com/openapi.yaml +55 -0
  59. package/dist/src/types.d.ts +7 -0
  60. package/dist/src/types.js +1 -1
  61. package/dist/src/util/args-parser.js +145 -1
  62. package/dist/src/util/google-json.js +74 -1
  63. package/dist/src/website-api.d.ts +7 -7
  64. package/dist/src/website-api.js +13 -1
  65. package/package.json +37 -10
package/README.md CHANGED
@@ -1,3 +1,143 @@
1
1
  # website-api
2
2
 
3
- CLI and library to fetch website API data
3
+ Query websites' private APIs with your **real logged-in Chrome session** — as a CLI or a Node.js library.
4
+
5
+ One site definition describes *what* to fetch; the runtime assembles *how*: plain HTTP with your decrypted
6
+ Chrome cookies injected, or a real fingerprinted Chrome tab over CDP, with login, downloads, and in-page
7
+ scripts available as composable capabilities. See [DESIGN.md](DESIGN.md) for the architecture.
8
+
9
+ > macOS-focused: cookie/credential decryption uses [chrome-tools](https://www.npmjs.com/package/chrome-tools),
10
+ > which reads Chrome's local encrypted storage via the macOS keychain.
11
+
12
+ ## Install
13
+
14
+ ```sh
15
+ npm install -g website-api # CLI
16
+ npm install website-api # library
17
+ ```
18
+
19
+ Requires Node ≥ 22. `playwright-core` is an **optional** dependency — HTTP-only sites work without it;
20
+ browser-transport sites (`[p]` in `list`) will tell you to install it if it's missing.
21
+
22
+ Browser-transport sites no longer need you to start Chrome by hand:
23
+ [chrome-cdp-manager](https://www.npmjs.com/package/chrome-cdp-manager) launches (or attaches to) a
24
+ dedicated, isolated CDP browser automatically on first use. It runs **headless by default**, but if a
25
+ CDP session is already open it reuses that one as-is. Pass `--headed` to force a visible window (e.g. to
26
+ solve a captcha that needs a real press-and-hold). To attach to a Chrome you manage yourself, set
27
+ `CDP_ENDPOINT` (e.g. `http://localhost:9222`) and that endpoint is used directly.
28
+
29
+ ## CLI quickstart
30
+
31
+ ```sh
32
+ website-api list # all bundled + installed sites
33
+ website-api codex-usage # ChatGPT/Codex usage via your session
34
+ website-api perplexity "what is pnpm?" # positional args
35
+ website-api claude-usage --org my-org # site-specific flags
36
+ website-api chatgpt.com --help # per-site help
37
+ website-api example.com # no definition? universal cookie-aware GET
38
+ ```
39
+
40
+ Useful global flags: `--profile <name>` (Chrome profile), `--debug` (full request/response dump),
41
+ `--keep-open` (leave the browser tab open), `--headed` (show the managed Chrome window; default headless),
42
+ `--out <file>`.
43
+
44
+ ### Installing more sites
45
+
46
+ Sites can be installed from a public registry (a GitHub repo of prebuilt site modules):
47
+
48
+ ```sh
49
+ website-api ext search zillow
50
+ website-api ext install zillow
51
+ website-api ext list / remove / update
52
+ ```
53
+
54
+ **Security note:** an installed site is code that runs with access to your Chrome cookies for its domain.
55
+ Installs are SHA256-verified against the registry catalog and require confirmation — only install sites
56
+ from registries you trust.
57
+
58
+ ## Library usage
59
+
60
+ Everything the CLI does is available programmatically:
61
+
62
+ ```ts
63
+ import { queryWebsite } from "website-api";
64
+
65
+ // By site id — same resolution as the CLI
66
+ const usage = await queryWebsite("codex-usage", { profile: "Default" });
67
+ ```
68
+
69
+ Import a bundled site directly and run it — handy for other packages that want
70
+ one site's features without the registry:
71
+
72
+ ```ts
73
+ import { runSite } from "website-api";
74
+ import zillow from "website-api/sites/zillow.com";
75
+
76
+ const homes = await runSite(zillow, { query: "Seattle, WA" });
77
+ ```
78
+
79
+ Or bring your own definition — `runSite` accepts a plain object:
80
+
81
+ ```ts
82
+ import { runSite } from "website-api";
83
+
84
+ const result = await runSite({
85
+ id: "example",
86
+ name: "Example",
87
+ domain: "example.com",
88
+ description: "JSON endpoint with my session cookies",
89
+ endpoints: [{ url: "https://example.com/api/me" }],
90
+ });
91
+ ```
92
+
93
+ Tests and embedders can inject fakes for every capability (fetch, browser, cookie store, fs) via the
94
+ third `providers` argument — see `ContextProviders`.
95
+
96
+ ## Writing your own site
97
+
98
+ Drop a folder in `~/.config/website-api/extensions/` — no imports, no build step:
99
+
100
+ ```js
101
+ // ~/.config/website-api/extensions/example.com/index.mjs
102
+ export default {
103
+ id: "example",
104
+ name: "Example",
105
+ domain: "example.com",
106
+ description: "Example data",
107
+ endpoints: [{ url: "https://example.com/api/data" }],
108
+ };
109
+ ```
110
+
111
+ It shows up in `website-api list` immediately (marked `[x]`). Sites needing a real browser set
112
+ `transport: "browser"` and use `ctx.browser()` / `ctx.eval()` inside a `run(ctx)` function; login flows,
113
+ downloads, and SSE parsing are provided by the context. Develop iteratively with
114
+ `website-api ext test ./my-site` (runs a local file without installing). Full authoring guide:
115
+ [DESIGN.md](DESIGN.md).
116
+
117
+ ## OpenAPI specs
118
+
119
+ Every bundled site ships a generated `openapi.yaml` next to its module (`dist/src/sites/<site>/openapi.yaml`
120
+ in the published package) describing its endpoints and CLI surface, including the `x-website-api` extension
121
+ block. Regenerate with `pnpm generate:openapi` after a build.
122
+
123
+ ## Security model
124
+
125
+ - Cookies and credentials are read from Chrome's local encrypted storage and **only sent to the target
126
+ site's own domain**. They are never written to disk or sent anywhere else.
127
+ - `--debug` prints raw requests/responses (including cookie headers) to your terminal — don't paste that
128
+ output into bug reports.
129
+ - Registry installs run third-party code; they are integrity-checked (SHA256) and gated behind an explicit
130
+ confirmation that names the source repo.
131
+
132
+ ## Development
133
+
134
+ ```sh
135
+ pnpm install # local dev links chrome-tools from ../chrome_tools (pnpm-workspace.yaml)
136
+ pnpm build # tsc → dist (readable, unminified)
137
+ pnpm test # node:test — offline, no Chrome needed
138
+ pnpm lint # biome
139
+ pnpm typecheck
140
+ pnpm generate:openapi
141
+ ```
142
+
143
+ MIT © guocity
package/dist/bin/cli.js CHANGED
@@ -1,2 +1,205 @@
1
1
  #!/usr/bin/env node
2
- import{readFileSync as e,writeFileSync as o}from"node:fs";import{dirname as n,join as s}from"node:path";import{fileURLToPath as t}from"node:url";import{program as r}from"commander";import i from"chalk";import l from"cli-table3";import{getDefaultChromeDir as a}from"chrome-tools";import{queryWebsite as c,sites as p,loadSites as d,getSite as u}from"../src/website-api.js";import{registerExtCommands as g}from"../src/cli/ext.js";import{parseArgsForWebsite as f}from"../src/util/args-parser.js";const m=s(n(t(import.meta.url)),"..","..","package.json"),{version:b}=JSON.parse(e(m,"utf8"));process.on("unhandledRejection",e=>{console.error(e instanceof Error?e.message:"command not found"),process.exit(1)}),async function(){const e=process.argv.slice(2),n=e.find(e=>!e.startsWith("-"));if(n&&"list"!==n){await d();const s=u(n);if(s){const t=e.filter((o,s)=>s!==e.indexOf(n));let r;try{r=f(s.positionals,s.parameters,t)}catch(e){console.error(i.red(`Error: ${e instanceof Error?e.message:String(e)}`)),console.log(`Run ${i.cyan(`npx website-api ${s.id} --help`)} for usage details.`),process.exit(1)}if(r.helpRequested)return void function(e){console.log(i.bold.green(`\n🌐 Website API: ${i.white(e.name)} (${i.yellow(e.id)})\n`)),console.log(` ${i.italic(e.description)}\n`);let o=`npx website-api ${e.id}`;if(e.positionals&&e.positionals.length>0)for(const n of e.positionals)o+=n.required?` <${n.name}>`:` [${n.name}]`;if(o+=" [options]",console.log(`${i.bold("Usage:")} ${i.cyan(o)}\n`),e.positionals&&e.positionals.length>0){console.log(i.bold("Positional Arguments:"));for(const o of e.positionals)console.log(` ${i.cyan(o.name.padEnd(15))} ${o.description}`);console.log()}console.log(i.bold("Options:"));const n=[...e.parameters||[],{name:"profile",type:"string",description:"specific Chrome profile directory (e.g., 'Default')"},{name:"user-agent",type:"string",description:"custom User-Agent header for HTTP requests",short:"u"},{name:"debug",type:"boolean",description:"Print full HTTP 
request and response bodies for debugging"},{name:"keep-open",type:"boolean",description:"Leave the browser tab open after running (preserve the logged-in session)"},{name:"help",type:"boolean",description:"Show help for this website site",short:"h"}];for(const e of n){let o=`--${e.name}`;"boolean"!==e.type&&(o+=" <value>"),o=e.short?`-${e.short}, ${o}`:` ${o}`;const n=void 0!==e.default?` (default: ${e.default})`:"";console.log(` ${i.yellow(o.padEnd(28))} ${e.description}${i.gray(n)}`)}console.log()}(s);try{const e=await c(s.id,r.options);let n;if(r.options.text&&e&&"object"==typeof e){const o=e.answer||e.text;n=void 0!==o?String(o):JSON.stringify(e,null,2)}else n="string"==typeof e?e:JSON.stringify(e,null,2);r.options.out?(o(r.options.out,n+"\n","utf8"),console.log(i.green(`Success! Decoded response written to ${r.options.out}`))):console.log(n)}catch(e){console.error(i.red(e instanceof Error?e.message:"command not found")),process.exit(1)}return}}r.name("website-api").description("CLI to query website APIs using decrypted Chrome cookies on macOS").version(b),r.option("--profile <name>","specific Chrome profile directory (e.g., 'Default', 'Profile 1')").option("--current-profile","Show the currently resolved/selected Chrome profile directory and name").option("-u, --user-agent <string>","custom User-Agent header for HTTP requests"),r.option("--debug","Print full HTTP request and response bodies for debugging"),r.command("list").description("List all supported website API sites").action(async()=>{await d(),console.log(i.bold.green("\n🌐 Supported Website APIs:\n"));const e=new l({head:[i.bold.cyan("ID"),i.bold.cyan("Name"),i.bold.cyan("Domain"),i.bold.cyan("Description")],colWidths:[18,25,20,50],wordWrap:!0,style:{head:[],border:[]}});for(const o of p){const n=[];"browser"===o.transport&&n.push(i.magenta("[p]")),o.auth&&n.push(i.red("[l]")),"extension"===o.origin&&n.push(i.blue("[x]"));const s=n.length?`${o.name} ${n.join(" 
")}`:o.name;e.push([i.yellow(o.id),s,i.underline(o.domain),o.description])}console.log(e.toString()),console.log(`\n${i.magenta("[p]")} requires a running Chrome (Playwright) ${i.red("[l]")} requires login ${i.blue("[x]")} user extension`),console.log(`\nTo run an API query, execute: ${i.bold.cyan("npx website-api <id>")}\n`)}),g(r),r.argument("[website]","website ID or domain to query (e.g. 'chatgpt.com')").action(async e=>{const o=r.opts();if(o.currentProfile){const e=process.env.PROFILE_PATH||process.env.CHROME_PROFILE_PATH||a(),n=o.profile||process.env.PROFILE_NAME||"Default";return console.log(i.bold.green("\n👤 Currently Resolved Profile:\n")),console.log(` ${i.bold("Path:")} ${e}`),void console.log(` ${i.bold("Name:")} ${n}\n`)}e?(console.error(i.red(`Error: website adapter "${e}" not found.`)),console.log(`Run ${i.cyan("npx website-api list")} to see all supported adapters.`),process.exit(1)):r.outputHelp()}),r.parse(process.argv)}().catch(e=>{console.error(e instanceof Error?e.message:"command not found"),process.exit(1)});
2
+ import { readFileSync, writeFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+ import chalk from "chalk";
6
+ import { getDefaultChromeDir } from "chrome-tools";
7
+ import Table from "cli-table3";
8
+ import { program } from "commander";
9
+ import { registerExtCommands } from "../src/cli/ext.js";
10
+ import { parseArgsForWebsite } from "../src/util/args-parser.js";
11
+ import { getSite, loadSites, queryWebsite, sites } from "../src/website-api.js";
12
// The CLI reports its own version, read from the package.json that sits two
// directories above this file.
const packageJsonPath = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "package.json");
const packageVersion = JSON.parse(readFileSync(packageJsonPath, "utf8")).version;
14
// Last-resort handler for promise rejections that nothing else caught: print a
// one-line message (no stack trace) and exit with a non-zero status. A reason
// that is not an Error is printed as-is rather than being replaced by an
// unrelated generic message, so the real cause stays visible.
process.on("unhandledRejection", (reason) => {
    console.error(reason instanceof Error ? reason.message : String(reason));
    process.exit(1);
});
19
/**
 * Prints the help page for one website adapter: a banner with its name and
 * id, its description, a usage line, the positional arguments (only when the
 * adapter declares any), and finally every option — the adapter's own
 * parameters followed by the flags shared by all sites.
 *
 * @param adapter Site definition; reads `id`, `name`, `description` and the
 *   optional `positionals` / `parameters` lists.
 */
function printWebsiteHelp(adapter) {
    const positionals = adapter.positionals;
    const hasPositionals = Boolean(positionals) && positionals.length > 0;
    console.log(chalk.bold.green(`\n🌐 Website API: ${chalk.white(adapter.name)} (${chalk.yellow(adapter.id)})\n`));
    console.log(` ${chalk.italic(adapter.description)}\n`);
    // Usage line: required positionals render as <name>, optional ones as [name].
    const usageParts = [`npx website-api ${adapter.id}`];
    if (hasPositionals) {
        for (const { name, required } of positionals) {
            usageParts.push(required ? `<${name}>` : `[${name}]`);
        }
    }
    usageParts.push("[options]");
    console.log(`${chalk.bold("Usage:")} ${chalk.cyan(usageParts.join(" "))}\n`);
    if (hasPositionals) {
        console.log(chalk.bold("Positional Arguments:"));
        for (const pos of positionals) {
            console.log(` ${chalk.cyan(pos.name.padEnd(15))} ${pos.description}`);
        }
        console.log();
    }
    console.log(chalk.bold("Options:"));
    // Flags accepted by every site, listed after the site-specific parameters.
    const sharedFlags = [
        { name: "profile", type: "string", description: "specific Chrome profile directory (e.g., 'Default')" },
        {
            name: "user-agent",
            type: "string",
            description: "custom User-Agent header for HTTP requests",
            short: "u",
        },
        {
            name: "debug",
            type: "boolean",
            description: "Print full HTTP request and response bodies for debugging",
        },
        {
            name: "keep-open",
            type: "boolean",
            description: "Leave the browser tab open after running (preserve the logged-in session)",
        },
        {
            name: "headed",
            type: "boolean",
            description: "Show the managed Chrome window (default headless; reuses an already-open session)",
        },
        { name: "help", type: "boolean", description: "Show help for this website site", short: "h" },
    ];
    const siteParams = adapter.parameters || [];
    for (const param of [...siteParams, ...sharedFlags]) {
        // Non-boolean options take a value; a short alias, when present, goes first.
        const longFlag = param.type === "boolean" ? `--${param.name}` : `--${param.name} <value>`;
        const flag = param.short ? `-${param.short}, ${longFlag}` : ` ${longFlag}`;
        const defStr = param.default === undefined ? "" : ` (default: ${param.default})`;
        console.log(` ${chalk.yellow(flag.padEnd(28))} ${param.description}${chalk.gray(defStr)}`);
    }
    console.log();
}
83
/**
 * CLI entry point.
 *
 * The first argument that does not start with "-" is looked up as a site id
 * (unless it is "list"). When a site matches, Commander is bypassed so the
 * site's own flags can be parsed by `parseArgsForWebsite`; the query result is
 * then printed, or written to the file named by the `out` option. In every
 * other case the global Commander program handles the invocation: `list`, the
 * `ext` commands, `--current-profile`, help, and the "adapter not found" error.
 *
 * Exits the process with status 1 on argument, query, or lookup errors.
 */
async function runCli() {
    const argv = process.argv.slice(2);
    // Check if first positional is a website adapter (avoiding commands and global flags)
    const firstPositional = argv.find((arg) => !arg.startsWith("-"));
    if (firstPositional && firstPositional !== "list") {
        await loadSites();
        const adapter = getSite(firstPositional);
        if (adapter) {
            // Bypasses standard commander parser to allow website-specific options.
            // Only the first occurrence of the site id is dropped from the arguments.
            const siteIndex = argv.indexOf(firstPositional);
            const websiteArgs = argv.filter((_, i) => i !== siteIndex);
            let parsed;
            try {
                parsed = parseArgsForWebsite(adapter.positionals, adapter.parameters, websiteArgs);
            }
            catch (err) {
                console.error(chalk.red(`Error: ${err instanceof Error ? err.message : String(err)}`));
                console.log(`Run ${chalk.cyan(`npx website-api ${adapter.id} --help`)} for usage details.`);
                process.exit(1);
            }
            if (parsed.helpRequested) {
                printWebsiteHelp(adapter);
                return;
            }
            try {
                const data = await queryWebsite(adapter.id, parsed.options);
                let output;
                if (parsed.options.text && data && typeof data === "object") {
                    // Text mode: prefer the answer/text field, else fall back to JSON.
                    const ans = data.answer || data.text;
                    output = ans !== undefined ? String(ans) : JSON.stringify(data, null, 2);
                }
                else {
                    output = typeof data === "string" ? data : JSON.stringify(data, null, 2);
                }
                if (parsed.options.out) {
                    writeFileSync(parsed.options.out, output + "\n", "utf8");
                    console.log(chalk.green(`Success! Decoded response written to ${parsed.options.out}`));
                }
                else {
                    console.log(output);
                }
            }
            catch (err) {
                // Show what was actually thrown, even when it is not an Error object.
                console.error(chalk.red(err instanceof Error ? err.message : String(err)));
                process.exit(1);
            }
            return;
        }
    }
    // Fallback to Commander for global commands and options
    program
        .name("website-api")
        .description("CLI to query website APIs using decrypted Chrome cookies on macOS")
        .version(packageVersion);
    // Global options
    program
        .option("--profile <name>", "specific Chrome profile directory (e.g., 'Default', 'Profile 1')")
        .option("--current-profile", "Show the currently resolved/selected Chrome profile directory and name")
        .option("-u, --user-agent <string>", "custom User-Agent header for HTTP requests");
    program.option("--debug", "Print full HTTP request and response bodies for debugging");
    program.option("--headed", "Show the managed Chrome window (default headless; reuses an already-open session)");
    // List command
    program
        .command("list")
        .description("List all supported website API sites")
        .action(async () => {
        await loadSites();
        console.log(chalk.bold.green("\n🌐 Supported Website APIs:\n"));
        const table = new Table({
            head: [
                chalk.bold.cyan("ID"),
                chalk.bold.cyan("Name"),
                chalk.bold.cyan("Domain"),
                chalk.bold.cyan("Description"),
            ],
            colWidths: [18, 25, 20, 50],
            wordWrap: true,
            style: { head: [], border: [] },
        });
        for (const web of sites) {
            // Markers: [p] browser transport, [l] login required, [x] user extension.
            const markers = [];
            if (web.transport === "browser")
                markers.push(chalk.magenta("[p]"));
            if (web.auth)
                markers.push(chalk.red("[l]"));
            if (web.origin === "extension")
                markers.push(chalk.blue("[x]"));
            const nameCell = markers.length ? `${web.name} ${markers.join(" ")}` : web.name;
            table.push([chalk.yellow(web.id), nameCell, chalk.underline(web.domain), web.description]);
        }
        console.log(table.toString());
        console.log(`\n${chalk.magenta("[p]")} requires a running Chrome (Playwright) ${chalk.red("[l]")} requires login ${chalk.blue("[x]")} user extension`);
        console.log(`\nTo run an API query, execute: ${chalk.bold.cyan("npx website-api <id>")}\n`);
    });
    // Extension registry commands: `ext search|info|install|list|remove|update|registry`
    registerExtCommands(program);
    // Default command: fallback error or help
    program
        .argument("[website]", "website ID or domain to query (e.g. 'chatgpt.com')")
        .action(async (website) => {
        const globalOpts = program.opts();
        if (globalOpts.currentProfile) {
            const profilePath = process.env.PROFILE_PATH || process.env.CHROME_PROFILE_PATH || getDefaultChromeDir();
            const profileName = globalOpts.profile || process.env.PROFILE_NAME || "Default";
            console.log(chalk.bold.green("\n👤 Currently Resolved Profile:\n"));
            console.log(` ${chalk.bold("Path:")} ${profilePath}`);
            console.log(` ${chalk.bold("Name:")} ${profileName}\n`);
            return;
        }
        if (!website) {
            program.outputHelp();
            return;
        }
        // If website not found
        console.error(chalk.red(`Error: website adapter "${website}" not found.`));
        console.log(`Run ${chalk.cyan("npx website-api list")} to see all supported adapters.`);
        process.exit(1);
    });
    // The action handlers above are async, so parse with parseAsync and await
    // it: a rejection inside a handler then propagates to runCli's caller
    // instead of surfacing as an unhandled rejection.
    await program.parseAsync(process.argv);
}
202
// Start the CLI. Any failure that escapes runCli is reduced to a one-line
// message and a non-zero exit status; a thrown value that is not an Error is
// printed as-is rather than being replaced by an unrelated generic message.
runCli().catch((err) => {
    console.error(err instanceof Error ? err.message : String(err));
    process.exit(1);
});
package/dist/src/capabilities/browser.d.ts CHANGED
@@ -1,7 +1,13 @@
1
- import { type Browser, type Page } from "playwright-core";
1
+ import type { Browser, Page } from "playwright-core";
2
2
  export interface BrowserOptions {
3
- /** CDP endpoint of a running Chrome. Defaults to env or localhost:9222. */
3
+ /**
4
+ * CDP endpoint of an already-running Chrome. When set (or via the
5
+ * `CDP_ENDPOINT` env var), we attach to it directly and skip launching. When
6
+ * unset, chrome-cdp-manager launches/attaches a managed browser for us.
7
+ */
4
8
  cdpEndpoint?: string;
9
+ /** Launch the managed browser headless. Ignored when `cdpEndpoint` is set. */
10
+ headless?: boolean;
5
11
  /** Close a tab opened by this session on dispose. Defaults to true. */
6
12
  close?: boolean;
7
13
  debug?: boolean;
package/dist/src/capabilities/browser.js CHANGED
@@ -1 +1,106 @@
1
- import{chromium as t}from"playwright-core";export const connectChrome=async(e,o={})=>{const n=o.cdpEndpoint||process.env.CDP_ENDPOINT||"http://localhost:9222",r=!!o.debug,a=await t.connectOverCDP(n),c=a.contexts()[0];if(!c)throw new Error("No active browser context found. Is Chrome running with remote debugging enabled?");let s=!1,i=c.pages().find(t=>{try{const o=new URL(e).hostname.replace("www.","");return new URL(t.url()).hostname.endsWith(o)||t.url().startsWith(e)}catch{return t.url().startsWith(e)}});return i?r&&console.log(`Reusing existing tab for ${e}`):(r&&console.log(`Opening a new tab for ${e}`),i=await c.newPage(),await i.goto(e,{waitUntil:"domcontentloaded"}),s=!0),{page:i,browser:a,opened:s,async dispose(){if(s&&!1!==o.close)try{await i.close()}catch{}try{await a.close()}catch{}}}};
1
/**
 * Lazily imports playwright-core and returns its `chromium` export.
 *
 * playwright-core is an optional dependency: HTTP-only installs work without
 * it, and it is loaded here on the first browser connection.
 *
 * @returns The `chromium` export of playwright-core.
 * @throws {Error} With an install hint when the import fails; the underlying
 *   import error is attached as `cause` so the real reason is not lost.
 */
async function loadChromium() {
    try {
        return (await import("playwright-core")).chromium;
    }
    catch (err) {
        throw new Error('This site needs a browser, which requires the optional "playwright-core" dependency. ' +
            "Install it with: npm install playwright-core", { cause: err });
    }
}
14
/**
 * Lazily imports chrome-cdp-manager, which launches (or attaches to) a
 * launcher-managed CDP browser so the user never has to start Chrome with
 * `--remote-debugging-port` by hand. Loading it on demand means HTTP-only
 * runs never pay for it.
 *
 * @returns The chrome-cdp-manager module namespace.
 * @throws {Error} With an install hint when the import fails; the underlying
 *   import error is attached as `cause` so the real reason is not lost.
 */
async function loadCdpManager() {
    try {
        return await import("chrome-cdp-manager");
    }
    catch (err) {
        throw new Error('This site needs a browser, which is managed by the "chrome-cdp-manager" dependency. ' +
            "Install it with: npm install chrome-cdp-manager (macOS/Windows only).", { cause: err });
    }
}
29
/**
 * Picks the CDP endpoint to connect to.
 *
 * An endpoint given explicitly — `options.cdpEndpoint` first, then the
 * `CDP_ENDPOINT` environment variable — is returned untouched, which lets
 * users point at a Chrome they run themselves. Without one, chrome-cdp-manager
 * is asked to launch (or attach to) its managed browser and that browser's
 * endpoint is returned.
 *
 * @param options Reads `cdpEndpoint`, `headless` and `debug`.
 * @returns The endpoint to hand to the CDP connection.
 */
async function resolveEndpoint(options) {
    const preset = options.cdpEndpoint || process.env.CDP_ENDPOINT;
    if (preset) {
        return preset;
    }
    const { launch } = await loadCdpManager();
    const managed = await launch({ headless: Boolean(options.headless) });
    if (options.debug) {
        let note;
        if (managed.launched) {
            const mode = options.headless ? "headless" : "headed";
            note = `Launched managed Chrome (${mode}) at ${managed.endpoint}`;
        }
        else {
            note = `Attached to managed Chrome at ${managed.endpoint}`;
        }
        console.log(note);
    }
    return managed.endpoint;
}
49
/**
 * Returns the hostname of `targetUrl` without a leading "www.", or "" when
 * the URL cannot be parsed or carries no host.
 */
function targetHostOf(targetUrl) {
    try {
        return new URL(targetUrl).hostname.replace(/^www\./, "");
    }
    catch {
        return "";
    }
}
/**
 * Decides whether an open tab belongs to the target site.
 *
 * When the target has a host, the tab matches only if its host is that host
 * or a subdomain of it. The comparison is made on a label boundary, so
 * "notexample.com" and "example.com.evil.com" do not match "example.com".
 * When either URL cannot be parsed, or the target has no host, the check falls
 * back to a plain prefix comparison of the two URL strings.
 */
function isTabForTarget(tabUrl, targetUrl, targetHost) {
    if (targetHost) {
        try {
            const tabHost = new URL(tabUrl).hostname;
            return tabHost === targetHost || tabHost.endsWith(`.${targetHost}`);
        }
        catch {
            // Unparseable tab URL: fall through to the prefix comparison.
        }
    }
    return tabUrl.startsWith(targetUrl);
}
/**
 * Connects to Chrome over CDP — at the endpoint chosen by `resolveEndpoint` —
 * and reuses an open tab of the target site, or opens a new tab and navigates
 * it to `targetUrl`.
 *
 * @param targetUrl URL of the site to work with.
 * @param options Reads `debug` (log which tab is used) and `close` (set to
 *   false to leave a tab opened here open on dispose); the whole object is
 *   also passed to `resolveEndpoint`.
 * @returns A session `{ page, browser, opened, dispose }`. `opened` tells
 *   whether the tab was created by this call; `dispose()` closes such a tab
 *   (unless `close` is false) and then closes the browser handle, ignoring
 *   errors from either step.
 * @throws {Error} When the browser exposes no context, or when opening or
 *   navigating the new tab fails. In both cases the session is disposed first
 *   so neither the connection nor a half-opened tab is left behind.
 */
export const connectChrome = async (targetUrl, options = {}) => {
    const debug = !!options.debug;
    const endpoint = await resolveEndpoint(options);
    const chromium = await loadChromium();
    const browser = await chromium.connectOverCDP(endpoint);
    let page;
    let opened = false;
    const dispose = async () => {
        if (opened && options.close !== false) {
            try {
                await page.close();
            }
            catch {
                // ignore
            }
        }
        try {
            await browser.close();
        }
        catch {
            // ignore
        }
    };
    try {
        const context = browser.contexts()[0];
        if (!context) {
            throw new Error("No active browser context found. Is Chrome running with remote debugging enabled?");
        }
        // Parse the target once instead of once per open tab.
        const targetHost = targetHostOf(targetUrl);
        page = context.pages().find((p) => isTabForTarget(p.url(), targetUrl, targetHost));
        if (page) {
            if (debug)
                console.log(`Reusing existing tab for ${targetUrl}`);
        }
        else {
            if (debug)
                console.log(`Opening a new tab for ${targetUrl}`);
            page = await context.newPage();
            // Mark the tab as ours before navigating so a failed navigation
            // still closes it during cleanup.
            opened = true;
            await page.goto(targetUrl, { waitUntil: "domcontentloaded" });
        }
    }
    catch (err) {
        await dispose();
        throw err;
    }
    return { page, browser, opened, dispose };
};
package/dist/src/capabilities/cookies.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { getCookies as realGetCookies, getPasswords as realGetPasswords, type CookieEntry } from "chrome-tools";
1
+ import { type CookieEntry, getCookies as realGetCookies, getPasswords as realGetPasswords } from "chrome-tools";
2
2
  import type { Credentials, QueryOptions } from "../types.js";
3
3
  export declare const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";
4
4
  /**
@@ -20,5 +20,11 @@ export declare function resolveCredentials(domain: string, options: QueryOptions
20
20
  /**
21
21
  * Resolves decrypted Chrome cookies for a domain. When `required` is false a
22
22
  * missing login yields an empty array instead of throwing.
23
+ *
24
+ * For required sites the two failure modes are reported distinctly:
25
+ * - `getCookies` throws → Chrome's cookie store couldn't be read at all
26
+ * (e.g. keychain access denied, missing/locked profile). → "No login found".
27
+ * - it returns no rows → the store was read fine but holds no cookies for the
28
+ * domain: the user simply isn't signed in there. → "No cookies found".
23
29
  */
24
30
  export declare function resolveCookies(domain: string, options: QueryOptions, required: boolean, providers?: CookieProviders): CookieEntry[];
@@ -1 +1,68 @@
1
- import{getCookies as e,getPasswords as r}from"chrome-tools";export const DEFAULT_USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";function o(e,r){return e.profilePath||r.PROFILE_PATH||r.CHROME_PROFILE_PATH||void 0}function n(e,r){return e.profile||r.PROFILE_NAME||void 0}export function resolveUserAgent(e,r=process.env){return e.userAgent||r.userAgent||r.USER_AGENT||DEFAULT_USER_AGENT}export function buildCookieString(e){return e.map(e=>`${e.name}=${e.value}`).join("; ")}export function resolveCredentials(e,t,s={}){const i=s.env??process.env,a=s.getPasswords??r,c=o(t,i),l=n(t,i);let p=a({chromeDir:c,profile:l,search:e});if(!p||0===p.length){const r=e.split(".");p=a({chromeDir:c,profile:l,search:r[r.length-2]||e})}if(!p||0===p.length)throw new Error(`No saved passwords found in Chrome for '${e}'`);const{username:u,password:f}=p[0];if(!u||!f)throw new Error(`Found credentials for '${e}' but username or password was empty`);return{username:u,password:f}}export function resolveCookies(r,t,s,i={}){const a=i.env??process.env,c=i.getCookies??e,l=o(t,a),p=n(t,a);let u=[];try{u=c({chromeDir:l,profile:p,domain:r,decrypt:!0})}catch{if(s)throw new Error("No login found in browser")}if((!u||0===u.length)&&s)throw new Error("No login found in browser");return u??[]}
1
+ import { getCookies as realGetCookies, getPasswords as realGetPasswords, } from "chrome-tools";
2
+ export const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";
3
+ function resolveChromeDir(options, env) {
4
+ return options.profilePath || env.PROFILE_PATH || env.CHROME_PROFILE_PATH || undefined;
5
+ }
6
+ function resolveProfile(options, env) {
7
+ return options.profile || env.PROFILE_NAME || undefined;
8
+ }
9
+ export function resolveUserAgent(options, env = process.env) {
10
+ return options.userAgent || env.userAgent || env.USER_AGENT || DEFAULT_USER_AGENT;
11
+ }
12
+ export function buildCookieString(cookies) {
13
+ return cookies.map((c) => `${c.name}=${c.value}`).join("; ");
14
+ }
15
+ /**
16
+ * Resolves saved Chrome credentials for a domain. Searches the full domain
17
+ * first, then falls back to the second-to-last domain label (e.g. "pseg" from "pseg.com").
18
+ */
19
+ export function resolveCredentials(domain, options, providers = {}) {
20
+ const env = providers.env ?? process.env;
21
+ const getPasswords = providers.getPasswords ?? realGetPasswords;
22
+ const chromeDir = resolveChromeDir(options, env);
23
+ const profile = resolveProfile(options, env);
24
+ let credentials = getPasswords({ chromeDir, profile, search: domain });
25
+ if (!credentials || credentials.length === 0) {
26
+ const parts = domain.split(".");
27
+ const name = parts[parts.length - 2] || domain;
28
+ credentials = getPasswords({ chromeDir, profile, search: name });
29
+ }
30
+ if (!credentials || credentials.length === 0) {
31
+ throw new Error(`No saved passwords found in Chrome for '${domain}'`);
32
+ }
33
+ const { username, password } = credentials[0];
34
+ if (!username || !password) {
35
+ throw new Error(`Found credentials for '${domain}' but username or password was empty`);
36
+ }
37
+ return { username, password };
38
+ }
39
+ /**
40
+ * Resolves decrypted Chrome cookies for a domain. When `required` is false a
41
+ * missing login yields an empty array instead of throwing.
42
+ *
43
+ * For required sites the two failure modes are reported distinctly:
44
+ * - `getCookies` throws → Chrome's cookie store couldn't be read at all
45
+ * (e.g. keychain access denied, missing/locked profile). → "No login found".
46
+ * - it returns no rows → the store was read fine but holds no cookies for the
47
+ * domain: the user simply isn't signed in there. → "No cookies found".
48
+ */
49
+ export function resolveCookies(domain, options, required, providers = {}) {
50
+ const env = providers.env ?? process.env;
51
+ const getCookies = providers.getCookies ?? realGetCookies;
52
+ const chromeDir = resolveChromeDir(options, env);
53
+ const profile = resolveProfile(options, env);
54
+ let cookies;
55
+ try {
56
+ cookies = getCookies({ chromeDir, profile, domain, decrypt: true });
57
+ }
58
+ catch (err) {
59
+ if (!required)
60
+ return [];
61
+ const detail = err instanceof Error ? err.message : String(err);
62
+ throw new Error(`No login found in browser: could not read Chrome cookies for ${domain} (${detail})`);
63
+ }
64
+ if ((!cookies || cookies.length === 0) && required) {
65
+ throw new Error(`No cookies found in browser for ${domain}. Sign in to ${domain} in Chrome and try again.`);
66
+ }
67
+ return cookies ?? [];
68
+ }
@@ -1 +1,32 @@
1
- import t from"node:fs/promises";import e from"node:path";export function createSaver(r,o=process.cwd()){const n=e.resolve(o,r??".");let i=!1;return async function(r,o){i||(await t.mkdir(n,{recursive:!0}),i=!0);const s=e.join(n,e.basename(r));return await t.writeFile(s,o),s}}export function assertNotHtml(t,e){const r=String(t??"").trimStart();if(/^<!doctype html/i.test(r)||/^<html/i.test(r))throw new Error(`Download for ${e} returned HTML instead of data. The session may have expired.`);return t}
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ /**
4
+ * Builds a `save(filename, content)` function bound to a target directory.
5
+ * Creates the directory on first write and returns the absolute path written.
6
+ * An `outDir` of null or undefined means "current working directory".
7
+ */
8
+ export function createSaver(outDir, cwd = process.cwd()) {
9
+ const targetDir = path.resolve(cwd, outDir ?? ".");
10
+ let ensured = false;
11
+ return async function save(filename, content) {
12
+ if (!ensured) {
13
+ await fs.mkdir(targetDir, { recursive: true });
14
+ ensured = true;
15
+ }
16
+ // Defend against path traversal: only the basename is honored.
17
+ const filePath = path.join(targetDir, path.basename(filename));
18
+ await fs.writeFile(filePath, content);
19
+ return filePath;
20
+ };
21
+ }
22
+ /**
23
+ * Guards a downloaded payload that should be data but may be an HTML error page
24
+ * (a common symptom of an expired session). Throws a clear, actionable error.
25
+ */
26
+ export function assertNotHtml(text, label) {
27
+ const trimmed = String(text ?? "").trimStart();
28
+ if (/^<!doctype html/i.test(trimmed) || /^<html/i.test(trimmed)) {
29
+ throw new Error(`Download for ${label} returned HTML instead of data. The session may have expired.`);
30
+ }
31
+ return text;
32
+ }