website-api 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -1
- package/dist/bin/cli.js +204 -1
- package/dist/src/capabilities/browser.d.ts +8 -2
- package/dist/src/capabilities/browser.js +106 -1
- package/dist/src/capabilities/cookies.d.ts +7 -1
- package/dist/src/capabilities/cookies.js +68 -1
- package/dist/src/capabilities/download.js +32 -1
- package/dist/src/capabilities/fingerprint.js +62 -1
- package/dist/src/capabilities/http.js +101 -1
- package/dist/src/capabilities/login/login-helper.js +185 -1
- package/dist/src/capabilities/login/login-strategy.js +36 -1
- package/dist/src/challenges/perimeterx.d.ts +62 -0
- package/dist/src/challenges/perimeterx.js +112 -0
- package/dist/src/cli/ext.js +338 -1
- package/dist/src/core/context.d.ts +2 -2
- package/dist/src/core/context.js +137 -1
- package/dist/src/core/define-site.js +74 -1
- package/dist/src/core/loader.js +142 -1
- package/dist/src/core/registry.js +332 -1
- package/dist/src/core/runtime.d.ts +12 -4
- package/dist/src/core/runtime.js +98 -1
- package/dist/src/env.js +34 -1
- package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
- package/dist/src/sites/bloomberg.com/index.js +49 -0
- package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
- package/dist/src/sites/chase.com/download-helper.js +266 -1
- package/dist/src/sites/chase.com/index.js +87 -1
- package/dist/src/sites/chase.com/openapi.yaml +76 -0
- package/dist/src/sites/chatgpt.com/index.js +24 -1
- package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
- package/dist/src/sites/claude.ai/claude-helpers.d.ts +20 -0
- package/dist/src/sites/claude.ai/claude-helpers.js +26 -0
- package/dist/src/sites/claude.ai/index.d.ts +2 -0
- package/dist/src/sites/claude.ai/index.js +42 -0
- package/dist/src/sites/claude.ai/openapi.yaml +54 -0
- package/dist/src/sites/cursor.com/index.js +12 -1
- package/dist/src/sites/cursor.com/openapi.yaml +39 -0
- package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
- package/dist/src/sites/e-zpassny.com/index.js +344 -0
- package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
- package/dist/src/sites/gemini.google.com/index.d.ts +11 -0
- package/dist/src/sites/gemini.google.com/index.js +80 -1
- package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
- package/dist/src/sites/google.com/google-helpers.js +255 -1
- package/dist/src/sites/google.com/index.js +253 -1
- package/dist/src/sites/google.com/openapi.yaml +59 -0
- package/dist/src/sites/ollama.com/index.js +43 -1
- package/dist/src/sites/ollama.com/openapi.yaml +39 -0
- package/dist/src/sites/perplexity.ai/index.js +253 -1
- package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
- package/dist/src/sites/pseg.com/index.js +243 -1
- package/dist/src/sites/pseg.com/openapi.yaml +42 -0
- package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
- package/dist/src/sites/voice.google.com/index.d.ts +2 -0
- package/dist/src/sites/voice.google.com/index.js +122 -0
- package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
- package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
- package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
- package/dist/src/sites/zillow.com/index.d.ts +2 -0
- package/dist/src/sites/zillow.com/index.js +303 -0
- package/dist/src/sites/zillow.com/openapi.yaml +55 -0
- package/dist/src/types.d.ts +16 -0
- package/dist/src/types.js +1 -1
- package/dist/src/util/args-parser.js +145 -1
- package/dist/src/util/google-json.js +74 -1
- package/dist/src/website-api.d.ts +7 -7
- package/dist/src/website-api.js +13 -1
- package/package.json +37 -10
package/README.md
CHANGED
|
@@ -1,3 +1,143 @@
|
|
|
1
1
|
# website-api
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Query websites' private APIs with your **real logged-in Chrome session** — as a CLI or a Node.js library.
|
|
4
|
+
|
|
5
|
+
One site definition describes *what* to fetch; the runtime assembles *how*: plain HTTP with your decrypted
|
|
6
|
+
Chrome cookies injected, or a real fingerprinted Chrome tab over CDP, with login, downloads, and in-page
|
|
7
|
+
scripts available as composable capabilities. See [DESIGN.md](DESIGN.md) for the architecture.
|
|
8
|
+
|
|
9
|
+
> macOS-focused: cookie/credential decryption uses [chrome-tools](https://www.npmjs.com/package/chrome-tools),
|
|
10
|
+
> which reads Chrome's local encrypted storage via the macOS keychain.
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```sh
|
|
15
|
+
npm install -g website-api # CLI
|
|
16
|
+
npm install website-api # library
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Requires Node ≥ 22. `playwright-core` is an **optional** dependency — HTTP-only sites work without it;
|
|
20
|
+
browser-transport sites (`[p]` in `list`) will tell you to install it if it's missing.
|
|
21
|
+
|
|
22
|
+
Browser-transport sites no longer need you to start Chrome by hand:
|
|
23
|
+
[chrome-cdp-manager](https://www.npmjs.com/package/chrome-cdp-manager) launches (or attaches to) a
|
|
24
|
+
dedicated, isolated CDP browser automatically on first use. It runs **headless by default**, but if a
|
|
25
|
+
CDP session is already open it reuses that one as-is. Pass `--headed` to force a visible window (e.g. to
|
|
26
|
+
solve a captcha that needs a real press-and-hold). To attach to a Chrome you manage yourself, set
|
|
27
|
+
`CDP_ENDPOINT` (e.g. `http://localhost:9222`) and that endpoint is used directly.
|
|
28
|
+
|
|
29
|
+
## CLI quickstart
|
|
30
|
+
|
|
31
|
+
```sh
|
|
32
|
+
website-api list # all bundled + installed sites
|
|
33
|
+
website-api codex-usage # ChatGPT/Codex usage via your session
|
|
34
|
+
website-api perplexity "what is pnpm?" # positional args
|
|
35
|
+
website-api claude-usage --org my-org # site-specific flags
|
|
36
|
+
website-api chatgpt.com --help # per-site help
|
|
37
|
+
website-api example.com # no definition? universal cookie-aware GET
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Useful global flags: `--profile <name>` (Chrome profile), `--debug` (full request/response dump),
|
|
41
|
+
`--keep-open` (leave the browser tab open), `--headed` (show the managed Chrome window; default headless),
|
|
42
|
+
`--out <file>`.
|
|
43
|
+
|
|
44
|
+
### Installing more sites
|
|
45
|
+
|
|
46
|
+
Sites can be installed from a public registry (a GitHub repo of prebuilt site modules):
|
|
47
|
+
|
|
48
|
+
```sh
|
|
49
|
+
website-api ext search zillow
|
|
50
|
+
website-api ext install zillow
|
|
51
|
+
website-api ext list / remove / update
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Security note:** an installed site is code that runs with access to your Chrome cookies for its domain.
|
|
55
|
+
Installs are SHA256-verified against the registry catalog and require confirmation — only install sites
|
|
56
|
+
from registries you trust.
|
|
57
|
+
|
|
58
|
+
## Library usage
|
|
59
|
+
|
|
60
|
+
Everything the CLI does is available programmatically:
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { queryWebsite } from "website-api";
|
|
64
|
+
|
|
65
|
+
// By site id — same resolution as the CLI
|
|
66
|
+
const usage = await queryWebsite("codex-usage", { profile: "Default" });
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Import a bundled site directly and run it — handy for other packages that want
|
|
70
|
+
one site's features without the registry:
|
|
71
|
+
|
|
72
|
+
```ts
|
|
73
|
+
import { runSite } from "website-api";
|
|
74
|
+
import zillow from "website-api/sites/zillow.com";
|
|
75
|
+
|
|
76
|
+
const homes = await runSite(zillow, { query: "Seattle, WA" });
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Or bring your own definition — `runSite` accepts a plain object:
|
|
80
|
+
|
|
81
|
+
```ts
|
|
82
|
+
import { runSite } from "website-api";
|
|
83
|
+
|
|
84
|
+
const result = await runSite({
|
|
85
|
+
id: "example",
|
|
86
|
+
name: "Example",
|
|
87
|
+
domain: "example.com",
|
|
88
|
+
description: "JSON endpoint with my session cookies",
|
|
89
|
+
endpoints: [{ url: "https://example.com/api/me" }],
|
|
90
|
+
});
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Tests and embedders can inject fakes for every capability (fetch, browser, cookie store, fs) via the
|
|
94
|
+
third `providers` argument — see `ContextProviders`.
|
|
95
|
+
|
|
96
|
+
## Writing your own site
|
|
97
|
+
|
|
98
|
+
Drop a folder in `~/.config/website-api/extensions/` — no imports, no build step:
|
|
99
|
+
|
|
100
|
+
```js
|
|
101
|
+
// ~/.config/website-api/extensions/example.com/index.mjs
|
|
102
|
+
export default {
|
|
103
|
+
id: "example",
|
|
104
|
+
name: "Example",
|
|
105
|
+
domain: "example.com",
|
|
106
|
+
description: "Example data",
|
|
107
|
+
endpoints: [{ url: "https://example.com/api/data" }],
|
|
108
|
+
};
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
It shows up in `website-api list` immediately (marked `[x]`). Sites needing a real browser set
|
|
112
|
+
`transport: "browser"` and use `ctx.browser()` / `ctx.eval()` inside a `run(ctx)` function; login flows,
|
|
113
|
+
downloads, and SSE parsing are provided by the context. Develop iteratively with
|
|
114
|
+
`website-api ext test ./my-site` (runs a local file without installing). Full authoring guide:
|
|
115
|
+
[DESIGN.md](DESIGN.md).
|
|
116
|
+
|
|
117
|
+
## OpenAPI specs
|
|
118
|
+
|
|
119
|
+
Every bundled site ships a generated `openapi.yaml` next to its module (`dist/src/sites/<site>/openapi.yaml`
|
|
120
|
+
in the published package) describing its endpoints and CLI surface, including the `x-website-api` extension
|
|
121
|
+
block. Regenerate with `pnpm generate:openapi` after a build.
|
|
122
|
+
|
|
123
|
+
## Security model
|
|
124
|
+
|
|
125
|
+
- Cookies and credentials are read from Chrome's local encrypted storage and **only sent to the target
|
|
126
|
+
site's own domain**. They are never written to disk or sent anywhere else.
|
|
127
|
+
- `--debug` prints raw requests/responses (including cookie headers) to your terminal — don't paste that
|
|
128
|
+
output into bug reports.
|
|
129
|
+
- Registry installs run third-party code; they are integrity-checked (SHA256) and gated behind an explicit
|
|
130
|
+
confirmation that names the source repo.
|
|
131
|
+
|
|
132
|
+
## Development
|
|
133
|
+
|
|
134
|
+
```sh
|
|
135
|
+
pnpm install # local dev links chrome-tools from ../chrome_tools (pnpm-workspace.yaml)
|
|
136
|
+
pnpm build # tsc → dist (readable, unminified)
|
|
137
|
+
pnpm test # node:test — offline, no Chrome needed
|
|
138
|
+
pnpm lint # biome
|
|
139
|
+
pnpm typecheck
|
|
140
|
+
pnpm generate:openapi
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
MIT © guocity
|
package/dist/bin/cli.js
CHANGED
|
@@ -1,2 +1,205 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import{readFileSync
|
|
2
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import chalk from "chalk";
|
|
6
|
+
import { getDefaultChromeDir } from "chrome-tools";
|
|
7
|
+
import Table from "cli-table3";
|
|
8
|
+
import { program } from "commander";
|
|
9
|
+
import { registerExtCommands } from "../src/cli/ext.js";
|
|
10
|
+
import { parseArgsForWebsite } from "../src/util/args-parser.js";
|
|
11
|
+
import { getSite, loadSites, queryWebsite, sites } from "../src/website-api.js";
|
|
12
|
+
const packageJsonPath = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "package.json");
|
|
13
|
+
const { version: packageVersion } = JSON.parse(readFileSync(packageJsonPath, "utf8"));
|
|
14
|
+
// Last-resort handler: report a promise rejection that nothing caught, then exit with status 1.
process.on("unhandledRejection", (reason) => {
    // Error instances contribute their message; any other rejection value gets the generic text.
    const message = reason instanceof Error ? reason.message : "command not found";
    console.error(message);
    process.exit(1);
});
|
|
19
|
+
/**
 * Prints the help page for one site adapter to stdout: title, description,
 * usage line, positional arguments (when the adapter declares any), and the
 * adapter's own options followed by the options every site accepts.
 *
 * @param adapter Site definition; reads `id`, `name`, `description`,
 *   `positionals` and `parameters`.
 */
function printWebsiteHelp(adapter) {
    const hasPositionals = Boolean(adapter.positionals && adapter.positionals.length > 0);
    console.log(chalk.bold.green(`\n🌐 Website API: ${chalk.white(adapter.name)} (${chalk.yellow(adapter.id)})\n`));
    console.log(` ${chalk.italic(adapter.description)}\n`);

    // Usage line: required positionals as <name>, optional ones as [name].
    const usageParts = [`npx website-api ${adapter.id}`];
    if (hasPositionals) {
        for (const pos of adapter.positionals) {
            usageParts.push(pos.required ? `<${pos.name}>` : `[${pos.name}]`);
        }
    }
    usageParts.push("[options]");
    console.log(`${chalk.bold("Usage:")} ${chalk.cyan(usageParts.join(" "))}\n`);

    if (hasPositionals) {
        console.log(chalk.bold("Positional Arguments:"));
        for (const pos of adapter.positionals) {
            const label = chalk.cyan(pos.name.padEnd(15));
            console.log(` ${label} ${pos.description}`);
        }
        console.log();
    }

    console.log(chalk.bold("Options:"));
    // Site-specific parameters come first, then the flags shared by every site.
    const sharedOptions = [
        { name: "profile", type: "string", description: "specific Chrome profile directory (e.g., 'Default')" },
        {
            name: "user-agent",
            type: "string",
            description: "custom User-Agent header for HTTP requests",
            short: "u",
        },
        {
            name: "debug",
            type: "boolean",
            description: "Print full HTTP request and response bodies for debugging",
        },
        {
            name: "keep-open",
            type: "boolean",
            description: "Leave the browser tab open after running (preserve the logged-in session)",
        },
        {
            name: "headed",
            type: "boolean",
            description: "Show the managed Chrome window (default headless; reuses an already-open session)",
        },
        { name: "help", type: "boolean", description: "Show help for this website site", short: "h" },
    ];
    const optionRows = [...(adapter.parameters || []), ...sharedOptions];
    for (const param of optionRows) {
        // Non-boolean options take a value; options without a short alias get a leading pad instead.
        const valueHint = param.type !== "boolean" ? ` <value>` : "";
        const longFlag = `--${param.name}${valueHint}`;
        const flag = param.short ? `-${param.short}, ${longFlag}` : ` ${longFlag}`;
        const defaultNote = param.default === undefined ? "" : ` (default: ${param.default})`;
        console.log(` ${chalk.yellow(flag.padEnd(28))} ${param.description}${chalk.gray(defaultNote)}`);
    }
    console.log();
}
|
|
83
|
+
/**
 * CLI entry point.
 *
 * 1. If the first non-dash argument resolves to a site adapter, the remaining
 *    arguments are parsed with the adapter's own positionals/parameters and the
 *    site is queried directly. The result is printed, or written to `--out`.
 * 2. Otherwise Commander handles the global commands (`list`, `ext …`,
 *    `--current-profile`) and reports unknown site ids.
 *
 * Exits the process with status 1 on argument or query errors.
 */
async function runCli() {
    const argv = process.argv.slice(2);
    // Check if first positional is a website adapter (avoiding commands and global flags)
    // NOTE(review): the first non-dash token is taken as the site id, so the value of a flag placed
    // before it (e.g. `--profile Default chatgpt.com`) is mistaken for the site — confirm this is intended.
    const firstPositional = argv.find((arg) => !arg.startsWith("-"));
    if (firstPositional && firstPositional !== "list") {
        await loadSites();
        const adapter = getSite(firstPositional);
        if (adapter) {
            // Bypasses standard commander parser to allow website-specific options.
            // Only the first occurrence of the site id is removed; the index is computed once.
            const siteIndex = argv.indexOf(firstPositional);
            const websiteArgs = argv.filter((_, i) => i !== siteIndex);
            let parsed;
            try {
                parsed = parseArgsForWebsite(adapter.positionals, adapter.parameters, websiteArgs);
            }
            catch (err) {
                console.error(chalk.red(`Error: ${err instanceof Error ? err.message : String(err)}`));
                console.log(`Run ${chalk.cyan(`npx website-api ${adapter.id} --help`)} for usage details.`);
                process.exit(1);
            }
            if (parsed.helpRequested) {
                printWebsiteHelp(adapter);
                return;
            }
            try {
                const data = await queryWebsite(adapter.id, parsed.options);
                let output;
                if (parsed.options.text && data && typeof data === "object") {
                    // --text: prefer the plain answer/text field, fall back to the full JSON.
                    const ans = data.answer || data.text;
                    output = ans !== undefined ? String(ans) : JSON.stringify(data, null, 2);
                }
                else {
                    output = typeof data === "string" ? data : JSON.stringify(data, null, 2);
                }
                if (parsed.options.out) {
                    writeFileSync(parsed.options.out, output + "\n", "utf8");
                    console.log(chalk.green(`Success! Decoded response written to ${parsed.options.out}`));
                }
                else {
                    console.log(output);
                }
            }
            catch (err) {
                console.error(chalk.red(err instanceof Error ? err.message : "command not found"));
                process.exit(1);
            }
            return;
        }
    }
    // Fallback to Commander for global commands and options
    program
        .name("website-api")
        .description("CLI to query website APIs using decrypted Chrome cookies on macOS")
        .version(packageVersion);
    // Global options
    program
        .option("--profile <name>", "specific Chrome profile directory (e.g., 'Default', 'Profile 1')")
        .option("--current-profile", "Show the currently resolved/selected Chrome profile directory and name")
        .option("-u, --user-agent <string>", "custom User-Agent header for HTTP requests");
    program.option("--debug", "Print full HTTP request and response bodies for debugging");
    program.option("--headed", "Show the managed Chrome window (default headless; reuses an already-open session)");
    // List command
    program
        .command("list")
        .description("List all supported website API sites")
        .action(async () => {
            await loadSites();
            console.log(chalk.bold.green("\n🌐 Supported Website APIs:\n"));
            const table = new Table({
                head: [
                    chalk.bold.cyan("ID"),
                    chalk.bold.cyan("Name"),
                    chalk.bold.cyan("Domain"),
                    chalk.bold.cyan("Description"),
                ],
                colWidths: [18, 25, 20, 50],
                wordWrap: true,
                style: { head: [], border: [] },
            });
            for (const web of sites) {
                // Markers: [p] browser transport, [l] needs login, [x] user extension.
                const markers = [];
                if (web.transport === "browser")
                    markers.push(chalk.magenta("[p]"));
                if (web.auth)
                    markers.push(chalk.red("[l]"));
                if (web.origin === "extension")
                    markers.push(chalk.blue("[x]"));
                const nameCell = markers.length ? `${web.name} ${markers.join(" ")}` : web.name;
                table.push([chalk.yellow(web.id), nameCell, chalk.underline(web.domain), web.description]);
            }
            console.log(table.toString());
            console.log(`\n${chalk.magenta("[p]")} requires a running Chrome (Playwright) ${chalk.red("[l]")} requires login ${chalk.blue("[x]")} user extension`);
            console.log(`\nTo run an API query, execute: ${chalk.bold.cyan("npx website-api <id>")}\n`);
        });
    // Extension registry commands: `ext search|info|install|list|remove|update|registry`
    registerExtCommands(program);
    // Default command: fallback error or help
    program
        .argument("[website]", "website ID or domain to query (e.g. 'chatgpt.com')")
        .action(async (website) => {
            const globalOpts = program.opts();
            if (globalOpts.currentProfile) {
                const profilePath = process.env.PROFILE_PATH || process.env.CHROME_PROFILE_PATH || getDefaultChromeDir();
                const profileName = globalOpts.profile || process.env.PROFILE_NAME || "Default";
                console.log(chalk.bold.green("\n👤 Currently Resolved Profile:\n"));
                console.log(` ${chalk.bold("Path:")} ${profilePath}`);
                console.log(` ${chalk.bold("Name:")} ${profileName}\n`);
                return;
            }
            if (!website) {
                program.outputHelp();
                return;
            }
            // If website not found
            console.error(chalk.red(`Error: website adapter "${website}" not found.`));
            console.log(`Run ${chalk.cyan("npx website-api list")} to see all supported adapters.`);
            process.exit(1);
        });
    // The actions above are async: parseAsync() awaits them, so a rejection propagates to the
    // caller of runCli() instead of surfacing as an unhandled rejection.
    await program.parseAsync(process.argv);
}
|
|
202
|
+
// Start the CLI; anything that escapes runCli() is reported on stderr and ends the process with status 1.
runCli().catch((err) => {
    const message = err instanceof Error ? err.message : "command not found";
    console.error(message);
    process.exit(1);
});
|
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { Browser, Page } from "playwright-core";
|
|
2
2
|
export interface BrowserOptions {
|
|
3
|
-
/**
|
|
3
|
+
/**
|
|
4
|
+
* CDP endpoint of an already-running Chrome. When set (or via the
|
|
5
|
+
* `CDP_ENDPOINT` env var), we attach to it directly and skip launching. When
|
|
6
|
+
* unset, chrome-cdp-manager launches/attaches a managed browser for us.
|
|
7
|
+
*/
|
|
4
8
|
cdpEndpoint?: string;
|
|
9
|
+
/** Launch the managed browser headless. Ignored when `cdpEndpoint` is set. */
|
|
10
|
+
headless?: boolean;
|
|
5
11
|
/** Close a tab opened by this session on dispose. Defaults to true. */
|
|
6
12
|
close?: boolean;
|
|
7
13
|
debug?: boolean;
|
|
@@ -1 +1,106 @@
|
|
|
1
|
-
|
|
1
|
+
/**
 * Lazily imports playwright-core and returns its `chromium` browser type.
 *
 * playwright-core is an optional dependency: HTTP-only installs work without
 * it, and it's loaded here on the first browser connection.
 *
 * @returns The `chromium` export of playwright-core.
 * @throws {Error} With install instructions when the import fails; the
 *   underlying import error is preserved as `cause`.
 */
async function loadChromium() {
    try {
        const playwright = await import("playwright-core");
        return playwright.chromium;
    }
    catch (err) {
        // Keep the real failure reachable: the import can fail for reasons other than "not installed".
        throw new Error('This site needs a browser, which requires the optional "playwright-core" dependency. ' +
            "Install it with: npm install playwright-core", { cause: err });
    }
}
|
|
14
|
+
/**
 * Lazily imports chrome-cdp-manager, which launches (or attaches to) a
 * launcher-managed CDP browser so the user never has to start Chrome with
 * `--remote-debugging-port` by hand. Loaded on demand so HTTP-only runs never
 * pay for it.
 *
 * @returns The chrome-cdp-manager module namespace.
 * @throws {Error} With install instructions when the import fails; the
 *   underlying import error is preserved as `cause`.
 */
async function loadCdpManager() {
    try {
        return await import("chrome-cdp-manager");
    }
    catch (err) {
        // Keep the real failure reachable: the import can fail for reasons other than "not installed".
        throw new Error('This site needs a browser, which is managed by the "chrome-cdp-manager" dependency. ' +
            "Install it with: npm install chrome-cdp-manager (macOS/Windows only).", { cause: err });
    }
}
|
|
29
|
+
/**
 * Picks the CDP endpoint to connect to.
 *
 * An explicit endpoint — `options.cdpEndpoint`, then the `CDP_ENDPOINT`
 * environment variable — is returned unchanged, so a self-managed Chrome can
 * still be targeted. Otherwise chrome-cdp-manager's `launch()` is called and
 * the endpoint it reports is returned.
 *
 * @param options Reads `cdpEndpoint`, `headless` and `debug`.
 * @returns The endpoint to hand to the CDP client.
 */
async function resolveEndpoint(options) {
    const explicitEndpoint = options.cdpEndpoint || process.env.CDP_ENDPOINT;
    if (explicitEndpoint) {
        return explicitEndpoint;
    }
    const { launch } = await loadCdpManager();
    const { endpoint: managedEndpoint, launched: wasLaunched } = await launch({ headless: !!options.headless });
    if (options.debug) {
        if (wasLaunched) {
            const mode = options.headless ? "headless" : "headed";
            console.log(`Launched managed Chrome (${mode}) at ${managedEndpoint}`);
        }
        else {
            console.log(`Attached to managed Chrome at ${managedEndpoint}`);
        }
    }
    return managedEndpoint;
}
|
|
49
|
+
/**
 * Connects to Chrome over CDP and reuses (or opens) a tab for the target URL.
 *
 * A tab is reused when its URL starts with `targetUrl`, or when its hostname
 * equals the target hostname (leading "www." ignored) or is a subdomain of it.
 * Otherwise a new tab is opened and navigated to `targetUrl`.
 *
 * @param targetUrl URL the site needs a tab for.
 * @param options Reads `cdpEndpoint`, `headless`, `debug` and `close`.
 * @returns `{ page, browser, opened, dispose }`. `dispose()` closes the tab if
 *   this call opened it (unless `options.close === false`) and then closes the
 *   CDP connection; errors during disposal are ignored.
 * @throws {Error} When the browser exposes no context, or when opening or
 *   navigating the new tab fails. The CDP connection is closed before rethrowing.
 */
export const connectChrome = async (targetUrl, options = {}) => {
    const debug = !!options.debug;
    const endpoint = await resolveEndpoint(options);
    const chromium = await loadChromium();
    const browser = await chromium.connectOverCDP(endpoint);
    // Best-effort teardown of the CDP connection, shared by dispose() and the failure paths.
    const disconnect = async () => {
        try {
            await browser.close();
        }
        catch {
            // ignore
        }
    };
    const context = browser.contexts()[0];
    if (!context) {
        await disconnect();
        throw new Error("No active browser context found. Is Chrome running with remote debugging enabled?");
    }
    // Parse the target host once. It stays empty when targetUrl is not an absolute URL
    // (or has no hostname), in which case only the URL-prefix comparison applies.
    let targetHost = "";
    try {
        targetHost = new URL(targetUrl).hostname.replace(/^www\./, "");
    }
    catch {
        // ignore
    }
    const matchesTarget = (candidate) => {
        const candidateUrl = candidate.url();
        if (candidateUrl.startsWith(targetUrl))
            return true;
        if (!targetHost)
            return false;
        try {
            const candidateHost = new URL(candidateUrl).hostname;
            // Same host or a real subdomain; a bare suffix test would also accept "notexample.com".
            return candidateHost === targetHost || candidateHost.endsWith(`.${targetHost}`);
        }
        catch {
            return false;
        }
    };
    let opened = false;
    let page = context.pages().find(matchesTarget);
    if (page) {
        if (debug)
            console.log(`Reusing existing tab for ${targetUrl}`);
    }
    else {
        if (debug)
            console.log(`Opening a new tab for ${targetUrl}`);
        try {
            page = await context.newPage();
            await page.goto(targetUrl, { waitUntil: "domcontentloaded" });
        }
        catch (err) {
            // Do not leak the half-opened tab or the connection when navigation fails.
            if (page) {
                try {
                    await page.close();
                }
                catch {
                    // ignore
                }
            }
            await disconnect();
            throw err;
        }
        opened = true;
    }
    return {
        page,
        browser,
        opened,
        async dispose() {
            if (opened && options.close !== false) {
                try {
                    await page.close();
                }
                catch {
                    // ignore
                }
            }
            await disconnect();
        },
    };
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getCookies as realGetCookies, getPasswords as realGetPasswords
|
|
1
|
+
import { type CookieEntry, getCookies as realGetCookies, getPasswords as realGetPasswords } from "chrome-tools";
|
|
2
2
|
import type { Credentials, QueryOptions } from "../types.js";
|
|
3
3
|
export declare const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";
|
|
4
4
|
/**
|
|
@@ -20,5 +20,11 @@ export declare function resolveCredentials(domain: string, options: QueryOptions
|
|
|
20
20
|
/**
|
|
21
21
|
* Resolves decrypted Chrome cookies for a domain. When `required` is false a
|
|
22
22
|
* missing login yields an empty array instead of throwing.
|
|
23
|
+
*
|
|
24
|
+
* For required sites the two failure modes are reported distinctly:
|
|
25
|
+
* - `getCookies` throws → Chrome's cookie store couldn't be read at all
|
|
26
|
+
* (e.g. keychain access denied, missing/locked profile). → "No login found".
|
|
27
|
+
* - it returns no rows → the store was read fine but holds no cookies for the
|
|
28
|
+
* domain: the user simply isn't signed in there. → "No cookies found".
|
|
23
29
|
*/
|
|
24
30
|
export declare function resolveCookies(domain: string, options: QueryOptions, required: boolean, providers?: CookieProviders): CookieEntry[];
|
|
@@ -1 +1,68 @@
|
|
|
1
|
-
import{getCookies as
|
|
1
|
+
import { getCookies as realGetCookies, getPasswords as realGetPasswords, } from "chrome-tools";
|
|
2
|
+
// Fallback User-Agent string; resolveUserAgent() returns it when neither the options nor the environment supply one.
export const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36";
|
|
3
|
+
/**
 * Returns the Chrome directory override to use, or `undefined` when none is set.
 * Precedence: `options.profilePath`, then `env.PROFILE_PATH`, then
 * `env.CHROME_PROFILE_PATH`; empty values are skipped.
 */
function resolveChromeDir(options, env) {
    const candidates = [options.profilePath, env.PROFILE_PATH, env.CHROME_PROFILE_PATH];
    return candidates.find(Boolean);
}
|
|
6
|
+
/**
 * Returns the Chrome profile name to use: `options.profile` when set,
 * otherwise `env.PROFILE_NAME`, otherwise `undefined`.
 */
function resolveProfile(options, env) {
    const fromOptions = options.profile;
    if (fromOptions) {
        return fromOptions;
    }
    const fromEnv = env.PROFILE_NAME;
    return fromEnv ? fromEnv : undefined;
}
|
|
9
|
+
/**
 * Picks the User-Agent header value. Precedence: `options.userAgent`, then the
 * `userAgent` and `USER_AGENT` entries of `env` (defaults to `process.env`),
 * then `DEFAULT_USER_AGENT`. Empty values are skipped.
 */
export function resolveUserAgent(options, env = process.env) {
    for (const candidate of [options.userAgent, env.userAgent, env.USER_AGENT]) {
        if (candidate) {
            return candidate;
        }
    }
    return DEFAULT_USER_AGENT;
}
|
|
12
|
+
/**
 * Serializes cookie entries into a single `name=value; name=value` string.
 * An empty array yields an empty string.
 */
export function buildCookieString(cookies) {
    const pairs = cookies.map(({ name, value }) => `${name}=${value}`);
    return pairs.join("; ");
}
|
|
15
|
+
/**
 * Resolves saved Chrome credentials for a domain. Searches the full domain
 * first, then falls back to the second-to-last dot-separated label
 * (e.g. "pseg" from "pseg.com").
 *
 * The first saved entry that has both a username and a password is returned,
 * so an incomplete entry no longer hides a usable one stored after it.
 *
 * @param domain Domain to search saved passwords for.
 * @param options Query options; `profilePath` / `profile` select the Chrome store.
 * @param providers Optional overrides: `env` (defaults to `process.env`) and
 *   `getPasswords` (defaults to chrome-tools' implementation).
 * @returns `{ username, password }`.
 * @throws {Error} When no entry is found, or no entry has both fields set.
 */
export function resolveCredentials(domain, options, providers = {}) {
    const env = providers.env ?? process.env;
    const getPasswords = providers.getPasswords ?? realGetPasswords;
    const chromeDir = resolveChromeDir(options, env);
    const profile = resolveProfile(options, env);
    let credentials = getPasswords({ chromeDir, profile, search: domain });
    if (!credentials || credentials.length === 0) {
        const parts = domain.split(".");
        const name = parts[parts.length - 2] || domain;
        credentials = getPasswords({ chromeDir, profile, search: name });
    }
    if (!credentials || credentials.length === 0) {
        throw new Error(`No saved passwords found in Chrome for '${domain}'`);
    }
    // Prefer the first complete entry instead of failing on an incomplete first row.
    const complete = credentials.find((entry) => entry?.username && entry?.password);
    if (!complete) {
        throw new Error(`Found credentials for '${domain}' but username or password was empty`);
    }
    return { username: complete.username, password: complete.password };
}
|
|
39
|
+
/**
 * Resolves decrypted Chrome cookies for a domain. When `required` is false a
 * missing login yields an empty array instead of throwing.
 *
 * For required sites the two failure modes are reported distinctly:
 * - `getCookies` throws → Chrome's cookie store couldn't be read at all
 *   (e.g. keychain access denied, missing/locked profile). → "No login found".
 *   The original error is attached as `cause`.
 * - it returns no rows → the store was read fine but holds no cookies for the
 *   domain: the user simply isn't signed in there. → "No cookies found".
 *
 * @param domain Domain to read cookies for.
 * @param options Query options; `profilePath` / `profile` select the Chrome store.
 * @param required Whether missing cookies are an error.
 * @param providers Optional overrides: `env` (defaults to `process.env`) and
 *   `getCookies` (defaults to chrome-tools' implementation).
 * @returns The cookie entries, or `[]` when none exist and `required` is false.
 * @throws {Error} For required sites, in either failure mode described above.
 */
export function resolveCookies(domain, options, required, providers = {}) {
    const env = providers.env ?? process.env;
    const getCookies = providers.getCookies ?? realGetCookies;
    const chromeDir = resolveChromeDir(options, env);
    const profile = resolveProfile(options, env);
    let cookies;
    try {
        cookies = getCookies({ chromeDir, profile, domain, decrypt: true });
    }
    catch (err) {
        // Optional cookies: an unreadable store is treated the same as "not signed in".
        if (!required)
            return [];
        const detail = err instanceof Error ? err.message : String(err);
        throw new Error(`No login found in browser: could not read Chrome cookies for ${domain} (${detail})`, { cause: err });
    }
    if ((!cookies || cookies.length === 0) && required) {
        throw new Error(`No cookies found in browser for ${domain}. Sign in to ${domain} in Chrome and try again.`);
    }
    return cookies ?? [];
}
|
|
@@ -1 +1,32 @@
|
|
|
1
|
-
import
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
/**
 * Builds a `save(filename, content)` function bound to a target directory.
 * Creates the directory on first write and returns the absolute path written.
 * `outDir` of null means "current working directory".
 *
 * @param outDir Output directory, resolved against `cwd`; null/undefined means `cwd` itself.
 * @param cwd Base directory for resolving `outDir` (defaults to `process.cwd()`).
 * @returns An async `save(filename, content)` that resolves to the written file's path.
 *   It rejects when `filename` has no usable basename ("", "." or "..").
 */
export function createSaver(outDir, cwd = process.cwd()) {
    const targetDir = path.resolve(cwd, outDir ?? ".");
    let ensured = false;
    return async function save(filename, content) {
        // Defend against path traversal: only the basename is honored.
        const name = path.basename(filename);
        // basename() leaves "", "." and ".." untouched; joined with targetDir they name a directory
        // (targetDir itself or its parent), so reject them up front with a clear message.
        if (name === "" || name === "." || name === "..") {
            throw new Error(`Invalid download filename: ${JSON.stringify(filename)}`);
        }
        if (!ensured) {
            await fs.mkdir(targetDir, { recursive: true });
            ensured = true;
        }
        const filePath = path.join(targetDir, name);
        await fs.writeFile(filePath, content);
        return filePath;
    };
}
|
|
22
|
+
/**
 * Rejects a downloaded payload that is actually an HTML page — a common
 * symptom of an expired session — and otherwise passes it through untouched.
 *
 * @param text Downloaded payload; null/undefined is treated as empty for the check.
 * @param label Name of the download, used in the error message.
 * @returns `text`, unchanged, when it does not start with an HTML doctype or `<html` tag.
 * @throws {Error} When the payload (ignoring leading whitespace) starts like an HTML document.
 */
export function assertNotHtml(text, label) {
    const head = String(text ?? "").trimStart();
    const looksLikeHtml = [/^<!doctype html/i, /^<html/i].some((pattern) => pattern.test(head));
    if (!looksLikeHtml) {
        return text;
    }
    throw new Error(`Download for ${label} returned HTML instead of data. The session may have expired.`);
}
|