website-api 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -1
- package/dist/bin/cli.js +204 -1
- package/dist/src/capabilities/browser.d.ts +8 -2
- package/dist/src/capabilities/browser.js +106 -1
- package/dist/src/capabilities/cookies.d.ts +7 -1
- package/dist/src/capabilities/cookies.js +68 -1
- package/dist/src/capabilities/download.js +32 -1
- package/dist/src/capabilities/fingerprint.js +62 -1
- package/dist/src/capabilities/http.js +101 -1
- package/dist/src/capabilities/login/login-helper.js +185 -1
- package/dist/src/capabilities/login/login-strategy.js +36 -1
- package/dist/src/challenges/perimeterx.d.ts +62 -0
- package/dist/src/challenges/perimeterx.js +112 -0
- package/dist/src/cli/ext.js +338 -1
- package/dist/src/core/context.d.ts +2 -2
- package/dist/src/core/context.js +137 -1
- package/dist/src/core/define-site.js +74 -1
- package/dist/src/core/loader.js +142 -1
- package/dist/src/core/registry.js +332 -1
- package/dist/src/core/runtime.d.ts +12 -4
- package/dist/src/core/runtime.js +98 -1
- package/dist/src/env.js +34 -1
- package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
- package/dist/src/sites/bloomberg.com/index.js +49 -0
- package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
- package/dist/src/sites/chase.com/download-helper.js +266 -1
- package/dist/src/sites/chase.com/index.js +87 -1
- package/dist/src/sites/chase.com/openapi.yaml +76 -0
- package/dist/src/sites/chatgpt.com/index.js +24 -1
- package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
- package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
- package/dist/src/sites/claude.ai/index.js +42 -1
- package/dist/src/sites/claude.ai/openapi.yaml +54 -0
- package/dist/src/sites/cursor.com/index.js +12 -1
- package/dist/src/sites/cursor.com/openapi.yaml +39 -0
- package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
- package/dist/src/sites/e-zpassny.com/index.js +344 -0
- package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
- package/dist/src/sites/gemini.google.com/index.js +80 -1
- package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
- package/dist/src/sites/google.com/google-helpers.js +255 -1
- package/dist/src/sites/google.com/index.js +253 -1
- package/dist/src/sites/google.com/openapi.yaml +59 -0
- package/dist/src/sites/ollama.com/index.js +43 -1
- package/dist/src/sites/ollama.com/openapi.yaml +39 -0
- package/dist/src/sites/perplexity.ai/index.js +253 -1
- package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
- package/dist/src/sites/pseg.com/index.js +243 -1
- package/dist/src/sites/pseg.com/openapi.yaml +42 -0
- package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
- package/dist/src/sites/voice.google.com/index.d.ts +2 -0
- package/dist/src/sites/voice.google.com/index.js +122 -0
- package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
- package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
- package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
- package/dist/src/sites/zillow.com/index.d.ts +2 -0
- package/dist/src/sites/zillow.com/index.js +303 -0
- package/dist/src/sites/zillow.com/openapi.yaml +55 -0
- package/dist/src/types.d.ts +7 -0
- package/dist/src/types.js +1 -1
- package/dist/src/util/args-parser.js +145 -1
- package/dist/src/util/google-json.js +74 -1
- package/dist/src/website-api.d.ts +7 -7
- package/dist/src/website-api.js +13 -1
- package/package.json +37 -10
package/dist/src/cli/ext.js
CHANGED
|
@@ -1 +1,338 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { existsSync, readdirSync, statSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
import { stdin, stdout } from "node:process";
|
|
4
|
+
import { createInterface } from "node:readline/promises";
|
|
5
|
+
import { pathToFileURL } from "node:url";
|
|
6
|
+
import chalk from "chalk";
|
|
7
|
+
import Table from "cli-table3";
|
|
8
|
+
import { createContext } from "../core/context.js";
|
|
9
|
+
import { defineSite } from "../core/define-site.js";
|
|
10
|
+
import { addRegistry, installEntry, listInstalled, loadIndex, removeInstalled, removeRegistry, resolveEntry, resolveRegistries, searchRegistries, } from "../core/registry.js";
|
|
11
|
+
import { getSite, loadSites } from "../core/runtime.js";
|
|
12
|
+
import { parseArgsForWebsite } from "../util/args-parser.js";
|
|
13
|
+
/**
 * Prompts the user with a yes/no question on the terminal.
 *
 * @param {string} question - Text shown before the `[y/N]` hint.
 * @returns {Promise<boolean>} `true` only when the reply is "y" or "yes"
 *   (case-insensitive, surrounding whitespace ignored); `false` otherwise,
 *   including whenever stdin is not a TTY.
 */
async function confirm(question) {
    // Nobody can answer in a non-interactive shell, so never auto-confirm.
    if (!stdin.isTTY) {
        return false;
    }
    const prompt = createInterface({ input: stdin, output: stdout });
    try {
        const raw = await prompt.question(`${question} ${chalk.gray("[y/N]")} `);
        const reply = raw.trim().toLowerCase();
        return ["y", "yes"].includes(reply);
    }
    finally {
        // Always release the readline interface, even if the prompt throws.
        prompt.close();
    }
}
|
|
26
|
+
/**
 * Prints a red `Error: …` line to stderr and terminates the process with
 * exit code 1. Control never returns to the caller.
 *
 * @param {string} message - Human-readable failure description.
 */
function fail(message) {
    const line = `Error: ${message}`;
    console.error(chalk.red(line));
    process.exit(1);
}
|
|
30
|
+
/** Extracts a printable message from any thrown value. */
const errMsg = (err) => {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
};
|
|
31
|
+
/**
 * Resolves a path (file or directory) to a loadable site entry file.
 *
 * Resolution order for a directory:
 *   1. `index.mjs`, then `index.js`;
 *   2. otherwise the lexicographically first `.js`/`.mjs` file whose name does
 *      not contain `.test.`.
 *
 * Only regular files are considered, so a sub-directory that happens to be
 * named `something.js` is never returned, and the fallback no longer depends
 * on the order in which the OS lists directory entries.
 *
 * @param {string} path - File or directory, relative to the current working
 *   directory or absolute.
 * @returns {string} Absolute path of the entry file. When nothing suitable is
 *   found, `fail()` is called, which exits the process.
 */
function resolveLocalEntry(path) {
    const abs = resolve(process.cwd(), path);
    if (!existsSync(abs))
        fail(`path not found: ${path}`);
    if (statSync(abs).isFile())
        return abs;
    // True when `name` inside the directory is a regular file; unreadable or
    // dangling entries count as "not a file" instead of aborting resolution.
    const isRegularFile = (name) => {
        try {
            return statSync(join(abs, name)).isFile();
        }
        catch {
            return false;
        }
    };
    const files = readdirSync(abs);
    for (const candidate of ["index.mjs", "index.js"]) {
        if (files.includes(candidate) && isRegularFile(candidate))
            return join(abs, candidate);
    }
    // Sort so the chosen fallback is deterministic across platforms.
    const [fallback] = files
        .filter((f) => /\.(m?js)$/.test(f) && !f.includes(".test.") && isRegularFile(f))
        .sort();
    if (fallback)
        return join(abs, fallback);
    fail(`no .js/.mjs entry found in ${path}`);
}
|
|
48
|
+
/**
 * Writes a compact usage summary for a site loaded from disk to stdout:
 * a title line, the description, the positionals (when there are any) and
 * one line per option.
 *
 * @param {object} site - Normalized site; `name`, `id`, `description`,
 *   `positionals` and `parameters` are read.
 * @param {string} entry - Path of the file the site was loaded from.
 */
function printLocalHelp(site, entry) {
    const title = chalk.bold.green(`\n${site.name} (${site.id})`);
    const origin = chalk.gray(` — ${entry}\n`);
    console.log(title + origin);
    console.log(` ${chalk.italic(site.description)}\n`);
    if (site.positionals.length) {
        console.log(chalk.bold("Positionals:"));
        for (const positional of site.positionals) {
            const label = chalk.cyan(positional.name.padEnd(16));
            console.log(` ${label} ${positional.description}`);
        }
        console.log();
    }
    console.log(chalk.bold("Options:"));
    for (const param of site.parameters) {
        // Boolean switches take no value; everything else shows a placeholder.
        const flag = `${param.short ? `-${param.short}, ` : " "}--${param.name}${param.type === "boolean" ? "" : " <value>"}`;
        const label = chalk.yellow(flag.padEnd(26));
        console.log(` ${label} ${param.description}`);
    }
    console.log();
}
|
|
65
|
+
/**
 * Registers the `ext` command group on the given Commander program.
 *
 * Sub-commands added under `ext`:
 *   - `test <path> [args...]` — load a site file from disk and run it once;
 *   - `search [query]`        — list installable sites from the registries;
 *   - `info <id>`             — show catalog details for one site;
 *   - `install <id>`          — confirm, then install a site (alias `add-site`);
 *   - `list`                  — list installed registry sites;
 *   - `remove <id>`           — remove an installed site (alias `uninstall`);
 *   - `update [id]`           — re-install sites whose registry commit changed;
 *   - `registry list|add|remove` — manage the configured registries.
 *
 * @param {object} program - Commander program to extend; positional-options
 *   mode is enabled on it as a side effect.
 */
export function registerExtCommands(program) {
    // `test` forwards unknown flags (e.g. --limit) to the site being tested, which
    // requires positional-options mode on its parent commands.
    program.enablePositionalOptions();
    const ext = program
        .command("ext")
        .description("Discover and install website extensions from a public registry")
        .enablePositionalOptions();
    // ── test (run a local file directly, no install) ──
    ext
        .command("test <path> [args...]")
        .description("Load and run a site .js file (or its folder) directly from disk — no install")
        .allowUnknownOption()
        .passThroughOptions()
        .action(async (path, args) => {
            const entry = resolveLocalEntry(path);
            let mod;
            try {
                mod = await import(pathToFileURL(entry).href);
            }
            catch (err) {
                fail(`failed to import ${entry}: ${errMsg(err)}`);
            }
            const def = mod.default ?? mod.site;
            if (!def || typeof def !== "object")
                fail(`${entry} does not default-export a site object`);
            let site;
            try {
                site = defineSite(def);
            }
            catch (err) {
                fail(errMsg(err));
            }
            site.origin = "extension";
            let parsed;
            try {
                // Drop a standalone "--" separator that passthrough forwards verbatim.
                const siteArgs = (args ?? []).filter((a) => a !== "--");
                parsed = parseArgsForWebsite(site.positionals, site.parameters, siteArgs);
            }
            catch (err) {
                fail(errMsg(err));
            }
            if (parsed.helpRequested) {
                printLocalHelp(site, entry);
                return;
            }
            // Status goes to stderr so stdout stays clean (pipeable) data.
            console.error(chalk.gray(`▶ running "${site.id}" from ${entry} (not installed)`));
            const { ctx, dispose } = createContext(site, parsed.options);
            // `fail()` exits the process immediately, which would skip a pending
            // `finally`. Record the failure instead, tear the context down, and
            // only then exit so the context is always disposed.
            let failureMessage = null;
            try {
                const data = await site.run(ctx);
                const output = typeof data === "string" ? data : JSON.stringify(data, null, 2);
                if (parsed.options.out) {
                    writeFileSync(parsed.options.out, output + "\n", "utf8");
                    console.error(chalk.green(`✓ wrote ${parsed.options.out}`));
                }
                else {
                    console.log(output);
                }
            }
            catch (err) {
                failureMessage = errMsg(err);
            }
            finally {
                await dispose();
            }
            if (failureMessage !== null)
                fail(failureMessage);
        });
    // ── search ──
    ext
        .command("search [query]")
        .description("Search configured registries for installable sites")
        .option("--refresh", "Bypass the cached index and re-fetch")
        .action(async (query, opts) => {
            const results = await searchRegistries(query ?? "", { refresh: opts.refresh });
            if (results.length === 0) {
                console.log(chalk.yellow(`No sites${query ? ` matching "${query}"` : ""} found.`));
                return;
            }
            const table = new Table({
                head: [
                    chalk.bold.cyan("ID"),
                    chalk.bold.cyan("Name"),
                    chalk.bold.cyan("Domain"),
                    chalk.bold.cyan("Registry"),
                    chalk.bold.cyan("Description"),
                ],
                colWidths: [16, 22, 18, 14, 44],
                wordWrap: true,
                style: { head: [], border: [] },
            });
            for (const r of results) {
                // Markers appended to the name: [p] browser transport, [l] needs login.
                const markers = [];
                if (r.transport === "browser")
                    markers.push(chalk.magenta("[p]"));
                if (r.auth)
                    markers.push(chalk.red("[l]"));
                const name = markers.length ? `${r.name} ${markers.join(" ")}` : r.name;
                table.push([chalk.yellow(r.id), name, chalk.underline(r.domain), r.registry.name, r.description]);
            }
            console.log(table.toString());
            console.log(`\nInstall one with: ${chalk.bold.cyan("npx website-api ext install <id>")}\n`);
        });
    // ── info ──
    ext
        .command("info <id>")
        .description("Show full catalog details for a single site")
        .option("--registry <name>", "Disambiguate when multiple registries offer the id")
        .action(async (id, opts) => {
            const { source, entry } = await resolveEntry(id, { registryName: opts.registry });
            console.log(chalk.bold.green(`\n🌐 ${entry.name} ${chalk.yellow(`(${entry.id})`)}\n`));
            console.log(` ${chalk.italic(entry.description)}\n`);
            const rows = [
                ["Domain", entry.domain],
                ["Registry", `${source.name} (${source.repo})`],
                ["Version", entry.version ?? "—"],
                ["Transport", entry.transport ?? "http"],
                ["Requires login", entry.auth ? "yes" : "no"],
                ["Tags", entry.tags?.join(", ") || "—"],
                ["Files", entry.files.map((f) => f.name).join(", ")],
            ];
            for (const [k, v] of rows)
                console.log(` ${chalk.bold((k + ":").padEnd(16))} ${v}`);
            console.log(`\nInstall with: ${chalk.bold.cyan(`npx website-api ext install ${entry.id}`)}\n`);
        });
    // ── install ──
    ext
        .command("install <id>")
        .alias("add-site")
        .description("Download and install a site from a registry into your extensions folder")
        .option("--registry <name>", "Disambiguate when multiple registries offer the id")
        .option("-y, --yes", "Skip the confirmation prompt")
        .option("--refresh", "Bypass the cached index and re-fetch")
        .action(async (id, opts) => {
            const { source, index, entry } = await resolveEntry(id, {
                registryName: opts.registry,
                refresh: opts.refresh,
            });
            // Security gate: an installed site runs with the user's decrypted Chrome
            // cookies and credentials. Surface what it is and where it came from.
            console.log(chalk.bold(`\nAbout to install ${chalk.yellow(entry.id)} — ${entry.name}`));
            console.log(` ${chalk.bold("Domain:")} ${entry.domain}`);
            console.log(` ${chalk.bold("Registry:")} ${source.name} (${source.repo}@${index.commit.slice(0, 10)})`);
            console.log(` ${chalk.bold("Files:")} ${entry.files.map((f) => f.name).join(", ")}`);
            if (entry.auth)
                console.log(chalk.red(` ⚠ This site performs a login and will read saved credentials for ${entry.domain}.`));
            // Warn if this would shadow a bundled/site already loaded by the same id.
            await loadSites();
            const existing = getSite(entry.id);
            if (existing?.origin === "bundled") {
                console.log(chalk.yellow(` ⚠ This will shadow the bundled "${entry.id}" site.`));
            }
            console.log(chalk.gray(` ⚠ Installing runs third-party code with your browser session. Only install sites you trust.\n`));
            if (!opts.yes && !(await confirm("Install this site?"))) {
                console.log(chalk.yellow("Aborted."));
                return;
            }
            const { dir } = await installEntry(source, index, entry, { refresh: opts.refresh });
            console.log(chalk.green(`\n✓ Installed ${entry.id} → ${dir}`));
            console.log(`Run it with: ${chalk.bold.cyan(`npx website-api ${entry.id}`)}\n`);
        });
    // ── list (installed) ──
    ext
        .command("list")
        .description("List sites you have installed from registries")
        .action(() => {
            const installed = listInstalled();
            if (installed.length === 0) {
                console.log(chalk.yellow("No registry sites installed. Try: ") + chalk.cyan("npx website-api ext search"));
                return;
            }
            const table = new Table({
                head: [
                    chalk.bold.cyan("ID"),
                    chalk.bold.cyan("Version"),
                    chalk.bold.cyan("Registry"),
                    chalk.bold.cyan("Commit"),
                    chalk.bold.cyan("Installed"),
                ],
                style: { head: [], border: [] },
            });
            for (const r of installed) {
                table.push([
                    chalk.yellow(r.id),
                    r.version ?? "—",
                    r.repo,
                    r.commit.slice(0, 10),
                    r.installedAt.slice(0, 10),
                ]);
            }
            console.log(table.toString());
        });
    // ── remove ──
    ext
        .command("remove <id>")
        .alias("uninstall")
        .description("Remove an installed registry site")
        .action((id) => {
            if (removeInstalled(id))
                console.log(chalk.green(`✓ Removed ${id}`));
            else
                fail(`"${id}" is not installed`);
        });
    // ── update ──
    ext
        .command("update [id]")
        .description("Re-install installed sites whose registry commit has changed")
        .option("-y, --yes", "Skip the confirmation prompt")
        .action(async (id, opts) => {
            const installed = listInstalled().filter((r) => !id || r.id === id);
            if (installed.length === 0)
                fail(id ? `"${id}" is not installed` : "no registry sites installed");
            let updated = 0;
            for (const record of installed) {
                let resolved;
                try {
                    resolved = await resolveEntry(record.id, { registryName: record.registry, refresh: true });
                }
                catch (err) {
                    // Report the actual reason: the lookup can fail for causes other
                    // than the site having been removed from the registry.
                    console.log(chalk.yellow(`• ${record.id}: not resolvable in registry (${errMsg(err)}), skipping`));
                    continue;
                }
                if (resolved.index.commit === record.commit) {
                    console.log(chalk.gray(`• ${record.id}: up to date`));
                    continue;
                }
                console.log(`• ${record.id}: ${record.commit.slice(0, 10)} → ${resolved.index.commit.slice(0, 10)}`);
                if (!opts.yes && !(await confirm(` Update ${record.id}?`)))
                    continue;
                await installEntry(resolved.source, resolved.index, resolved.entry);
                console.log(chalk.green(` ✓ updated ${record.id}`));
                updated++;
            }
            console.log(updated ? chalk.green(`\nUpdated ${updated} site(s).`) : chalk.gray("\nNothing to update."));
        });
    // ── registry management ──
    const reg = ext.command("registry").description("Manage the registries searched for sites");
    reg
        .command("list")
        .description("List configured registries (in search priority order)")
        .action(() => {
            const table = new Table({
                head: [chalk.bold.cyan("Name"), chalk.bold.cyan("Repo"), chalk.bold.cyan("Branch")],
                style: { head: [], border: [] },
            });
            for (const r of resolveRegistries())
                table.push([r.name, r.repo, r.branch]);
            console.log(table.toString());
        });
    reg
        .command("add <spec>")
        .description("Add a registry (owner/repo, owner/repo#branch, or a github.com URL)")
        .action(async (spec) => {
            const source = addRegistry(spec);
            try {
                const index = await loadIndex(source, { refresh: true });
                console.log(chalk.green(`✓ Added registry ${source.name} (${source.repo}) — ${index.sites.length} site(s) available`));
            }
            catch (err) {
                // Keep it configured but warn the catalog could not be read yet.
                console.log(chalk.yellow(`Added ${source.repo}, but its index.json could not be fetched: ${errMsg(err)}`));
            }
        });
    reg
        .command("remove <repoOrName>")
        .description("Remove a configured registry")
        .action((repoOrName) => {
            if (removeRegistry(repoOrName))
                console.log(chalk.green(`✓ Removed registry ${repoOrName}`));
            else
                fail(`registry "${repoOrName}" not found in config`);
        });
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { QueryOptions, Site, SiteContext } from "../types.js";
|
|
2
|
-
import { type CookieProviders } from "../capabilities/cookies.js";
|
|
3
1
|
import { type BrowserConnector } from "../capabilities/browser.js";
|
|
2
|
+
import { type CookieProviders } from "../capabilities/cookies.js";
|
|
3
|
+
import type { QueryOptions, Site, SiteContext } from "../types.js";
|
|
4
4
|
/**
|
|
5
5
|
* Injectable dependencies. In production all default to the real
|
|
6
6
|
* implementations; tests pass fakes to exercise a site or capability without a
|
package/dist/src/core/context.js
CHANGED
|
@@ -1 +1,137 @@
|
|
|
1
|
-
import{resolve
|
|
1
|
+
import { resolve } from "node:path";
|
|
2
|
+
import { connectChrome } from "../capabilities/browser.js";
|
|
3
|
+
import { buildCookieString, resolveCookies, resolveCredentials, resolveUserAgent, } from "../capabilities/cookies.js";
|
|
4
|
+
import { createSaver } from "../capabilities/download.js";
|
|
5
|
+
import { applyFingerprint } from "../capabilities/fingerprint.js";
|
|
6
|
+
import { createHttp } from "../capabilities/http.js";
|
|
7
|
+
/**
 * Interprets an environment-flag string as a boolean.
 *
 * @param {string | undefined} value - Raw variable value.
 * @returns {boolean} `true` for `1`, `true`, `yes` or `on` (any letter case,
 *   surrounding whitespace ignored); `false` for anything else, including an
 *   empty or missing value.
 */
function isTruthyEnv(value) {
    if (!value) {
        return false;
    }
    switch (value.trim().toLowerCase()) {
        case "1":
        case "true":
        case "yes":
        case "on":
            return true;
        default:
            return false;
    }
}
|
|
13
|
+
/**
 * Decides whether a freshly launched managed browser runs headless.
 *
 * The managed browser launches **headless by default**. When a CDP session is
 * already open, chrome-cdp-manager attaches to it instead of launching, so the
 * result only matters for a fresh launch. `--headed` (or `CDP_HEADLESS=false`)
 * forces a visible window, e.g. to solve a captcha that needs a real
 * press-and-hold.
 *
 * @param {boolean | undefined} headed - Value of the `--headed` option.
 * @param {string | undefined} headlessEnv - Raw `CDP_HEADLESS` value.
 * @returns {boolean} `true` to launch headless.
 */
function resolveHeadless(headed, headlessEnv) {
    // An explicit --headed request outranks the environment variable.
    if (headed) {
        return false;
    }
    // With no environment override, fall back to the headless default.
    return headlessEnv === undefined ? true : isTruthyEnv(headlessEnv);
}
|
|
26
|
+
/**
 * Wraps `compute` so that it runs at most once successfully; later calls
 * return the stored result, even when that result is `undefined`. If
 * `compute` throws, nothing is stored and the next call tries again.
 */
function memoizeOnce(compute) {
    let resolved = false;
    let value;
    return () => {
        if (!resolved) {
            value = compute();
            resolved = true;
        }
        return value;
    };
}
/**
 * Builds the lazy capability context handed to `site.run(ctx)`. Nothing
 * expensive happens until a capability is touched: cookies are read on first
 * `cookies()`/HTTP call, and Chrome is launched on first `browser()`.
 *
 * @param {object} site - Normalized site; `cookies`, `cookieDomain`, `domain`,
 *   `landingUrl`, `fingerprint`, `auth` and `keepBrowserOpen` are read.
 * @param {object} [options] - Per-invocation options; `debug`, `close`,
 *   `keepOpen`, `headed` and `outDir` are read here and the whole object is
 *   forwarded to the cookie / credential / user-agent resolvers.
 * @param {object} [providers] - Injectable dependencies; `env`, `fetchImpl`,
 *   `connectBrowser` and `cwd` are read here and the whole object is forwarded
 *   to the cookie and credential resolvers.
 * @returns {{ ctx: object, dispose: () => Promise<void> }} The context plus a
 *   teardown function that disposes the browser session if one was opened.
 */
export function createContext(site, options = {}, providers = {}) {
    const env = providers.env ?? process.env;
    const debug = !!options.debug;
    const required = site.cookies === "required";
    // ── memoized session resolution ──
    const cookies = memoizeOnce(() => resolveCookies(site.cookieDomain, options, required, providers));
    const cookieString = () => buildCookieString(cookies());
    const userAgent = memoizeOnce(() => resolveUserAgent(options, env));
    // Memoized: resolving credentials reads Chrome's password store, which can
    // trigger a keychain prompt — it must happen at most once per invocation,
    // including when the lookup yields nothing.
    const credentials = memoizeOnce(() => resolveCredentials(site.domain, options, providers));
    const http = createHttp({ fetchImpl: providers.fetchImpl, cookieString, userAgent, debug });
    // ── browser capability (lazy + memoized) ──
    const connect = providers.connectBrowser ?? connectChrome;
    let session;
    let pagePromise;
    // Whether to close a tab we open on teardown. Precedence:
    // explicit options.close → --keep-open flag → the site's keepBrowserOpen
    const shouldClose = options.close !== undefined ? !!options.close : !(options.keepOpen || site.keepBrowserOpen);
    // Whether the managed browser should launch headless (default true). An
    // already-open CDP session is reused regardless; this only affects a fresh launch.
    const headless = resolveHeadless(options.headed, env.CDP_HEADLESS);
    const browser = () => {
        if (!pagePromise) {
            pagePromise = (async () => {
                session = await connect(site.landingUrl, {
                    cdpEndpoint: env.CDP_ENDPOINT,
                    headless,
                    close: shouldClose,
                    debug,
                });
                await applyFingerprint(session.page, site.fingerprint, userAgent());
                if (site.auth) {
                    await site.auth.ensureLoggedIn({
                        page: session.page,
                        debug,
                        getCredentials: credentials,
                    });
                }
                return session.page;
            })();
            // Don't cache a rejected setup: a retry within the same run() should
            // attempt a fresh connection, with any partial session torn down first.
            pagePromise = pagePromise.catch(async (err) => {
                pagePromise = undefined;
                if (session) {
                    const partial = session;
                    session = undefined;
                    await partial.dispose().catch(() => { });
                }
                throw err;
            });
        }
        return pagePromise;
    };
    const outDir = options.outDir ? resolve(providers.cwd ?? process.cwd(), options.outDir) : null;
    const save = createSaver(options.outDir ?? null, providers.cwd);
    const ctx = {
        site,
        domain: site.domain,
        options,
        debug,
        outDir,
        cookies,
        cookieString,
        credentials,
        userAgent,
        http,
        browser,
        // Evaluates `fn` in the page, opening the browser first if needed.
        async eval(fn) {
            const page = await browser();
            return page.evaluate(fn);
        },
        save,
    };
    return {
        ctx,
        async dispose() {
            if (session) {
                try {
                    await session.dispose();
                }
                catch {
                    // ignore teardown failures
                }
            }
        },
    };
}
|
|
@@ -1 +1,74 @@
|
|
|
1
|
-
import{toLoginStrategy
|
|
1
|
+
import { toLoginStrategy } from "../capabilities/login/login-strategy.js";
|
|
2
|
+
/**
 * Tells whether `value` is an already-normalized site, so that loading the
 * same definition twice is idempotent.
 *
 * @param {unknown} value - Candidate to inspect.
 * @returns {boolean} `true` only for a non-null object whose `__site`
 *   property is exactly `true`.
 */
export function isSite(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    return value.__site === true;
}
|
|
6
|
+
/**
 * Fetches one declared endpoint and applies its transform.
 *
 * @param {object} endpoint - Endpoint declaration; `url`, `method`, `headers`,
 *   `responseType` and `transform` are read.
 * @param {object} ctx - Site context; `ctx.http.text` is used for the `html`
 *   and `text` response types and `ctx.http.json` for everything else.
 * @returns {Promise<unknown>} The body, passed through `endpoint.transform`
 *   when one is declared.
 */
async function fetchEndpoint(endpoint, ctx) {
    const kind = endpoint.responseType;
    const wantsText = kind === "html" || kind === "text";
    // JSON is the default; html and text each get a more specific Accept.
    let accept = "application/json, text/plain, */*";
    if (kind === "html") {
        accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    }
    else if (kind === "text") {
        accept = "text/plain,*/*;q=0.8";
    }
    const request = {
        method: endpoint.method || "GET",
        // Endpoint-declared headers are spread last so they can override Accept.
        headers: { Accept: accept, ...endpoint.headers },
    };
    const body = wantsText
        ? await ctx.http.text(endpoint.url, request)
        : await ctx.http.json(endpoint.url, request);
    if (!endpoint.transform) {
        return body;
    }
    return endpoint.transform(body, ctx);
}
|
|
26
|
+
/**
 * Creates the default `run` implementation for a declarative, endpoint-based
 * site. A single endpoint yields its (transformed) body directly; any other
 * count is fetched concurrently and yields an array in declaration order.
 *
 * @param {object[]} endpoints - Endpoint declarations.
 * @returns {(ctx: object) => Promise<unknown>} The `run` function.
 */
function endpointRun(endpoints) {
    return async (ctx) => {
        if (endpoints.length !== 1) {
            const pending = endpoints.map((endpoint) => fetchEndpoint(endpoint, ctx));
            return Promise.all(pending);
        }
        return fetchEndpoint(endpoints[0], ctx);
    };
}
|
|
38
|
+
/**
 * Normalizes a minimal {@link SiteDef} into a fully-defaulted {@link Site}.
 * Accepts a plain object (the common external-extension case), an already
 * normalized site, or a class instance / factory result with the same fields.
 *
 * @param {object} def - Site definition. `id`, `name`, `domain` and
 *   `description` must be non-empty strings, and at least one of `run` or a
 *   non-empty `endpoints` array must be present.
 * @returns {object} The normalized site, marked with `__site: true`. An
 *   already-normalized site is returned as-is.
 * @throws {Error} When `def` is null/undefined, a required string field is
 *   missing, `run` is set but not callable, or neither `run` nor `endpoints`
 *   is provided.
 */
export function defineSite(def) {
    if (isSite(def))
        return def;
    // Reject null/undefined up front with a clear message rather than letting
    // the field checks below crash with an opaque TypeError.
    if (def === null || def === undefined) {
        throw new Error(`Site definition must be an object, got ${def}`);
    }
    for (const field of ["id", "name", "domain", "description"]) {
        if (!def[field] || typeof def[field] !== "string") {
            throw new Error(`Site is missing required string field "${field}"`);
        }
    }
    const hasEndpoints = Array.isArray(def.endpoints) && def.endpoints.length > 0;
    if (!def.run && !hasEndpoints) {
        throw new Error(`Site "${def.id}" must define either "endpoints" or "run"`);
    }
    // A truthy but non-callable `run` would otherwise only fail when the site
    // is executed; surface the mistake at definition time instead.
    if (def.run && typeof def.run !== "function") {
        throw new Error(`Site "${def.id}": "run" must be a function`);
    }
    // Call through `def` so a class instance keeps its `this` binding.
    const run = def.run ? async (ctx) => def.run(ctx) : endpointRun(def.endpoints);
    return {
        id: def.id,
        name: def.name,
        domain: def.domain,
        cookieDomain: def.cookieDomain ?? def.domain,
        description: def.description,
        transport: def.transport ?? "http",
        cookies: def.cookies ?? "required",
        fingerprint: def.fingerprint ?? "stealth",
        keepBrowserOpen: def.keepBrowserOpen ?? false,
        auth: def.auth ? toLoginStrategy(def.auth) : undefined,
        parameters: def.parameters ?? [],
        positionals: def.positionals ?? [],
        // The first endpoint doubles as the landing page; otherwise the site root.
        landingUrl: def.endpoints?.[0]?.url ?? `https://${def.domain}`,
        endpoints: def.endpoints,
        run,
        __site: true,
    };
}
|