website-api 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +141 -1
  2. package/dist/bin/cli.js +204 -1
  3. package/dist/src/capabilities/browser.d.ts +8 -2
  4. package/dist/src/capabilities/browser.js +106 -1
  5. package/dist/src/capabilities/cookies.d.ts +7 -1
  6. package/dist/src/capabilities/cookies.js +68 -1
  7. package/dist/src/capabilities/download.js +32 -1
  8. package/dist/src/capabilities/fingerprint.js +62 -1
  9. package/dist/src/capabilities/http.js +101 -1
  10. package/dist/src/capabilities/login/login-helper.js +185 -1
  11. package/dist/src/capabilities/login/login-strategy.js +36 -1
  12. package/dist/src/challenges/perimeterx.d.ts +62 -0
  13. package/dist/src/challenges/perimeterx.js +112 -0
  14. package/dist/src/cli/ext.js +338 -1
  15. package/dist/src/core/context.d.ts +2 -2
  16. package/dist/src/core/context.js +137 -1
  17. package/dist/src/core/define-site.js +74 -1
  18. package/dist/src/core/loader.js +142 -1
  19. package/dist/src/core/registry.js +332 -1
  20. package/dist/src/core/runtime.d.ts +12 -4
  21. package/dist/src/core/runtime.js +98 -1
  22. package/dist/src/env.js +34 -1
  23. package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
  24. package/dist/src/sites/bloomberg.com/index.js +49 -0
  25. package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
  26. package/dist/src/sites/chase.com/download-helper.js +266 -1
  27. package/dist/src/sites/chase.com/index.js +87 -1
  28. package/dist/src/sites/chase.com/openapi.yaml +76 -0
  29. package/dist/src/sites/chatgpt.com/index.js +24 -1
  30. package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
  31. package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
  32. package/dist/src/sites/claude.ai/index.js +42 -1
  33. package/dist/src/sites/claude.ai/openapi.yaml +54 -0
  34. package/dist/src/sites/cursor.com/index.js +12 -1
  35. package/dist/src/sites/cursor.com/openapi.yaml +39 -0
  36. package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
  37. package/dist/src/sites/e-zpassny.com/index.js +344 -0
  38. package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
  39. package/dist/src/sites/gemini.google.com/index.js +80 -1
  40. package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
  41. package/dist/src/sites/google.com/google-helpers.js +255 -1
  42. package/dist/src/sites/google.com/index.js +253 -1
  43. package/dist/src/sites/google.com/openapi.yaml +59 -0
  44. package/dist/src/sites/ollama.com/index.js +43 -1
  45. package/dist/src/sites/ollama.com/openapi.yaml +39 -0
  46. package/dist/src/sites/perplexity.ai/index.js +253 -1
  47. package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
  48. package/dist/src/sites/pseg.com/index.js +243 -1
  49. package/dist/src/sites/pseg.com/openapi.yaml +42 -0
  50. package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
  51. package/dist/src/sites/voice.google.com/index.d.ts +2 -0
  52. package/dist/src/sites/voice.google.com/index.js +122 -0
  53. package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
  54. package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
  55. package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
  56. package/dist/src/sites/zillow.com/index.d.ts +2 -0
  57. package/dist/src/sites/zillow.com/index.js +303 -0
  58. package/dist/src/sites/zillow.com/openapi.yaml +55 -0
  59. package/dist/src/types.d.ts +7 -0
  60. package/dist/src/types.js +1 -1
  61. package/dist/src/util/args-parser.js +145 -1
  62. package/dist/src/util/google-json.js +74 -1
  63. package/dist/src/website-api.d.ts +7 -7
  64. package/dist/src/website-api.js +13 -1
  65. package/package.json +37 -10
@@ -1 +1,338 @@
1
- import{createInterface as o}from"node:readline/promises";import{stdin as e,stdout as n}from"node:process";import{existsSync as t,readdirSync as i,statSync as s,writeFileSync as r}from"node:fs";import{join as a,resolve as l}from"node:path";import{pathToFileURL as c}from"node:url";import d from"chalk";import m from"cli-table3";import{loadSites as g,getSite as p}from"../core/runtime.js";import{defineSite as y}from"../core/define-site.js";import{createContext as f}from"../core/context.js";import{parseArgsForWebsite as h}from"../util/args-parser.js";import{addRegistry as u,installEntry as $,listInstalled as b,loadIndex as w,removeInstalled as x,removeRegistry as j,resolveEntry as v,resolveRegistries as R,searchRegistries as N}from"../core/registry.js";async function D(t){if(!e.isTTY)return!1;const i=o({input:e,output:n});try{const o=(await i.question(`${t} ${d.gray("[y/N]")} `)).trim().toLowerCase();return"y"===o||"yes"===o}finally{i.close()}}function S(o){console.error(d.red(`Error: ${o}`)),process.exit(1)}const I=o=>o instanceof Error?o.message:String(o);export function registerExtCommands(o){o.enablePositionalOptions();const e=o.command("ext").description("Discover and install website extensions from a public registry").enablePositionalOptions();e.command("test <path> [args...]").description("Load and run a site .js file (or its folder) directly from disk — no install").allowUnknownOption().passThroughOptions().action(async(o,e)=>{const n=function(o){const e=l(process.cwd(),o);if(t(e)||S(`path not found: ${o}`),s(e).isFile())return e;const n=i(e);for(const o of["index.mjs","index.js"])if(n.includes(o))return a(e,o);const r=n.find(o=>/\.(m?js)$/.test(o)&&!o.includes(".test."));if(r)return a(e,r);S(`no .js/.mjs entry found in ${o}`)}(o);let m;try{m=await import(c(n).href)}catch(o){S(`failed to import ${n}: ${I(o)}`)}const g=m.default??m.site;let p,u;g&&"object"==typeof g||S(`${n} does not default-export a site 
object`);try{p=y(g)}catch(o){S(I(o))}p.origin="extension";try{const o=(e??[]).filter(o=>"--"!==o);u=h(p.positionals,p.parameters,o)}catch(o){S(I(o))}if(u.helpRequested)return void function(o,e){if(console.log(d.bold.green(`\n${o.name} (${o.id})`)+d.gray(` — ${e}\n`)),console.log(` ${d.italic(o.description)}\n`),o.positionals.length){console.log(d.bold("Positionals:"));for(const e of o.positionals)console.log(` ${d.cyan(e.name.padEnd(16))} ${e.description}`);console.log()}console.log(d.bold("Options:"));for(const e of o.parameters){const o=(e.short?`-${e.short}, `:" ")+`--${e.name}`+("boolean"===e.type?"":" <value>");console.log(` ${d.yellow(o.padEnd(26))} ${e.description}`)}console.log()}(p,n);console.error(d.gray(`▶ running "${p.id}" from ${n} (not installed)`));const{ctx:$,dispose:b}=f(p,u.options);try{const o=await p.run($),e="string"==typeof o?o:JSON.stringify(o,null,2);u.options.out?(r(u.options.out,e+"\n","utf8"),console.error(d.green(`✓ wrote ${u.options.out}`))):console.log(e)}catch(o){S(I(o))}finally{await b()}}),e.command("search [query]").description("Search configured registries for installable sites").option("--refresh","Bypass the cached index and re-fetch").action(async(o,e)=>{const n=await N(o??"",{refresh:e.refresh});if(0===n.length)return void console.log(d.yellow(`No sites${o?` matching "${o}"`:""} found.`));const t=new m({head:[d.bold.cyan("ID"),d.bold.cyan("Name"),d.bold.cyan("Domain"),d.bold.cyan("Registry"),d.bold.cyan("Description")],colWidths:[16,22,18,14,44],wordWrap:!0,style:{head:[],border:[]}});for(const o of n){const e=[];"browser"===o.transport&&e.push(d.magenta("[p]")),o.auth&&e.push(d.red("[l]"));const n=e.length?`${o.name} ${e.join(" ")}`:o.name;t.push([d.yellow(o.id),n,d.underline(o.domain),o.registry.name,o.description])}console.log(t.toString()),console.log(`\nInstall one with: ${d.bold.cyan("npx website-api ext install <id>")}\n`)}),e.command("info <id>").description("Show full catalog details for a single 
site").option("--registry <name>","Disambiguate when multiple registries offer the id").action(async(o,e)=>{const{source:n,entry:t}=await v(o,{registryName:e.registry});console.log(d.bold.green(`\n🌐 ${t.name} ${d.yellow(`(${t.id})`)}\n`)),console.log(` ${d.italic(t.description)}\n`);const i=[["Domain",t.domain],["Registry",`${n.name} (${n.repo})`],["Version",t.version??"—"],["Transport",t.transport??"http"],["Requires login",t.auth?"yes":"no"],["Tags",t.tags?.join(", ")||"—"],["Files",t.files.map(o=>o.name).join(", ")]];for(const[o,e]of i)console.log(` ${d.bold((o+":").padEnd(16))} ${e}`);console.log(`\nInstall with: ${d.bold.cyan(`npx website-api ext install ${t.id}`)}\n`)}),e.command("install <id>").alias("add-site").description("Download and install a site from a registry into your extensions folder").option("--registry <name>","Disambiguate when multiple registries offer the id").option("-y, --yes","Skip the confirmation prompt").option("--refresh","Bypass the cached index and re-fetch").action(async(o,e)=>{const{source:n,index:t,entry:i}=await v(o,{registryName:e.registry,refresh:e.refresh});console.log(d.bold(`\nAbout to install ${d.yellow(i.id)} — ${i.name}`)),console.log(` ${d.bold("Domain:")} ${i.domain}`),console.log(` ${d.bold("Registry:")} ${n.name} (${n.repo}@${t.commit.slice(0,10)})`),console.log(` ${d.bold("Files:")} ${i.files.map(o=>o.name).join(", ")}`),i.auth&&console.log(d.red(` ⚠ This site performs a login and will read saved credentials for ${i.domain}.`)),await g();const s=p(i.id);if("bundled"===s?.origin&&console.log(d.yellow(` ⚠ This will shadow the bundled "${i.id}" site.`)),console.log(d.gray(" ⚠ Installing runs third-party code with your browser session. 
Only install sites you trust.\n")),!e.yes&&!await D("Install this site?"))return void console.log(d.yellow("Aborted."));const{dir:r}=await $(n,t,i,{refresh:e.refresh});console.log(d.green(`\n✓ Installed ${i.id} → ${r}`)),console.log(`Run it with: ${d.bold.cyan(`npx website-api ${i.id}`)}\n`)}),e.command("list").description("List sites you have installed from registries").action(()=>{const o=b();if(0===o.length)return void console.log(d.yellow("No registry sites installed. Try: ")+d.cyan("npx website-api ext search"));const e=new m({head:[d.bold.cyan("ID"),d.bold.cyan("Version"),d.bold.cyan("Registry"),d.bold.cyan("Commit"),d.bold.cyan("Installed")],style:{head:[],border:[]}});for(const n of o)e.push([d.yellow(n.id),n.version??"—",n.repo,n.commit.slice(0,10),n.installedAt.slice(0,10)]);console.log(e.toString())}),e.command("remove <id>").alias("uninstall").description("Remove an installed registry site").action(o=>{x(o)?console.log(d.green(`✓ Removed ${o}`)):S(`"${o}" is not installed`)}),e.command("update [id]").description("Re-install installed sites whose registry commit has changed").option("-y, --yes","Skip the confirmation prompt").action(async(o,e)=>{const n=b().filter(e=>!o||e.id===o);0===n.length&&S(o?`"${o}" is not installed`:"no registry sites installed");let t=0;for(const o of n){let n;try{n=await v(o.id,{registryName:o.registry,refresh:!0})}catch{console.log(d.yellow(`• ${o.id}: no longer in registry, skipping`));continue}n.index.commit!==o.commit?(console.log(`• ${o.id}: ${o.commit.slice(0,10)} → ${n.index.commit.slice(0,10)}`),(e.yes||await D(` Update ${o.id}?`))&&(await $(n.source,n.index,n.entry),console.log(d.green(` ✓ updated ${o.id}`)),t++)):console.log(d.gray(`• ${o.id}: up to date`))}console.log(t?d.green(`\nUpdated ${t} site(s).`):d.gray("\nNothing to update."))});const n=e.command("registry").description("Manage the registries searched for sites");n.command("list").description("List configured registries (in search priority 
order)").action(()=>{const o=new m({head:[d.bold.cyan("Name"),d.bold.cyan("Repo"),d.bold.cyan("Branch")],style:{head:[],border:[]}});for(const e of R())o.push([e.name,e.repo,e.branch]);console.log(o.toString())}),n.command("add <spec>").description("Add a registry (owner/repo, owner/repo#branch, or a github.com URL)").action(async o=>{const e=u(o);try{const o=await w(e,{refresh:!0});console.log(d.green(`✓ Added registry ${e.name} (${e.repo}) — ${o.sites.length} site(s) available`))}catch(o){console.log(d.yellow(`Added ${e.repo}, but its index.json could not be fetched: ${o instanceof Error?o.message:String(o)}`))}}),n.command("remove <repoOrName>").description("Remove a configured registry").action(o=>{j(o)?console.log(d.green(`✓ Removed registry ${o}`)):S(`registry "${o}" not found in config`)})}
1
+ import { existsSync, readdirSync, statSync, writeFileSync } from "node:fs";
2
+ import { join, resolve } from "node:path";
3
+ import { stdin, stdout } from "node:process";
4
+ import { createInterface } from "node:readline/promises";
5
+ import { pathToFileURL } from "node:url";
6
+ import chalk from "chalk";
7
+ import Table from "cli-table3";
8
+ import { createContext } from "../core/context.js";
9
+ import { defineSite } from "../core/define-site.js";
10
+ import { addRegistry, installEntry, listInstalled, loadIndex, removeInstalled, removeRegistry, resolveEntry, resolveRegistries, searchRegistries, } from "../core/registry.js";
11
+ import { getSite, loadSites } from "../core/runtime.js";
12
+ import { parseArgsForWebsite } from "../util/args-parser.js";
13
/**
 * Prompts for a yes/no answer on the terminal.
 *
 * @param {string} question - Text printed before the `[y/N]` hint.
 * @returns {Promise<boolean>} `true` only when the trimmed, lower-cased reply
 *   is "y" or "yes"; always `false` when stdin is not a TTY.
 */
async function confirm(question) {
    // A non-interactive shell cannot answer, so never auto-confirm there.
    if (!stdin.isTTY) {
        return false;
    }
    const reader = createInterface({ input: stdin, output: stdout });
    try {
        const reply = await reader.question(`${question} ${chalk.gray("[y/N]")} `);
        const normalized = reply.trim().toLowerCase();
        return ["y", "yes"].includes(normalized);
    }
    finally {
        // Always release the readline interface, even if the prompt rejects.
        reader.close();
    }
}
26
/**
 * Reports a fatal error and terminates the CLI.
 *
 * Writes `Error: <message>` in red to stderr, then exits the process with
 * status 1, so this function never returns to its caller.
 *
 * @param {string} message - Human-readable description of what went wrong.
 */
function fail(message) {
    const line = chalk.red(`Error: ${message}`);
    console.error(line);
    process.exit(1);
}
30
/**
 * Extracts a printable message from anything that can be thrown.
 *
 * - `Error` instances yield their `message` (unchanged from before).
 * - Non-Error objects carrying a string `message` (e.g. plain-object
 *   rejections or errors from another realm) yield that message instead of
 *   "[object Object]".
 * - Everything else is stringified; values that cannot be converted to a
 *   primitive (such as null-prototype objects) fall back to their
 *   `Object.prototype.toString` tag rather than throwing from the error path.
 *
 * @param {unknown} err - The caught value.
 * @returns {string} A message suitable for display.
 */
const errMsg = (err) => {
    if (err instanceof Error) {
        return err.message;
    }
    if (typeof err === "object" && err !== null && typeof err.message === "string") {
        return err.message;
    }
    try {
        return String(err);
    }
    catch {
        // String() throws for objects without a usable toString/valueOf.
        return Object.prototype.toString.call(err);
    }
};
31
/**
 * Resolves a user-supplied path (file or directory) to a loadable site entry file.
 *
 * Resolution order:
 *   1. If the path is a regular file, it is returned as-is (made absolute
 *      against the current working directory).
 *   2. Otherwise the path is read as a directory and `index.mjs`, then
 *      `index.js`, is preferred.
 *   3. Failing that, the alphabetically first regular file ending in `.js` or
 *      `.mjs` whose name does not contain ".test." is used. Sorting makes the
 *      choice independent of the platform's directory listing order, and the
 *      file check skips sub-directories that merely look like scripts.
 *
 * Calls `fail()` when the path does not exist or no entry can be found.
 *
 * @param {string} path - File or directory path, absolute or relative to cwd.
 * @returns {string} Absolute path of the entry file.
 */
function resolveLocalEntry(path) {
    const abs = resolve(process.cwd(), path);
    if (!existsSync(abs))
        fail(`path not found: ${path}`);
    if (statSync(abs).isFile())
        return abs;
    const files = readdirSync(abs);
    for (const candidate of ["index.mjs", "index.js"]) {
        if (files.includes(candidate))
            return join(abs, candidate);
    }
    const fallback = files
        .filter((f) => /\.(m?js)$/.test(f) && !f.includes(".test."))
        .sort()
        // statSync follows symlinks; throwIfNoEntry:false tolerates dangling links.
        .find((f) => statSync(join(abs, f), { throwIfNoEntry: false })?.isFile());
    if (fallback)
        return join(abs, fallback);
    fail(`no .js/.mjs entry found in ${path}`);
}
48
/**
 * Prints a compact usage block for a site loaded from disk.
 *
 * Output goes to stdout: a title line with the site name, id and entry path,
 * the description, an optional "Positionals:" section (only when the site
 * declares any), and an "Options:" section listing every parameter.
 *
 * @param {object} site - Normalized site; reads `name`, `id`, `description`,
 *   `positionals` and `parameters`.
 * @param {string} entry - Path of the file the site was loaded from.
 */
function printLocalHelp(site, entry) {
    const title = chalk.bold.green(`\n${site.name} (${site.id})`);
    const source = chalk.gray(` — ${entry}\n`);
    console.log(title + source);
    console.log(` ${chalk.italic(site.description)}\n`);

    if (site.positionals.length) {
        console.log(chalk.bold("Positionals:"));
        for (const positional of site.positionals) {
            const label = chalk.cyan(positional.name.padEnd(16));
            console.log(` ${label} ${positional.description}`);
        }
        console.log();
    }

    console.log(chalk.bold("Options:"));
    for (const param of site.parameters) {
        const shortPart = param.short ? `-${param.short}, ` : " ";
        // Boolean flags take no argument; everything else shows a value placeholder.
        const valuePart = param.type === "boolean" ? "" : " <value>";
        const flag = shortPart + `--${param.name}` + valuePart;
        console.log(` ${chalk.yellow(flag.padEnd(26))} ${param.description}`);
    }
    console.log();
}
65
/**
 * Registers the `ext` command group on the given Commander program.
 *
 * Sub-commands registered:
 *   - `ext test <path> [args...]`  run a site file from disk without installing
 *   - `ext search [query]`         search configured registries
 *   - `ext info <id>`              show catalog details for one site
 *   - `ext install <id>`           (alias `add-site`) install after confirmation
 *   - `ext list`                   list installed registry sites
 *   - `ext remove <id>`            (alias `uninstall`) remove an installed site
 *   - `ext update [id]`            re-install sites whose registry commit changed
 *   - `ext registry list|add|remove`  manage configured registries
 *
 * @param {object} program - Commander program to attach the commands to.
 */
export function registerExtCommands(program) {
    // `test` forwards unknown flags (e.g. --limit) to the site being tested, which
    // requires positional-options mode on its parent commands.
    program.enablePositionalOptions();
    const ext = program
        .command("ext")
        .description("Discover and install website extensions from a public registry")
        .enablePositionalOptions();
    // ── test (run a local file directly, no install) ──
    ext
        .command("test <path> [args...]")
        .description("Load and run a site .js file (or its folder) directly from disk — no install")
        .allowUnknownOption()
        .passThroughOptions()
        .action(async (path, args) => {
            const entry = resolveLocalEntry(path);
            let mod;
            try {
                mod = await import(pathToFileURL(entry).href);
            }
            catch (err) {
                fail(`failed to import ${entry}: ${errMsg(err)}`);
            }
            const def = mod.default ?? mod.site;
            if (!def || typeof def !== "object")
                fail(`${entry} does not default-export a site object`);
            let site;
            try {
                site = defineSite(def);
            }
            catch (err) {
                fail(errMsg(err));
            }
            site.origin = "extension";
            let parsed;
            try {
                // Drop a standalone "--" separator that passthrough forwards verbatim.
                const siteArgs = (args ?? []).filter((a) => a !== "--");
                parsed = parseArgsForWebsite(site.positionals, site.parameters, siteArgs);
            }
            catch (err) {
                fail(errMsg(err));
            }
            if (parsed.helpRequested) {
                printLocalHelp(site, entry);
                return;
            }
            // Status goes to stderr so stdout stays clean (pipeable) data.
            console.error(chalk.gray(`▶ running "${site.id}" from ${entry} (not installed)`));
            const { ctx, dispose } = createContext(site, parsed.options);
            // fail() exits the process synchronously, which would skip a pending
            // `finally`. Record the failure, tear the context down first, and only
            // then report it, so a browser session opened by the site is disposed.
            let failed = false;
            let failure;
            try {
                const data = await site.run(ctx);
                const output = typeof data === "string" ? data : JSON.stringify(data, null, 2);
                if (parsed.options.out) {
                    writeFileSync(parsed.options.out, output + "\n", "utf8");
                    console.error(chalk.green(`✓ wrote ${parsed.options.out}`));
                }
                else {
                    console.log(output);
                }
            }
            catch (err) {
                failed = true;
                failure = err;
            }
            finally {
                await dispose();
            }
            if (failed)
                fail(errMsg(failure));
        });
    // ── search ──
    ext
        .command("search [query]")
        .description("Search configured registries for installable sites")
        .option("--refresh", "Bypass the cached index and re-fetch")
        .action(async (query, opts) => {
            const results = await searchRegistries(query ?? "", { refresh: opts.refresh });
            if (results.length === 0) {
                console.log(chalk.yellow(`No sites${query ? ` matching "${query}"` : ""} found.`));
                return;
            }
            const table = new Table({
                head: [
                    chalk.bold.cyan("ID"),
                    chalk.bold.cyan("Name"),
                    chalk.bold.cyan("Domain"),
                    chalk.bold.cyan("Registry"),
                    chalk.bold.cyan("Description"),
                ],
                colWidths: [16, 22, 18, 14, 44],
                wordWrap: true,
                style: { head: [], border: [] },
            });
            for (const r of results) {
                // Markers appended to the name: [p] for browser transport, [l] for login.
                const markers = [];
                if (r.transport === "browser")
                    markers.push(chalk.magenta("[p]"));
                if (r.auth)
                    markers.push(chalk.red("[l]"));
                const name = markers.length ? `${r.name} ${markers.join(" ")}` : r.name;
                table.push([chalk.yellow(r.id), name, chalk.underline(r.domain), r.registry.name, r.description]);
            }
            console.log(table.toString());
            console.log(`\nInstall one with: ${chalk.bold.cyan("npx website-api ext install <id>")}\n`);
        });
    // ── info ──
    ext
        .command("info <id>")
        .description("Show full catalog details for a single site")
        .option("--registry <name>", "Disambiguate when multiple registries offer the id")
        .action(async (id, opts) => {
            const { source, entry } = await resolveEntry(id, { registryName: opts.registry });
            console.log(chalk.bold.green(`\n🌐 ${entry.name} ${chalk.yellow(`(${entry.id})`)}\n`));
            console.log(` ${chalk.italic(entry.description)}\n`);
            const rows = [
                ["Domain", entry.domain],
                ["Registry", `${source.name} (${source.repo})`],
                ["Version", entry.version ?? "—"],
                ["Transport", entry.transport ?? "http"],
                ["Requires login", entry.auth ? "yes" : "no"],
                ["Tags", entry.tags?.join(", ") || "—"],
                ["Files", entry.files.map((f) => f.name).join(", ")],
            ];
            for (const [k, v] of rows)
                console.log(` ${chalk.bold((k + ":").padEnd(16))} ${v}`);
            console.log(`\nInstall with: ${chalk.bold.cyan(`npx website-api ext install ${entry.id}`)}\n`);
        });
    // ── install ──
    ext
        .command("install <id>")
        .alias("add-site")
        .description("Download and install a site from a registry into your extensions folder")
        .option("--registry <name>", "Disambiguate when multiple registries offer the id")
        .option("-y, --yes", "Skip the confirmation prompt")
        .option("--refresh", "Bypass the cached index and re-fetch")
        .action(async (id, opts) => {
            const { source, index, entry } = await resolveEntry(id, {
                registryName: opts.registry,
                refresh: opts.refresh,
            });
            // Security gate: an installed site runs with the user's decrypted Chrome
            // cookies and credentials. Surface what it is and where it came from.
            console.log(chalk.bold(`\nAbout to install ${chalk.yellow(entry.id)} — ${entry.name}`));
            console.log(` ${chalk.bold("Domain:")} ${entry.domain}`);
            console.log(` ${chalk.bold("Registry:")} ${source.name} (${source.repo}@${index.commit.slice(0, 10)})`);
            console.log(` ${chalk.bold("Files:")} ${entry.files.map((f) => f.name).join(", ")}`);
            if (entry.auth)
                console.log(chalk.red(` ⚠ This site performs a login and will read saved credentials for ${entry.domain}.`));
            // Warn if this would shadow a bundled/site already loaded by the same id.
            await loadSites();
            const existing = getSite(entry.id);
            if (existing?.origin === "bundled") {
                console.log(chalk.yellow(` ⚠ This will shadow the bundled "${entry.id}" site.`));
            }
            console.log(chalk.gray(` ⚠ Installing runs third-party code with your browser session. Only install sites you trust.\n`));
            // confirm() answers "no" on a non-TTY, so scripted installs must pass --yes.
            if (!opts.yes && !(await confirm("Install this site?"))) {
                console.log(chalk.yellow("Aborted."));
                return;
            }
            const { dir } = await installEntry(source, index, entry, { refresh: opts.refresh });
            console.log(chalk.green(`\n✓ Installed ${entry.id} → ${dir}`));
            console.log(`Run it with: ${chalk.bold.cyan(`npx website-api ${entry.id}`)}\n`);
        });
    // ── list (installed) ──
    ext
        .command("list")
        .description("List sites you have installed from registries")
        .action(() => {
            const installed = listInstalled();
            if (installed.length === 0) {
                console.log(chalk.yellow("No registry sites installed. Try: ") + chalk.cyan("npx website-api ext search"));
                return;
            }
            const table = new Table({
                head: [
                    chalk.bold.cyan("ID"),
                    chalk.bold.cyan("Version"),
                    chalk.bold.cyan("Registry"),
                    chalk.bold.cyan("Commit"),
                    chalk.bold.cyan("Installed"),
                ],
                style: { head: [], border: [] },
            });
            for (const r of installed) {
                table.push([
                    chalk.yellow(r.id),
                    r.version ?? "—",
                    r.repo,
                    r.commit.slice(0, 10),
                    r.installedAt.slice(0, 10),
                ]);
            }
            console.log(table.toString());
        });
    // ── remove ──
    ext
        .command("remove <id>")
        .alias("uninstall")
        .description("Remove an installed registry site")
        .action((id) => {
            if (removeInstalled(id))
                console.log(chalk.green(`✓ Removed ${id}`));
            else
                fail(`"${id}" is not installed`);
        });
    // ── update ──
    ext
        .command("update [id]")
        .description("Re-install installed sites whose registry commit has changed")
        .option("-y, --yes", "Skip the confirmation prompt")
        .action(async (id, opts) => {
            // With no id, every installed site is considered.
            const installed = listInstalled().filter((r) => !id || r.id === id);
            if (installed.length === 0)
                fail(id ? `"${id}" is not installed` : "no registry sites installed");
            let updated = 0;
            for (const record of installed) {
                let resolved;
                try {
                    resolved = await resolveEntry(record.id, { registryName: record.registry, refresh: true });
                }
                catch {
                    console.log(chalk.yellow(`• ${record.id}: no longer in registry, skipping`));
                    continue;
                }
                // The registry index commit is the change marker for installed sites.
                if (resolved.index.commit === record.commit) {
                    console.log(chalk.gray(`• ${record.id}: up to date`));
                    continue;
                }
                console.log(`• ${record.id}: ${record.commit.slice(0, 10)} → ${resolved.index.commit.slice(0, 10)}`);
                if (!opts.yes && !(await confirm(` Update ${record.id}?`)))
                    continue;
                await installEntry(resolved.source, resolved.index, resolved.entry);
                console.log(chalk.green(` ✓ updated ${record.id}`));
                updated++;
            }
            console.log(updated ? chalk.green(`\nUpdated ${updated} site(s).`) : chalk.gray("\nNothing to update."));
        });
    // ── registry management ──
    const reg = ext.command("registry").description("Manage the registries searched for sites");
    reg
        .command("list")
        .description("List configured registries (in search priority order)")
        .action(() => {
            const table = new Table({
                head: [chalk.bold.cyan("Name"), chalk.bold.cyan("Repo"), chalk.bold.cyan("Branch")],
                style: { head: [], border: [] },
            });
            for (const r of resolveRegistries())
                table.push([r.name, r.repo, r.branch]);
            console.log(table.toString());
        });
    reg
        .command("add <spec>")
        .description("Add a registry (owner/repo, owner/repo#branch, or a github.com URL)")
        .action(async (spec) => {
            const source = addRegistry(spec);
            try {
                const index = await loadIndex(source, { refresh: true });
                console.log(chalk.green(`✓ Added registry ${source.name} (${source.repo}) — ${index.sites.length} site(s) available`));
            }
            catch (err) {
                // Keep it configured but warn the catalog could not be read yet.
                console.log(chalk.yellow(`Added ${source.repo}, but its index.json could not be fetched: ${errMsg(err)}`));
            }
        });
    reg
        .command("remove <repoOrName>")
        .description("Remove a configured registry")
        .action((repoOrName) => {
            if (removeRegistry(repoOrName))
                console.log(chalk.green(`✓ Removed registry ${repoOrName}`));
            else
                fail(`registry "${repoOrName}" not found in config`);
        });
}
@@ -1,6 +1,6 @@
1
- import type { QueryOptions, Site, SiteContext } from "../types.js";
2
- import { type CookieProviders } from "../capabilities/cookies.js";
3
1
  import { type BrowserConnector } from "../capabilities/browser.js";
2
+ import { type CookieProviders } from "../capabilities/cookies.js";
3
+ import type { QueryOptions, Site, SiteContext } from "../types.js";
4
4
  /**
5
5
  * Injectable dependencies. In production all default to the real
6
6
  * implementations; tests pass fakes to exercise a site or capability without a
@@ -1 +1,137 @@
1
- import{resolve as e}from"node:path";import{buildCookieString as i,resolveCookies as o,resolveCredentials as t,resolveUserAgent as r}from"../capabilities/cookies.js";import{createHttp as a}from"../capabilities/http.js";import{applyFingerprint as s}from"../capabilities/fingerprint.js";import{connectChrome as n}from"../capabilities/browser.js";import{createSaver as c}from"../capabilities/download.js";export function createContext(p,d={},l={}){const u=l.env??process.env,g=!!d.debug,m="required"===p.cookies;let w;const b=()=>(void 0===w&&(w=o(p.cookieDomain,d,m,l)),w),f=()=>i(b()),h=()=>r(d,u),k=()=>t(p.domain,d,l),v=a({fetchImpl:l.fetchImpl,cookieString:f,userAgent:h,debug:g}),D=l.connectBrowser??n;let j,y;const I=void 0!==d.close?!!d.close:!(d.keepOpen||p.keepBrowserOpen),x=()=>(y||(y=(async()=>(j=await D(p.landingUrl,{cdpEndpoint:u.CDP_ENDPOINT,close:I,debug:g}),await s(j.page,p.fingerprint,h()),p.auth&&await p.auth.ensureLoggedIn({page:j.page,debug:g,getCredentials:k}),j.page))()),y),C=d.outDir?e(l.cwd??process.cwd(),d.outDir):null,O=c(d.outDir??null,l.cwd);return{ctx:{site:p,domain:p.domain,options:d,debug:g,outDir:C,cookies:b,cookieString:f,credentials:k,userAgent:h,http:v,browser:x,eval:async e=>(await x()).evaluate(e),save:O},async dispose(){if(j)try{await j.dispose()}catch{}}}}
1
+ import { resolve } from "node:path";
2
+ import { connectChrome } from "../capabilities/browser.js";
3
+ import { buildCookieString, resolveCookies, resolveCredentials, resolveUserAgent, } from "../capabilities/cookies.js";
4
+ import { createSaver } from "../capabilities/download.js";
5
+ import { applyFingerprint } from "../capabilities/fingerprint.js";
6
+ import { createHttp } from "../capabilities/http.js";
7
/**
 * Interprets an environment-variable string as a boolean flag.
 *
 * @param {string | undefined} value - Raw environment value.
 * @returns {boolean} `true` for `1`, `true`, `yes` or `on` (surrounding
 *   whitespace and letter case ignored); `false` for anything else,
 *   including an unset or empty value.
 */
function isTruthyEnv(value) {
    const normalized = value ? value.trim().toLowerCase() : "";
    return (normalized === "1" ||
        normalized === "true" ||
        normalized === "yes" ||
        normalized === "on");
}
13
/**
 * Decides whether a freshly launched managed browser should be headless.
 *
 * The managed browser launches **headless by default**. When a CDP session is
 * already open, chrome-cdp-manager attaches to it instead of launching — so this
 * only governs a fresh launch. `--headed` (or `CDP_HEADLESS=false`) forces a
 * visible window, e.g. to solve a captcha that needs a real press-and-hold.
 *
 * Precedence: the `headed` option wins; otherwise a non-blank `CDP_HEADLESS`
 * value is interpreted by `isTruthyEnv`; otherwise the default (`true`) applies.
 * A variable that is set but empty or whitespace-only (e.g. `CDP_HEADLESS=`) is
 * treated as unset rather than as an explicit request for a visible window.
 *
 * @param {boolean | undefined} headed - Value of the `--headed` option.
 * @param {string | undefined} headlessEnv - Raw `CDP_HEADLESS` value.
 * @returns {boolean} `true` to launch headless, `false` for a visible window.
 */
function resolveHeadless(headed, headlessEnv) {
    if (headed)
        return false;
    if (headlessEnv === undefined || headlessEnv.trim() === "")
        return true;
    return isTruthyEnv(headlessEnv);
}
26
/**
 * Builds the lazy capability context handed to `site.run(ctx)`. Nothing
 * expensive happens until a capability is touched: cookies are read on first
 * `cookies()`/HTTP call, and Chrome is launched on first `browser()`.
 *
 * @param {object} site - Normalized site definition; reads `cookies`,
 *   `cookieDomain`, `domain`, `landingUrl`, `fingerprint`, `auth` and
 *   `keepBrowserOpen`.
 * @param {object} [options] - Per-invocation options; reads `debug`, `close`,
 *   `keepOpen`, `headed` and `outDir`, and is passed through to the resolvers.
 * @param {object} [providers] - Injectable dependencies; reads `env`,
 *   `fetchImpl`, `connectBrowser` and `cwd`, and is passed to the resolvers.
 * @returns {{ ctx: object, dispose: () => Promise<void> }} The context plus a
 *   teardown function that disposes the browser session if one was opened.
 */
export function createContext(site, options = {}, providers = {}) {
    const env = providers.env ?? process.env;
    const debug = !!options.debug;
    const required = site.cookies === "required";
    // ── memoized session resolution ──
    // Runs `resolveValue` on first use and replays its result afterwards. A
    // separate flag (rather than an `=== undefined` check) keeps a resolver
    // that legitimately returns `undefined` from being re-run on every call.
    // A resolver that throws is not cached, so a later call retries it.
    const once = (resolveValue) => {
        let resolved = false;
        let value;
        return () => {
            if (!resolved) {
                value = resolveValue();
                resolved = true;
            }
            return value;
        };
    };
    const cookies = once(() => resolveCookies(site.cookieDomain, options, required, providers));
    const cookieString = () => buildCookieString(cookies());
    const userAgent = once(() => resolveUserAgent(options, env));
    // Memoized: resolving credentials reads Chrome's password store, which can
    // trigger a keychain prompt — it must happen at most once per invocation.
    const credentials = once(() => resolveCredentials(site.domain, options, providers));
    const http = createHttp({ fetchImpl: providers.fetchImpl, cookieString, userAgent, debug });
    // ── browser capability (lazy + memoized) ──
    const connect = providers.connectBrowser ?? connectChrome;
    let session;
    let pagePromise;
    // Whether to close a tab we open on teardown. Precedence:
    // explicit options.close → --keep-open flag → the site's keepBrowserOpen
    const shouldClose = options.close !== undefined ? !!options.close : !(options.keepOpen || site.keepBrowserOpen);
    // Whether the managed browser should launch headless (default true). An
    // already-open CDP session is reused regardless; this only affects a fresh launch.
    const headless = resolveHeadless(options.headed, env.CDP_HEADLESS);
    const browser = () => {
        if (!pagePromise) {
            pagePromise = (async () => {
                session = await connect(site.landingUrl, {
                    cdpEndpoint: env.CDP_ENDPOINT,
                    headless,
                    close: shouldClose,
                    debug,
                });
                await applyFingerprint(session.page, site.fingerprint, userAgent());
                if (site.auth) {
                    await site.auth.ensureLoggedIn({
                        page: session.page,
                        debug,
                        getCredentials: credentials,
                    });
                }
                return session.page;
            })();
            // Don't cache a rejected setup: a retry within the same run() should
            // attempt a fresh connection, with any partial session torn down first.
            pagePromise = pagePromise.catch(async (err) => {
                pagePromise = undefined;
                if (session) {
                    const partial = session;
                    session = undefined;
                    await partial.dispose().catch(() => { });
                }
                throw err;
            });
        }
        return pagePromise;
    };
    // Absolute output directory (or null when none was requested).
    const outDir = options.outDir ? resolve(providers.cwd ?? process.cwd(), options.outDir) : null;
    const save = createSaver(options.outDir ?? null, providers.cwd);
    const ctx = {
        site,
        domain: site.domain,
        options,
        debug,
        outDir,
        cookies,
        cookieString,
        credentials,
        userAgent,
        http,
        browser,
        // Evaluates `fn` in the page, opening the browser on first use.
        async eval(fn) {
            const page = await browser();
            return page.evaluate(fn);
        },
        save,
    };
    return {
        ctx,
        async dispose() {
            if (session) {
                try {
                    await session.dispose();
                }
                catch {
                    // ignore teardown failures
                }
            }
        },
    };
}
@@ -1 +1,74 @@
1
- import{toLoginStrategy as t}from"../capabilities/login/login-strategy.js";export function isSite(t){return"object"==typeof t&&null!==t&&!0===t.__site}export function defineSite(e){if(isSite(e))return e;for(const t of["id","name","domain","description"])if(!e[t]||"string"!=typeof e[t])throw new Error(`Site is missing required string field "${t}"`);const i=Array.isArray(e.endpoints)&&e.endpoints.length>0;if(!e.run&&!i)throw new Error(`Site "${e.id}" must define either "endpoints" or "run"`);const n=e.run?async t=>e.run(t):(r=e.endpoints,async t=>{const e=r[0],i="html"===e.responseType?"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8":"text"===e.responseType?"text/plain,*/*;q=0.8":"application/json, text/plain, */*",n={method:e.method||"GET",headers:{Accept:i,...e.headers}};let o;return o="html"===e.responseType||"text"===e.responseType?await t.http.text(e.url,n):await t.http.json(e.url,n),e.transform?e.transform(o,t):o});var r;return{id:e.id,name:e.name,domain:e.domain,cookieDomain:e.cookieDomain??e.domain,description:e.description,transport:e.transport??"http",cookies:e.cookies??"required",fingerprint:e.fingerprint??"stealth",keepBrowserOpen:e.keepBrowserOpen??!1,auth:e.auth?t(e.auth):void 0,parameters:e.parameters??[],positionals:e.positionals??[],landingUrl:e.endpoints?.[0]?.url??`https://${e.domain}`,run:n,__site:!0}}
1
+ import { toLoginStrategy } from "../capabilities/login/login-strategy.js";
2
+ /** Detects an already-normalized site so loading is idempotent: true only for a non-null object whose `__site` property is strictly `true`; any other value (primitive, null, object without the marker) gives false. */
3
+ export function isSite(value) {
4
+ return typeof value === "object" && value !== null && value.__site === true; // explicit null test because typeof null is "object"
5
+ }
6
+ /** Fetches one declared endpoint and applies its transform. The default Accept header and the reader (`ctx.http.text` for "html"/"text", `ctx.http.json` for anything else) both follow `endpoint.responseType`; resolves to `endpoint.transform(body, ctx)` when a transform is set, otherwise to the body itself. */
7
+ async function fetchEndpoint(endpoint, ctx) {
8
+ const accept = endpoint.responseType === "html"
9
+ ? "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
10
+ : endpoint.responseType === "text"
11
+ ? "text/plain,*/*;q=0.8"
12
+ : "application/json, text/plain, */*";
13
+ const init = {
14
+ method: endpoint.method || "GET", // any falsy method (missing, empty string) falls back to GET
15
+ headers: { Accept: accept, ...endpoint.headers }, // spread last, so endpoint.headers overrides the default; NOTE(review): keys are case-sensitive here, so a lowercase "accept" would be kept alongside "Accept" - confirm the http layer handles that
16
+ };
17
+ let body;
18
+ if (endpoint.responseType === "html" || endpoint.responseType === "text") {
19
+ body = await ctx.http.text(endpoint.url, init);
20
+ }
21
+ else {
22
+ body = await ctx.http.json(endpoint.url, init);
23
+ }
24
+ return endpoint.transform ? endpoint.transform(body, ctx) : body; // transform also receives ctx; whatever it returns (promise or plain value) becomes the result
25
+ }
26
+ /**
27
+ * Builds the default `run` for a declarative, endpoint-based site. One
28
+ * endpoint returns its (transformed) body; several are fetched concurrently
29
+ * and returned as an array in declaration order. If any fetch rejects, the whole run rejects.
30
+ */
31
+ function endpointRun(endpoints) {
32
+ return async (ctx) => { // `endpoints` is read on every call, not copied when the runner is built
33
+ if (endpoints.length === 1) // single endpoint: the bare result, not a one-element array
34
+ return fetchEndpoint(endpoints[0], ctx);
35
+ return Promise.all(endpoints.map((endpoint) => fetchEndpoint(endpoint, ctx))); // every request is started before any is awaited; Promise.all preserves input order
36
+ };
37
+ }
38
+ /**
39
+ * Normalizes a minimal {@link SiteDef} into a fully-defaulted {@link Site}.
40
+ * Accepts a plain object (the common external-extension case), an already
41
+ * normalized site, or a class instance / factory result with the same fields. Throws an Error if any of id, name, domain or description is not a non-empty string, or if the definition has neither `run` nor a non-empty `endpoints` array.
42
+ */
43
+ export function defineSite(def) {
44
+ if (isSite(def)) // already normalized: the same object is handed back untouched
45
+ return def;
46
+ for (const field of ["id", "name", "domain", "description"]) {
47
+ if (!def[field] || typeof def[field] !== "string") { // "" fails the first test, non-strings the second; NOTE(review): a null or undefined def is not caught by isSite and would raise a TypeError on this property read - confirm callers never pass one
48
+ throw new Error(`Site is missing required string field "${field}"`);
49
+ }
50
+ }
51
+ const hasEndpoints = Array.isArray(def.endpoints) && def.endpoints.length > 0; // only a non-empty array counts as having endpoints
52
+ if (!def.run && !hasEndpoints) {
53
+ throw new Error(`Site "${def.id}" must define either "endpoints" or "run"`);
54
+ }
55
+ const run = def.run ? async (ctx) => def.run(ctx) : endpointRun(def.endpoints); // an explicit run wins over endpoints; the async wrapper makes run always return a promise and calls it as a method of def
56
+ return {
57
+ id: def.id,
58
+ name: def.name,
59
+ domain: def.domain,
60
+ cookieDomain: def.cookieDomain ?? def.domain, // here and in the other ?? lines, the default applies only when the field is null or undefined
61
+ description: def.description,
62
+ transport: def.transport ?? "http",
63
+ cookies: def.cookies ?? "required",
64
+ fingerprint: def.fingerprint ?? "stealth",
65
+ keepBrowserOpen: def.keepBrowserOpen ?? false,
66
+ auth: def.auth ? toLoginStrategy(def.auth) : undefined, // converted with toLoginStrategy only when an auth value is set
67
+ parameters: def.parameters ?? [],
68
+ positionals: def.positionals ?? [],
69
+ landingUrl: def.endpoints?.[0]?.url ?? `https://${def.domain}`, // first endpoint's url when there is one, else the https root of the domain
70
+ endpoints: def.endpoints, // passed through exactly as given (not validated or copied)
71
+ run,
72
+ __site: true, // marker that isSite checks
73
+ };
74
+ }