website-api 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +141 -1
  2. package/dist/bin/cli.js +204 -1
  3. package/dist/src/capabilities/browser.d.ts +8 -2
  4. package/dist/src/capabilities/browser.js +106 -1
  5. package/dist/src/capabilities/cookies.d.ts +7 -1
  6. package/dist/src/capabilities/cookies.js +68 -1
  7. package/dist/src/capabilities/download.js +32 -1
  8. package/dist/src/capabilities/fingerprint.js +62 -1
  9. package/dist/src/capabilities/http.js +101 -1
  10. package/dist/src/capabilities/login/login-helper.js +185 -1
  11. package/dist/src/capabilities/login/login-strategy.js +36 -1
  12. package/dist/src/challenges/perimeterx.d.ts +62 -0
  13. package/dist/src/challenges/perimeterx.js +112 -0
  14. package/dist/src/cli/ext.js +338 -1
  15. package/dist/src/core/context.d.ts +2 -2
  16. package/dist/src/core/context.js +137 -1
  17. package/dist/src/core/define-site.js +74 -1
  18. package/dist/src/core/loader.js +142 -1
  19. package/dist/src/core/registry.js +332 -1
  20. package/dist/src/core/runtime.d.ts +12 -4
  21. package/dist/src/core/runtime.js +98 -1
  22. package/dist/src/env.js +34 -1
  23. package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
  24. package/dist/src/sites/bloomberg.com/index.js +49 -0
  25. package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
  26. package/dist/src/sites/chase.com/download-helper.js +266 -1
  27. package/dist/src/sites/chase.com/index.js +87 -1
  28. package/dist/src/sites/chase.com/openapi.yaml +76 -0
  29. package/dist/src/sites/chatgpt.com/index.js +24 -1
  30. package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
  31. package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
  32. package/dist/src/sites/claude.ai/index.js +42 -1
  33. package/dist/src/sites/claude.ai/openapi.yaml +54 -0
  34. package/dist/src/sites/cursor.com/index.js +12 -1
  35. package/dist/src/sites/cursor.com/openapi.yaml +39 -0
  36. package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
  37. package/dist/src/sites/e-zpassny.com/index.js +344 -0
  38. package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
  39. package/dist/src/sites/gemini.google.com/index.js +80 -1
  40. package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
  41. package/dist/src/sites/google.com/google-helpers.js +255 -1
  42. package/dist/src/sites/google.com/index.js +253 -1
  43. package/dist/src/sites/google.com/openapi.yaml +59 -0
  44. package/dist/src/sites/ollama.com/index.js +43 -1
  45. package/dist/src/sites/ollama.com/openapi.yaml +39 -0
  46. package/dist/src/sites/perplexity.ai/index.js +253 -1
  47. package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
  48. package/dist/src/sites/pseg.com/index.js +243 -1
  49. package/dist/src/sites/pseg.com/openapi.yaml +42 -0
  50. package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
  51. package/dist/src/sites/voice.google.com/index.d.ts +2 -0
  52. package/dist/src/sites/voice.google.com/index.js +122 -0
  53. package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
  54. package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
  55. package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
  56. package/dist/src/sites/zillow.com/index.d.ts +2 -0
  57. package/dist/src/sites/zillow.com/index.js +303 -0
  58. package/dist/src/sites/zillow.com/openapi.yaml +55 -0
  59. package/dist/src/types.d.ts +7 -0
  60. package/dist/src/types.js +1 -1
  61. package/dist/src/util/args-parser.js +145 -1
  62. package/dist/src/util/google-json.js +74 -1
  63. package/dist/src/website-api.d.ts +7 -7
  64. package/dist/src/website-api.js +13 -1
  65. package/package.json +37 -10
@@ -1 +1,142 @@
1
- import{existsSync as t,readdirSync as o,statSync as n}from"node:fs";import{homedir as r}from"node:os";import{dirname as e,join as s}from"node:path";import{fileURLToPath as i,pathToFileURL as c}from"node:url";import{defineSite as f,isSite as u}from"./define-site.js";const d=e(i(import.meta.url));export const BUNDLED_SITES_DIR=s(d,"..","sites");export function extensionRoots(t=process.env){const o=[],n=t.XDG_CONFIG_HOME||s(r(),".config");if(o.push(s(n,"website-api","extensions")),t.WEBSITE_API_EXTENSIONS)for(const n of t.WEBSITE_API_EXTENSIONS.split(":"))n.trim()&&o.push(n.trim());return o}function p(t){return!(!t.endsWith(".js")&&!t.endsWith(".mjs"))&&(!t.includes(".test.")&&!t.includes(".d.")&&!/(^|[-.])helper(s)?\.(m?js)$/i.test(t))}function a(t){const n=o(t);for(const o of["index.mjs","index.js"])if(n.includes(o))return s(t,o);const r=n.find(p);return r?s(t,r):null}function l(t){const r=[];let e;try{e=o(t)}catch{return r}for(const o of e){const e=s(t,o);let i;try{i=n(e)}catch{continue}if(i.isDirectory()){const t=a(e);t&&r.push(t)}else p(o)&&r.push(e)}return r}async function m(t,o){const n=await import(c(t).href);let r=n.default??n.site??n.sites;if(null==r)return[];if("function"==typeof r&&!u(r))try{r=new r}catch{try{r=r()}catch{return[]}}const e=Array.isArray(r)?r:[r],s=[];for(const t of e){if(!t||"object"!=typeof t)continue;const n=f(t);n.origin=o,s.push(n)}return s}export async function discoverSites(o={}){const n=o.env??process.env,r=[{dir:o.bundledDir??BUNDLED_SITES_DIR,origin:"bundled"},...(o.roots??extensionRoots(n)).map(t=>({dir:t,origin:"extension"}))],e=new Map;for(const{dir:o,origin:s}of r)if(t(o))for(const t of l(o))try{for(const o of await m(t,s))e.set(o.id,o)}catch(o){n.WEBSITE_API_DEBUG&&console.error(`[loader] failed to load ${t}:`,o)}return Array.from(e.values())}
1
+ import { existsSync, readdirSync, statSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath, pathToFileURL } from "node:url";
5
+ import { defineSite, isSite } from "./define-site.js";
6
// Directory containing this module (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
/** Turns any thrown value into a printable one-line message. */
const errMessage = (err) => {
    if (err instanceof Error) {
        return err.message;
    }
    return String(err);
};
/** Bundled sites ship inside the package (dist/src/sites). */
export const BUNDLED_SITES_DIR = join(__dirname, "..", "sites");
10
/**
 * Resolves the extension roots searched in addition to bundled sites:
 *   1. <config>/website-api/extensions (XDG_CONFIG_HOME or ~/.config)
 *   2. each dir in $WEBSITE_API_EXTENSIONS, separated by the platform's
 *      path-list delimiter (":" on POSIX, ";" on Windows)
 *
 * These hold the user's own sites, kept entirely outside the source tree.
 *
 * @param {Record<string, string | undefined>} [env] Environment to read; defaults to process.env.
 * @returns {string[]} The config-home root first, then each non-blank listed directory in order.
 */
export function extensionRoots(env = process.env) {
    const configHome = env.XDG_CONFIG_HOME || join(homedir(), ".config");
    const roots = [join(configHome, "website-api", "extensions")];
    // ":" cannot separate Windows paths (every absolute path contains one after
    // the drive letter), so follow the platform's PATH-style delimiter.
    const separator = process.platform === "win32" ? ";" : ":";
    for (const raw of (env.WEBSITE_API_EXTENSIONS ?? "").split(separator)) {
        const dir = raw.trim();
        if (dir) {
            roots.push(dir);
        }
    }
    return roots;
}
29
/**
 * Decides from the file name alone whether a file is a candidate site module:
 * it must end in `.js`/`.mjs`, must not contain `.test.` or `.d.`, and must
 * not be a `helper`/`helpers` module.
 */
function isLoadableSiteFile(name) {
    const hasModuleExtension = name.endsWith(".js") || name.endsWith(".mjs");
    const isTestOrDeclaration = name.includes(".test.") || name.includes(".d.");
    const isHelper = /(^|[-.])helper(s)?\.(m?js)$/i.test(name);
    return hasModuleExtension && !isTestOrDeclaration && !isHelper;
}
38
/**
 * Chooses the entry file of a site directory: `index.mjs`, else `index.js`,
 * else the first loadable file in directory-listing order. Returns the joined
 * path, or null when the directory has no candidate.
 */
function entryFileIn(dir) {
    const names = readdirSync(dir);
    const preferred = ["index.mjs", "index.js"].find((candidate) => names.includes(candidate));
    if (preferred !== undefined) {
        return join(dir, preferred);
    }
    const fallback = names.find(isLoadableSiteFile);
    if (!fallback) {
        return null;
    }
    return join(dir, fallback);
}
48
/**
 * Collects candidate entry files in a root: one entry per subdirectory plus
 * every loadable top-level file.
 *
 * Best-effort by design: an unreadable root yields an empty list, and an
 * entry that cannot be stat'ed or a subdirectory that cannot be listed is
 * skipped, so one bad entry never hides the others.
 *
 * @param {string} root Directory to scan.
 * @returns {string[]} Paths of candidate site files.
 */
function collectSiteFiles(root) {
    const files = [];
    let entries;
    try {
        entries = readdirSync(root);
    }
    catch {
        return files;
    }
    for (const name of entries) {
        const full = join(root, name);
        try {
            if (statSync(full).isDirectory()) {
                // entryFileIn lists the subdirectory and can throw (e.g. EACCES);
                // it must sit inside the try so that failure only skips this entry.
                const entry = entryFileIn(full);
                if (entry) {
                    files.push(entry);
                }
            }
            else if (isLoadableSiteFile(name)) {
                files.push(full);
            }
        }
        catch {
            // unreadable entry: skip it and keep scanning
        }
    }
    return files;
}
78
/**
 * Imports one file and normalizes the site(s) it exports.
 *
 * The export is taken from `default`, then `site`, then `sites`. A function
 * that is not already a site is first tried as a constructor and then as a
 * factory; a factory may be async, in which case its result is awaited. Array
 * exports yield one site per object element; non-object elements are ignored.
 *
 * @param {string} filePath Path of the module to import.
 * @param {string} origin   Value stored on each returned site's `origin`.
 * @returns {Promise<object[]>} The normalized sites (possibly empty).
 */
async function loadSiteFile(filePath, origin) {
    const mod = await import(pathToFileURL(filePath).href);
    let exported = mod.default ?? mod.site ?? mod.sites;
    if (exported == null) {
        return [];
    }
    // A class or factory function → instantiate/call.
    if (typeof exported === "function" && !isSite(exported)) {
        try {
            exported = new exported();
        }
        catch (constructErr) {
            try {
                // Async functions are not constructors, so they land here; without
                // the await the pending Promise itself would be passed to defineSite.
                exported = await exported();
            }
            catch (callErr) {
                console.error(`[website-api] skipping ${filePath}: export is a function but failed as both ` +
                    `a constructor (${errMessage(constructErr)}) and a factory (${errMessage(callErr)})`);
                return [];
            }
        }
    }
    const list = Array.isArray(exported) ? exported : [exported];
    const sites = [];
    for (const item of list) {
        if (!item || typeof item !== "object") {
            continue;
        }
        const site = defineSite(item);
        site.origin = origin;
        sites.push(site);
    }
    return sites;
}
111
/**
 * Loads every site found in the bundled directory and in each extension root.
 *
 * Roots are processed in order (bundled first) and sites are keyed by `id`,
 * so a later root replaces an earlier site with the same id — this is how a
 * user extension shadows a bundled site. A file that fails to load is
 * reported on stderr and skipped; it never aborts the remaining files.
 */
export async function discoverSites(options = {}) {
    const env = options.env ?? process.env;
    const searchRoots = [{ dir: options.bundledDir ?? BUNDLED_SITES_DIR, origin: "bundled" }];
    for (const dir of options.roots ?? extensionRoots(env)) {
        searchRoots.push({ dir, origin: "extension" });
    }
    const byId = new Map();
    for (const { dir, origin } of searchRoots) {
        if (existsSync(dir)) {
            for (const file of collectSiteFiles(dir)) {
                let loaded;
                try {
                    loaded = await loadSiteFile(file, origin);
                }
                catch (err) {
                    // Always print the one-line reason so extension authors can
                    // diagnose; the full error object only in debug mode.
                    console.error(`[website-api] failed to load site ${file}: ${errMessage(err)}`);
                    if (env.WEBSITE_API_DEBUG) {
                        console.error(err);
                    }
                    continue;
                }
                for (const site of loaded) {
                    byId.set(site.id, site);
                }
            }
        }
    }
    return [...byId.values()];
}
@@ -1 +1,332 @@
1
- import{createHash as e}from"node:crypto";import{existsSync as t,mkdirSync as r,readdirSync as n,readFileSync as o,rmSync as i,statSync as s,writeFileSync as c}from"node:fs";import{homedir as a}from"node:os";import{dirname as f,join as p}from"node:path";import{pathToFileURL as u}from"node:url";import{defineSite as l}from"./define-site.js";import{extensionRoots as d}from"./loader.js";export const DEFAULT_REGISTRY={name:"guocity",repo:"guocity/website-api-list",branch:"main"};function h(e){return e.XDG_CONFIG_HOME||p(a(),".config")}export function configPath(e=process.env){return p(h(e),"website-api","config.json")}function m(e){return d(e)[0]}export function parseRepoSpec(e){let t=e.trim();t=t.replace(/^https?:\/\/github\.com\//i,"").replace(/^github:/i,""),t=t.replace(/\.git$/i,"").replace(/\/$/,"");let r="main";const n=t.indexOf("#");n>=0&&(r=t.slice(n+1)||"main",t=t.slice(0,n));const o=t.split("/").filter(Boolean);if(o.length<2)throw new Error(`Invalid registry spec "${e}" (expected owner/repo)`);const i=`${o[0]}/${o[1]}`;return{name:o[0],repo:i,branch:r}}function g(e){const r=configPath(e);if(!t(r))return{};try{return JSON.parse(o(r,"utf8"))}catch{return{}}}function w(e,t){const n=configPath(t);r(f(n),{recursive:!0}),c(n,JSON.stringify(e,null,2)+"\n","utf8")}export function resolveRegistries(e=process.env){const t=[],r=new Set,n=e=>{r.has(e.repo)||(r.add(e.repo),t.push(e))};if(e.WEBSITE_API_REGISTRY)for(const t of e.WEBSITE_API_REGISTRY.split(","))t.trim()&&n(parseRepoSpec(t));for(const t of g(e).registries??[])n(t);return n(DEFAULT_REGISTRY),t}export function addRegistry(e,t=process.env){const r=parseRepoSpec(e),n=g(t);return n.registries=(n.registries??[]).filter(e=>e.repo!==r.repo),n.registries.push(r),w(n,t),r}export function removeRegistry(e,t=process.env){const r=g(t),n=r.registries?.length??0;return r.registries=(r.registries??[]).filter(t=>t.repo!==e&&t.name!==e),w(r,t),(r.registries?.length??0)<n}function 
y(e,t,r){return`https://raw.githubusercontent.com/${e}/${t}/${r}`}function v(){return e=>fetch(e)}async function $(e,t){const r=await t(e);if(!r.ok)throw new Error(`GET ${e} → HTTP ${r.status}`);return Buffer.from(await r.arrayBuffer())}function E(t){return e("sha256").update(t).digest("hex")}export async function loadIndex(e,n={}){const i=n.env??process.env,s=n.fetchImpl??v(),a=n.ttlMs??36e5,u=function(e,t){const r=e.replace(/[^a-z0-9._-]+/gi,"_");return p(h(t),"website-api","cache",`${r}.json`)}(e.repo,i);if(!n.refresh&&t(u))try{const{fetchedAt:e,index:t}=JSON.parse(o(u,"utf8"));if("number"==typeof e&&Date.now()-e<a)return t}catch{}const l=await async function(e,t){const r=await t(e);if(!r.ok)throw new Error(`GET ${e} → HTTP ${r.status}`);return r.text()}(y(e.repo,e.branch,"index.json"),s),d=JSON.parse(l);if(!d||!Array.isArray(d.sites))throw new Error(`Registry ${e.repo} has no valid index.json`);try{r(f(u),{recursive:!0}),c(u,JSON.stringify({fetchedAt:Date.now(),index:d}),"utf8")}catch{}return d}export async function searchRegistries(e,t={}){const r=t.env??process.env,n=e.trim().toLowerCase(),o=[];for(const e of resolveRegistries(r)){let r;try{r=await loadIndex(e,t)}catch{continue}for(const t of r.sites){const r=[t.id,t.name,t.domain,t.description,...t.tags??[]].join(" ").toLowerCase();n&&!r.includes(n)||o.push({...t,registry:e})}}return o}export async function resolveEntry(e,t={}){const r=t.env??process.env,n=[];for(const o of resolveRegistries(r)){if(t.registryName&&o.name!==t.registryName&&o.repo!==t.registryName)continue;let r;try{r=await loadIndex(o,t)}catch{continue}const i=r.sites.find(t=>t.id===e);i&&n.push({source:o,index:r,entry:i})}if(0===n.length)throw new Error(`Site "${e}" not found in any registry`);if(n.length>1){const t=n.map(e=>e.source.name).join(", ");throw new Error(`Site "${e}" is offered by multiple registries (${t}); pass --registry <name>`)}return n[0]}export async function installEntry(e,t,n,o={}){const 
s=o.env??process.env,a=o.fetchImpl??v();if(!n.files?.length)throw new Error(`Catalog entry "${n.id}" lists no files`);const d=p(m(s),n.id),h=[];for(const r of n.files){const o=y(e.repo,t.commit,`${n.path}/${r.name}`),i=await $(o,a),s=E(i);if(s!==r.sha256)throw new Error(`Integrity check failed for ${n.id}/${r.name} (expected ${r.sha256.slice(0,12)}…, got ${s.slice(0,12)}…)`);h.push({name:r.name,buf:i})}i(d,{recursive:!0,force:!0}),r(d,{recursive:!0});for(const{name:e,buf:t}of h){const n=p(d,e);r(f(n),{recursive:!0}),c(n,t)}await async function(e,t){let r;try{r=await import(u(e).href)}catch(e){throw new Error(`Downloaded site failed to import: ${e instanceof Error?e.message:String(e)}`)}const n=r.default??r.site;if(!n||"object"!=typeof n)throw new Error(`Downloaded ${e} does not default-export a site object`);const o=l(n);if(o.id!==t)throw new Error(`Catalog id "${t}" does not match the site's own id "${o.id}"`)}(p(d,n.files[0].name),n.id);const g={id:n.id,registry:e.name,repo:e.repo,commit:t.commit,version:n.version,installedAt:(new Date).toISOString()};return c(p(d,".source.json"),JSON.stringify(g,null,2)+"\n","utf8"),{dir:d,record:g}}export function listInstalled(e=process.env){const r=m(e);if(!t(r))return[];const i=[];for(const e of n(r)){const t=p(r,e,".source.json");try{s(t).isFile()&&i.push(JSON.parse(o(t,"utf8")))}catch{}}return i}export function removeInstalled(e,r=process.env){const n=p(m(r),e);return!!t(n)&&(i(n,{recursive:!0,force:!0}),!0)}
1
+ import { createHash } from "node:crypto";
2
+ import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs";
3
+ import { homedir } from "node:os";
4
+ import { dirname, join } from "node:path";
5
+ import { pathToFileURL } from "node:url";
6
+ import { defineSite } from "./define-site.js";
7
+ import { extensionRoots } from "./loader.js";
8
/** The registry that is always searched last, even when none is configured. */
export const DEFAULT_REGISTRY = { name: "guocity", repo: "guocity/website-api-list", branch: "main" };
/** How long a cached catalog stays fresh: one hour, in milliseconds. */
const INDEX_TTL_MS = 3_600_000;
15
+ // ───────────────────────────── paths ─────────────────────────────
16
/** Base config directory: `$XDG_CONFIG_HOME` when set and non-empty, else `~/.config`. */
function configHome(env) {
    const { XDG_CONFIG_HOME: xdg } = env;
    if (xdg) {
        return xdg;
    }
    return join(homedir(), ".config");
}
19
/** Path of the user config file: `<config home>/website-api/config.json`. */
export function configPath(env = process.env) {
    const segments = [configHome(env), "website-api", "config.json"];
    return join(...segments);
}
23
/**
 * Path of the on-disk catalog cache for one registry. Every run of characters
 * outside `[a-z0-9._-]` (case-insensitive) in the repo name becomes a single
 * `_`, so `owner/repo` maps to `owner_repo.json`.
 */
function cachePath(repo, env) {
    const fileName = `${repo.replace(/[^a-z0-9._-]+/gi, "_")}.json`;
    const cacheDir = join(configHome(env), "website-api", "cache");
    return join(cacheDir, fileName);
}
27
/** Where registry sites are installed: the first extension root (the config-home one). */
function installRoot(env) {
    const [primaryRoot] = extensionRoots(env);
    return primaryRoot;
}
31
+ // ───────────────────────────── config / sources ─────────────────────────────
32
/**
 * Parses "owner/repo", "owner/repo#branch", "github:owner/repo", or a
 * github.com URL into a registry source.
 *
 * @param {string} spec The user-supplied registry spec.
 * @returns {{ name: string, repo: string, branch: string }} `name` is the
 *   owner, `repo` is "owner/repo", `branch` defaults to "main".
 * @throws {Error} When the spec does not contain at least owner and repo.
 */
export function parseRepoSpec(spec) {
    let s = spec.trim();
    s = s.replace(/^https?:\/\/github\.com\//i, "").replace(/^github:/i, "");
    // Split off "#branch" BEFORE stripping suffixes: stripping first only saw
    // the end of the whole string, so "owner/repo.git#dev" kept its ".git".
    let branch = "main";
    const hash = s.indexOf("#");
    if (hash >= 0) {
        branch = s.slice(hash + 1).trim().replace(/\/+$/, "") || "main";
        s = s.slice(0, hash);
    }
    // Trailing slashes go first so "owner/repo.git/" also loses its ".git".
    s = s.replace(/\/+$/, "").replace(/\.git$/i, "");
    const parts = s.split("/").filter(Boolean);
    if (parts.length < 2) {
        throw new Error(`Invalid registry spec "${spec}" (expected owner/repo)`);
    }
    const [owner, project] = parts;
    return { name: owner, repo: `${owner}/${project}`, branch };
}
50
/**
 * Reads config.json, tolerating every kind of damage.
 *
 * A missing file, unparsable JSON, or JSON that is not a plain object (for
 * example `null` or an array) yields `{}`. A `registries` value that is not
 * an array is dropped, and entries without a non-empty string `repo` are
 * filtered out, so callers can iterate `registries` and read `.repo` safely.
 *
 * @param {Record<string, string | undefined>} env Environment used to locate the file.
 * @returns {object} The parsed config, or `{}`.
 */
function readConfig(env) {
    const path = configPath(env);
    if (!existsSync(path)) {
        return {};
    }
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(path, "utf8"));
    }
    catch {
        return {};
    }
    // JSON.parse also succeeds for null, arrays and primitives; none of them
    // can carry a `registries` property the callers could use.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        return {};
    }
    const { registries, ...rest } = parsed;
    if (!Array.isArray(registries)) {
        return rest;
    }
    const usable = registries.filter((r) => r !== null && typeof r === "object" && typeof r.repo === "string" && r.repo !== "");
    return { ...rest, registries: usable };
}
61
/** Persists the config as 2-space-indented JSON with a trailing newline, creating the directory if needed. */
function writeConfig(cfg, env) {
    const target = configPath(env);
    mkdirSync(dirname(target), { recursive: true });
    const serialized = `${JSON.stringify(cfg, null, 2)}\n`;
    writeFileSync(target, serialized, "utf8");
}
66
/**
 * Builds the ordered list of registries to search:
 *   1. every spec in $WEBSITE_API_REGISTRY (comma-separated, blanks skipped)
 *   2. the registries stored in config.json
 *   3. DEFAULT_REGISTRY, always appended as the fallback
 * The first occurrence of each `repo` wins; later duplicates are dropped.
 */
export function resolveRegistries(env = process.env) {
    // A Map keeps insertion order, which is the priority order.
    const byRepo = new Map();
    const add = (source) => {
        if (!byRepo.has(source.repo)) {
            byRepo.set(source.repo, source);
        }
    };
    const override = env.WEBSITE_API_REGISTRY;
    if (override) {
        override
            .split(",")
            .filter((spec) => spec.trim())
            .forEach((spec) => add(parseRepoSpec(spec)));
    }
    const configured = readConfig(env).registries ?? [];
    for (const source of configured) {
        add(source);
    }
    add(DEFAULT_REGISTRY);
    return [...byRepo.values()];
}
93
/**
 * Stores a registry in config.json and returns the parsed source. An existing
 * entry for the same repo is replaced, and the new entry goes last.
 */
export function addRegistry(spec, env = process.env) {
    const source = parseRepoSpec(spec);
    const cfg = readConfig(env);
    const others = (cfg.registries ?? []).filter((existing) => existing.repo !== source.repo);
    cfg.registries = [...others, source];
    writeConfig(cfg, env);
    return source;
}
102
/**
 * Drops every configured registry whose `repo` or `name` equals the argument,
 * rewrites config.json, and reports whether anything was dropped.
 */
export function removeRegistry(repoOrName, env = process.env) {
    const cfg = readConfig(env);
    const current = cfg.registries ?? [];
    const kept = current.filter((r) => !(r.repo === repoOrName || r.name === repoOrName));
    cfg.registries = kept;
    writeConfig(cfg, env);
    return kept.length < current.length;
}
110
+ // ───────────────────────────── fetching ─────────────────────────────
111
/** URL of one file in a GitHub repo at a given ref, served by raw.githubusercontent.com. */
function rawUrl(repo, ref, path) {
    const segments = [repo, ref, path].map((part) => `${part}`);
    return `https://raw.githubusercontent.com/${segments.join("/")}`;
}
114
/** Default fetch implementation: a one-argument wrapper around the global `fetch`. */
function defaultFetch() {
    const viaGlobalFetch = (url) => fetch(url);
    return viaGlobalFetch;
}
117
/** GETs a URL and returns the body as text; throws with the HTTP status on a non-OK response. */
async function getText(url, fetchImpl) {
    const response = await fetchImpl(url);
    if (response.ok) {
        return response.text();
    }
    throw new Error(`GET ${url} → HTTP ${response.status}`);
}
123
/** GETs a URL and returns the body as a Buffer; throws with the HTTP status on a non-OK response. */
async function getBuffer(url, fetchImpl) {
    const response = await fetchImpl(url);
    if (response.ok) {
        const bytes = await response.arrayBuffer();
        return Buffer.from(bytes);
    }
    throw new Error(`GET ${url} → HTTP ${response.status}`);
}
129
/** Lower-case hex SHA-256 digest of a buffer or string. */
function sha256(buf) {
    const hasher = createHash("sha256");
    hasher.update(buf);
    return hasher.digest("hex");
}
132
/**
 * Validates a fetched index.json before anything downstream trusts its shape.
 * Registries live on the open internet — a malformed catalog must fail here
 * with a pointed message, not at install time with a TypeError.
 *
 * Besides shape, this rejects names that could escape the install directory:
 * a site `id` becomes a directory name and each file `name` a path below it,
 * so neither may contain a `..` segment, and an `id` must be a single path
 * segment other than `.`.
 *
 * @param {unknown} data Parsed index.json.
 * @param {string} repo  Registry repo, used in error messages.
 * @returns {object} `data` itself, once validated.
 * @throws {Error} Naming the first offending field.
 */
function validateIndex(data, repo) {
    const bad = (detail) => {
        throw new Error(`Registry ${repo} has an invalid index.json: ${detail}`);
    };
    // Both separators count: on Windows "\" separates segments as well.
    const hasParentSegment = (value) => value.split(/[\\/]/).includes("..");
    const isSingleSegment = (value) => !/[\\/]/.test(value) && value !== "." && value !== "..";
    if (!data || typeof data !== "object") {
        bad("not an object");
    }
    const index = data;
    if (typeof index.commit !== "string" || !index.commit) {
        bad(`missing "commit"`);
    }
    if (!Array.isArray(index.sites)) {
        bad(`"sites" is not an array`);
    }
    index.sites.forEach((entry, i) => {
        for (const field of ["id", "name", "domain", "description", "path"]) {
            if (typeof entry?.[field] !== "string" || !entry[field]) {
                bad(`sites[${i}].${field} is missing`);
            }
        }
        if (!isSingleSegment(entry.id)) {
            bad(`sites[${i}].id is not a plain directory name`);
        }
        if (hasParentSegment(entry.path)) {
            bad(`sites[${i}].path must not contain ".." segments`);
        }
        if (!Array.isArray(entry.files) || entry.files.length === 0) {
            bad(`sites[${i}].files is empty`);
        }
        entry.files.forEach((file, j) => {
            if (typeof file?.name !== "string" || !file.name) {
                bad(`sites[${i}].files[${j}].name is missing`);
            }
            if (hasParentSegment(file.name)) {
                bad(`sites[${i}].files[${j}].name must not contain ".." segments`);
            }
            if (typeof file?.sha256 !== "string" || !/^[0-9a-f]{64}$/i.test(file.sha256)) {
                bad(`sites[${i}].files[${j}].sha256 is not a sha256 hex digest`);
            }
        });
    });
    return index;
}
165
/**
 * Loads one registry's index.json, using a disk cache with a TTL.
 *
 * A cached copy is used only when `opts.refresh` is not set, its age is
 * within the TTL, and it still passes validation; anything else falls through
 * to a fresh fetch. The fresh index is validated, then written to the cache
 * on a best-effort basis.
 *
 * @param {{ repo: string, branch: string }} source Registry to load.
 * @param {{ env?: object, fetchImpl?: Function, ttlMs?: number, refresh?: boolean }} [opts]
 * @returns {Promise<object>} The validated index.
 * @throws {Error} When the fetch fails or the fetched index is invalid.
 */
export async function loadIndex(source, opts = {}) {
    const env = opts.env ?? process.env;
    const fetchImpl = opts.fetchImpl ?? defaultFetch();
    const ttl = opts.ttlMs ?? INDEX_TTL_MS;
    const cache = cachePath(source.repo, env);
    if (!opts.refresh && existsSync(cache)) {
        try {
            const { fetchedAt, index } = JSON.parse(readFileSync(cache, "utf8"));
            const age = typeof fetchedAt === "number" ? Date.now() - fetchedAt : Number.NaN;
            // age >= 0 rejects a timestamp from the future (clock change or a
            // hand-edited file), which would otherwise never expire.
            if (age >= 0 && age < ttl) {
                // The cache file is as untrusted as the network: it may be
                // corrupted or written by a version with weaker checks. A
                // validation error lands in the catch and triggers a refetch.
                return validateIndex(index, source.repo);
            }
        }
        catch {
            // fall through to a fresh fetch
        }
    }
    const text = await getText(rawUrl(source.repo, source.branch, "index.json"), fetchImpl);
    const index = validateIndex(JSON.parse(text), source.repo);
    try {
        mkdirSync(dirname(cache), { recursive: true });
        writeFileSync(cache, JSON.stringify({ fetchedAt: Date.now(), index }), "utf8");
    }
    catch {
        // a non-writable cache must not break the install
    }
    return index;
}
193
/**
 * Searches every resolved registry. The query is trimmed, lower-cased and
 * matched as a substring of each entry's id, name, domain, description and
 * tags; an empty query matches everything. Each hit is a copy of the entry
 * with its `registry` source attached. Registries that fail to load are skipped.
 */
export async function searchRegistries(query, opts = {}) {
    const env = opts.env ?? process.env;
    const needle = query.trim().toLowerCase();
    const results = [];
    for (const source of resolveRegistries(env)) {
        let index;
        try {
            index = await loadIndex(source, opts);
        }
        catch {
            continue; // an unreachable registry never aborts the search
        }
        const hits = index.sites.filter((entry) => {
            const searchable = [entry.id, entry.name, entry.domain, entry.description, ...(entry.tags ?? [])];
            const text = searchable.join(" ").toLowerCase();
            return !needle || text.includes(needle);
        });
        results.push(...hits.map((entry) => ({ ...entry, registry: source })));
    }
    return results;
}
216
/**
 * Resolves a single id to its catalog entry.
 *
 * When `opts.registryName` is given, only registries whose `name` or `repo`
 * equals it are consulted. Registries that fail to load are skipped, but
 * their errors are appended to the "not found" message — otherwise an
 * offline machine would claim the site does not exist.
 *
 * @param {string} id Site id to look up.
 * @param {{ env?: object, registryName?: string }} [opts] Also forwarded to loadIndex.
 * @returns {Promise<{ source: object, index: object, entry: object }>}
 * @throws {Error} When no registry offers the id, or more than one does.
 */
export async function resolveEntry(id, opts = {}) {
    const env = opts.env ?? process.env;
    const matches = [];
    const failures = [];
    for (const source of resolveRegistries(env)) {
        if (opts.registryName && source.name !== opts.registryName && source.repo !== opts.registryName) {
            continue;
        }
        let index;
        try {
            index = await loadIndex(source, opts);
        }
        catch (err) {
            failures.push(`${source.repo}: ${err instanceof Error ? err.message : String(err)}`);
            continue;
        }
        const entry = index.sites.find((s) => s.id === id);
        if (entry) {
            matches.push({ source, index, entry });
        }
    }
    if (matches.length === 0) {
        const detail = failures.length > 0 ? ` (could not load ${failures.join("; ")})` : "";
        throw new Error(`Site "${id}" not found in any registry${detail}`);
    }
    if (matches.length > 1) {
        const names = matches.map((m) => m.source.name).join(", ");
        throw new Error(`Site "${id}" is offered by multiple registries (${names}); pass --registry <name>`);
    }
    return matches[0];
}
245
+ // ───────────────────────────── install / list / remove ─────────────────────────────
246
/**
 * Downloads one site's files (pinned to `index.commit`), verifies each against
 * its sha256, writes them below the install root, validates that the entry
 * file loads as a site with the catalog id, and records provenance in
 * `.source.json`.
 *
 * Safety properties:
 *  - the install directory must be a direct child of the install root and
 *    every file must land inside it, checked before anything is downloaded,
 *    deleted or written (ids and names come from an untrusted catalog);
 *  - nothing on disk is touched until every file is downloaded and verified;
 *  - if the written site fails validation, the directory is removed again so
 *    a broken site is not picked up by later discovery.
 *
 * The first entry of `entry.files` is treated as the site's entry module.
 *
 * @param {{ name: string, repo: string }} source Registry the entry came from.
 * @param {{ commit: string }} index  The registry index (supplies the pinned commit).
 * @param {object} entry              Catalog entry: id, path, files[{ name, sha256 }], version.
 * @param {{ env?: object, fetchImpl?: Function }} [opts]
 * @returns {Promise<{ dir: string, record: object }>} Install directory and provenance record.
 * @throws {Error} On an unsafe id/name, a failed download, a hash mismatch, or failed validation.
 */
export async function installEntry(source, index, entry, opts = {}) {
    const env = opts.env ?? process.env;
    const fetchImpl = opts.fetchImpl ?? defaultFetch();
    if (!entry.files?.length) {
        throw new Error(`Catalog entry "${entry.id}" lists no files`);
    }
    // join() normalizes, so comparing against dirname() is a lexical check that
    // the id is exactly one path segment ("..", ".", "a/b" all fail it).
    const root = join(installRoot(env));
    const dir = join(root, entry.id);
    if (dirname(dir) !== root) {
        throw new Error(`Catalog entry id "${entry.id}" is not a plain directory name`);
    }
    const isInsideDir = (target) => target.length > dir.length + 1 &&
        target.startsWith(dir) &&
        /[\\/]/.test(target[dir.length]);
    const targets = entry.files.map((file) => {
        const target = join(dir, file.name);
        if (!isInsideDir(target)) {
            throw new Error(`Catalog entry "${entry.id}" lists a file outside its directory: ${file.name}`);
        }
        return target;
    });
    // Download + verify everything before writing anything, so a bad hash
    // mid-download never leaves a half-installed site behind.
    const staged = [];
    for (const [i, file] of entry.files.entries()) {
        const url = rawUrl(source.repo, index.commit, `${entry.path}/${file.name}`);
        const buf = await getBuffer(url, fetchImpl);
        const got = sha256(buf);
        // Computed digests are lower-case hex; accept a catalog that wrote the
        // same digest in upper case.
        if (got !== file.sha256.toLowerCase()) {
            throw new Error(`Integrity check failed for ${entry.id}/${file.name} (expected ${file.sha256.slice(0, 12)}…, got ${got.slice(0, 12)}…)`);
        }
        staged.push({ target: targets[i], buf });
    }
    rmSync(dir, { recursive: true, force: true });
    mkdirSync(dir, { recursive: true });
    for (const { target, buf } of staged) {
        mkdirSync(dirname(target), { recursive: true });
        writeFileSync(target, buf);
    }
    // Validate it loads as a site whose id matches the catalog.
    try {
        await assertLoadsAsSite(targets[0], entry.id);
    }
    catch (err) {
        rmSync(dir, { recursive: true, force: true });
        throw err;
    }
    const record = {
        id: entry.id,
        registry: source.name,
        repo: source.repo,
        commit: index.commit,
        version: entry.version,
        installedAt: new Date().toISOString(),
    };
    writeFileSync(join(dir, ".source.json"), JSON.stringify(record, null, 2) + "\n", "utf8");
    return { dir, record };
}
289
/**
 * Imports a downloaded entry file and confirms it normalizes to the expected id.
 *
 * The import URL carries a unique query string. Node caches ES modules by
 * full URL, so without it a path already imported earlier in this process
 * would return the old module and the check would validate stale code
 * instead of the files just written.
 *
 * @param {string} filePath   Path of the entry module.
 * @param {string} expectedId Catalog id the site must declare.
 * @returns {Promise<void>}
 * @throws {Error} When the import fails (original error kept as `cause`), the
 *   module exports no site object, `defineSite` rejects it, or the id differs.
 */
async function assertLoadsAsSite(filePath, expectedId) {
    const url = pathToFileURL(filePath);
    url.searchParams.set("fresh", process.hrtime.bigint().toString());
    let mod;
    try {
        mod = await import(url.href);
    }
    catch (err) {
        throw new Error(`Downloaded site failed to import: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    }
    const exported = mod.default ?? mod.site;
    if (!exported || typeof exported !== "object") {
        throw new Error(`Downloaded ${filePath} does not default-export a site object`);
    }
    const site = defineSite(exported); // throws on missing required fields
    if (site.id !== expectedId) {
        throw new Error(`Catalog id "${expectedId}" does not match the site's own id "${site.id}"`);
    }
}
307
/**
 * Returns the provenance record of every registry-installed site, i.e. the
 * parsed `.source.json` of each install-root entry that has one. Entries
 * without a readable, parsable record are ignored.
 */
export function listInstalled(env = process.env) {
    const root = installRoot(env);
    if (!existsSync(root)) {
        return [];
    }
    return readdirSync(root).flatMap((name) => {
        const meta = join(root, name, ".source.json");
        try {
            if (!statSync(meta).isFile()) {
                return [];
            }
            // Wrapped in an array so flatMap adds exactly one record.
            return [JSON.parse(readFileSync(meta, "utf8"))];
        }
        catch {
            // not a registry-installed site; ignore
            return [];
        }
    });
}
325
/**
 * Removes an installed site directory.
 *
 * The id must be a single path segment. An empty id or "." would resolve to
 * the install root itself, ".." to its parent, and an id with a separator to
 * an arbitrary path — each of which would then be deleted recursively. Such
 * ids are treated as "nothing to remove".
 *
 * @param {string} id Site id (the directory name under the install root).
 * @param {Record<string, string | undefined>} [env] Environment used to locate the install root.
 * @returns {boolean} True if a directory was removed, false otherwise.
 */
export function removeInstalled(id, env = process.env) {
    const isPlainName = typeof id === "string" && id !== "" && id !== "." && id !== ".." && !/[\\/]/.test(id);
    if (!isPlainName) {
        return false;
    }
    const dir = join(installRoot(env), id);
    if (!existsSync(dir)) {
        return false;
    }
    rmSync(dir, { recursive: true, force: true });
    return true;
}
@@ -1,8 +1,9 @@
1
- import type { QueryOptions, Site } from "../types.js";
1
+ import type { QueryOptions, Site, SiteDef } from "../types.js";
2
2
  import { type ContextProviders } from "./context.js";
3
3
  /** The loaded site registry, populated by {@link loadSites}. */
4
4
  export declare let sites: Site[];
5
- /** Auto-discovers and loads all sites (bundled + extensions). Idempotent. */
5
+ /** Auto-discovers and loads all sites (bundled + extensions). Idempotent —
6
+ * concurrent callers share one in-flight discovery. */
6
7
  export declare function loadSites(force?: boolean): Promise<Site[]>;
7
8
  /** Test/embedding hook: replace the registry with an explicit set. */
8
9
  export declare function setSites(next: Site[]): void;
@@ -14,7 +15,14 @@ export declare function getSite(id: string): Site | null;
14
15
  */
15
16
  export declare function createUniversalSite(websiteId: string): Site | null;
16
17
  /**
17
- * Resolves a site (dedicated or universal), runs it with a fresh capability
18
- * context, and guarantees teardown of any browser session.
18
+ * Runs a single site (or plain site definition) with a fresh capability
19
+ * context and guaranteed teardown. This is the embedding API for other
20
+ * packages: import a bundled site via `website-api/sites/<dir>` (or bring
21
+ * your own definition) and execute it directly — no registry involved.
22
+ */
23
+ export declare function runSite(siteOrDef: Site | SiteDef, options?: QueryOptions, providers?: ContextProviders): Promise<unknown>;
24
+ /**
25
+ * Resolves a site by id from the loaded registry (or falls back to a
26
+ * universal GET for unknown domains/URLs) and runs it.
19
27
  */
20
28
  export declare function queryWebsite(websiteId: string, options?: QueryOptions, providers?: ContextProviders): Promise<unknown>;
@@ -1 +1,98 @@
1
- import{defineSite as t}from"./define-site.js";import{discoverSites as e}from"./loader.js";import{createContext as o}from"./context.js";export let sites=[];let r=!1;export async function loadSites(t=!1){return r&&!t||(sites=await e(),r=!0),sites}export function setSites(t){sites=t,r=!0}export function getSite(t){if(!t)return null;const e=t.toLowerCase().trim();return sites.find(t=>t.id.toLowerCase()===e||t.id.toLowerCase().replace(".com","")===e||t.domain.toLowerCase()===e||t.domain.toLowerCase().replace(".com","")===e)??null}export function createUniversalSite(e){let o=e;o.startsWith("http://")||o.startsWith("https://")||(o="https://"+o);try{const r=new URL(o);return t({id:e,name:e,domain:r.hostname,description:`Universal site for ${r.hostname}`,cookies:"optional",endpoints:[{url:r.href}]})}catch{return null}}export async function queryWebsite(t,e={},r={}){await loadSites();let i=getSite(t);if(!i){if(!function(t){return t.startsWith("http://")||t.startsWith("https://")||t.includes(".")||t.includes("/")||t.includes(":")}(t))throw new Error("command not found");if(i=createUniversalSite(t),!i)throw new Error("command not found")}const{ctx:n,dispose:s}=o(i,e,r);try{return await i.run(n)}finally{await s()}}
1
+ import { createContext } from "./context.js";
2
+ import { defineSite } from "./define-site.js";
3
+ import { discoverSites } from "./loader.js";
4
/** The loaded site registry, populated by {@link loadSites}. */
export let sites = [];
/**
 * The current discovery attempt (in flight or settled). `undefined` before the
 * first load and after the current attempt has failed.
 */
let loadPromise;
/**
 * Auto-discovers and loads all sites (bundled + extensions). Idempotent —
 * concurrent callers share one in-flight discovery.
 *
 * Each attempt only touches the shared state while it is still the current
 * one. A discovery that was superseded (by a forced reload or by `setSites`)
 * neither overwrites `sites` with stale results nor, on failure, clears the
 * newer attempt.
 *
 * @param {boolean} [force=false] - Start a fresh discovery even if one already ran or is running.
 * @returns {Promise<Array>} Resolves with the sites found by the attempt the caller joined.
 */
export async function loadSites(force = false) {
    if (!loadPromise || force) {
        const attempt = discoverSites().then((found) => {
            if (loadPromise === attempt) {
                sites = found;
            }
            return found;
        });
        loadPromise = attempt;
        attempt.catch(() => {
            // Allow a retry after a failed discovery, but only if nothing newer replaced it.
            if (loadPromise === attempt) {
                loadPromise = undefined;
            }
        });
    }
    return loadPromise;
}
21
/**
 * Test/embedding hook: installs an explicit registry.
 *
 * The load promise is replaced with one already resolved to `next`.
 *
 * @param {Array} next - The sites that become the registry.
 */
export function setSites(next) {
    loadPromise = Promise.resolve(next);
    sites = next;
}
26
/**
 * Finds a site by exact id/domain or a `.com`-insensitive match.
 *
 * The query is lower-cased and trimmed. An exact id/domain match always wins;
 * otherwise the first site whose id or domain equals the query once a
 * trailing `.com` is dropped is returned. Only a trailing `.com` is ignored,
 * so `shop.community.org` is not mistaken for `shopmunity.org`.
 *
 * @param {string} id - Site id or domain to look up.
 * @returns {object|null} The matching site, or `null` when nothing matches or `id` is empty.
 */
export function getSite(id) {
    if (!id) {
        return null;
    }
    const normalized = id.toLowerCase().trim();
    const withoutDotCom = (value) => value.replace(/\.com$/, "");
    let looseMatch = null;
    for (const site of sites) {
        const siteId = site.id.toLowerCase();
        const domain = site.domain.toLowerCase();
        if (siteId === normalized || domain === normalized) {
            return site;
        }
        // Remember the first loose hit, but keep scanning for an exact one.
        if (looseMatch === null &&
            (withoutDotCom(siteId) === normalized || withoutDotCom(domain) === normalized)) {
            looseMatch = site;
        }
    }
    return looseMatch;
}
36
/**
 * Heuristic: does the identifier look like a URL or a domain?
 *
 * True when it starts with an http(s) scheme or contains a dot, a slash or a
 * colon anywhere.
 *
 * @param {string} websiteId - Raw identifier supplied by the caller.
 * @returns {boolean}
 */
function isLikelyDomainOrUrl(websiteId) {
    const schemes = ["http://", "https://"];
    if (schemes.some((scheme) => websiteId.startsWith(scheme))) {
        return true;
    }
    const markers = [".", "/", ":"];
    return markers.some((marker) => websiteId.includes(marker));
}
43
/**
 * Creates a fallback site for any URL/domain without a dedicated definition:
 * a single endpoint at that URL with optional cookies.
 *
 * Surrounding whitespace is ignored and the `http://` / `https://` scheme is
 * detected case-insensitively; input without a scheme is treated as
 * `https://<input>`.
 *
 * @param {string} websiteId - URL or bare domain (optionally with a path).
 * @returns {object|null} The defined site, or `null` when no valid URL can be built.
 */
export function createUniversalSite(websiteId) {
    const target = websiteId.trim();
    const hasHttpScheme = /^https?:\/\//i.test(target);
    const urlStr = hasHttpScheme ? target : `https://${target}`;
    try {
        const url = new URL(urlStr);
        return defineSite({
            id: target,
            name: target,
            domain: url.hostname,
            description: `Universal site for ${url.hostname}`,
            cookies: "optional",
            endpoints: [{ url: url.href }],
        });
    }
    catch {
        // Unparseable input (or a rejected definition) means "no such site".
        return null;
    }
}
67
/**
 * Executes one site (or a plain site definition) inside a freshly created
 * capability context, always disposing that context afterwards — whether the
 * run succeeds or throws. Intended as the embedding entry point: callers hand
 * in a site/definition directly and the registry is not consulted.
 *
 * @param {object} siteOrDef - A site or a plain site definition; passed through `defineSite`.
 * @param {object} [options={}] - Query options forwarded to `createContext`.
 * @param {object} [providers={}] - Context providers forwarded to `createContext`.
 * @returns {Promise<unknown>} Whatever the site's `run` resolves to.
 */
export async function runSite(siteOrDef, options = {}, providers = {}) {
    const site = defineSite(siteOrDef);
    const { ctx: context, dispose: teardown } = createContext(site, options, providers);
    let outcome;
    try {
        outcome = await site.run(context);
    }
    finally {
        await teardown();
    }
    return outcome;
}
83
/**
 * Looks a site up by id in the loaded registry and runs it. When the id is
 * unknown but looks like a domain or URL, a universal fallback site is built
 * for it and run instead.
 *
 * @param {string} websiteId - Registered site id/domain, or an arbitrary domain/URL.
 * @param {object} [options={}] - Query options forwarded to `runSite`.
 * @param {object} [providers={}] - Context providers forwarded to `runSite`.
 * @returns {Promise<unknown>} The result of running the resolved site.
 * @throws {Error} "command not found" when no site matches and no fallback can be created.
 */
export async function queryWebsite(websiteId, options = {}, providers = {}) {
    await loadSites();
    const registered = getSite(websiteId);
    if (registered) {
        return runSite(registered, options, providers);
    }
    const fallback = isLikelyDomainOrUrl(websiteId) ? createUniversalSite(websiteId) : null;
    if (!fallback) {
        throw new Error("command not found");
    }
    return runSite(fallback, options, providers);
}
package/dist/src/env.js CHANGED
@@ -1 +1,34 @@
1
- import{readFileSync as t,existsSync as n}from"node:fs";import{dirname as o,resolve as r}from"node:path";import{fileURLToPath as s}from"node:url";const i=o(s(import.meta.url));export function loadEnv(){try{const o=r(i,"../../.env");if(!n(o))return;const s=t(o,"utf8");for(const t of s.split(/\r?\n/)){const n=t.trim();if(!n||n.startsWith("#"))continue;const o=n.indexOf("=");if(o<=0)continue;const r=n.substring(0,o).trim();let s=n.substring(o+1).trim();(s.startsWith('"')&&s.endsWith('"')||s.startsWith("'")&&s.endsWith("'"))&&(s=s.substring(1,s.length-1)),process.env[r]=s}}catch{}}
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { dirname, resolve } from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ const __dirname = dirname(fileURLToPath(import.meta.url));
5
/**
 * Reads `../../.env` (relative to this module's directory) and copies its
 * `KEY=value` pairs into `process.env`.
 *
 * Blank lines, `#` comment lines and lines without a key before `=` are
 * skipped. Keys and values are trimmed, and one pair of matching surrounding
 * quotes is removed from the value. A missing file is a no-op, and any error
 * while reading or parsing is swallowed on purpose (best-effort loading).
 */
export function loadEnv() {
    try {
        const envFile = resolve(__dirname, "../../.env");
        if (!existsSync(envFile)) {
            return;
        }
        const rows = readFileSync(envFile, "utf8").split(/\r?\n/);
        rows.forEach((row) => {
            const entry = row.trim();
            if (entry === "" || entry.startsWith("#")) {
                return;
            }
            const separator = entry.indexOf("=");
            if (separator <= 0) {
                return;
            }
            const name = entry.substring(0, separator).trim();
            const raw = entry.substring(separator + 1).trim();
            const opening = raw[0];
            const isQuoted = (opening === '"' || opening === "'") && raw.endsWith(opening);
            // Strip surrounding quotes
            process.env[name] = isQuoted ? raw.substring(1, raw.length - 1) : raw;
        });
    }
    catch {
        // Fail silently
    }
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * The page embeds the entire ranking inline as a global:
3
+ * `window.top500 = [ ...500 objects... ];`
4
+ * Each entry carries rank, worth/fWorth, biography, netWorthSummary, milestones,
5
+ * public/private/cash assets, schools, industry, etc. There is no JSON API — the
6
+ * data lives in the HTML — and the site is fronted by PerimeterX, so a plain
7
+ * fetch gets flagged. We drive the CDP-attached Chrome, clear any PerimeterX
8
+ * challenge automatically, then read the global out of the page.
9
+ */
10
+ declare const _default: import("../../types.js").Site;
11
+ export default _default;