website-api 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +141 -1
  2. package/dist/bin/cli.js +204 -1
  3. package/dist/src/capabilities/browser.d.ts +8 -2
  4. package/dist/src/capabilities/browser.js +106 -1
  5. package/dist/src/capabilities/cookies.d.ts +7 -1
  6. package/dist/src/capabilities/cookies.js +68 -1
  7. package/dist/src/capabilities/download.js +32 -1
  8. package/dist/src/capabilities/fingerprint.js +62 -1
  9. package/dist/src/capabilities/http.js +101 -1
  10. package/dist/src/capabilities/login/login-helper.js +185 -1
  11. package/dist/src/capabilities/login/login-strategy.js +36 -1
  12. package/dist/src/challenges/perimeterx.d.ts +62 -0
  13. package/dist/src/challenges/perimeterx.js +112 -0
  14. package/dist/src/cli/ext.js +338 -1
  15. package/dist/src/core/context.d.ts +2 -2
  16. package/dist/src/core/context.js +137 -1
  17. package/dist/src/core/define-site.js +74 -1
  18. package/dist/src/core/loader.js +142 -1
  19. package/dist/src/core/registry.js +332 -1
  20. package/dist/src/core/runtime.d.ts +12 -4
  21. package/dist/src/core/runtime.js +98 -1
  22. package/dist/src/env.js +34 -1
  23. package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
  24. package/dist/src/sites/bloomberg.com/index.js +49 -0
  25. package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
  26. package/dist/src/sites/chase.com/download-helper.js +266 -1
  27. package/dist/src/sites/chase.com/index.js +87 -1
  28. package/dist/src/sites/chase.com/openapi.yaml +76 -0
  29. package/dist/src/sites/chatgpt.com/index.js +24 -1
  30. package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
  31. package/dist/src/sites/claude.ai/claude-helpers.d.ts +20 -0
  32. package/dist/src/sites/claude.ai/claude-helpers.js +26 -0
  33. package/dist/src/sites/claude.ai/index.d.ts +2 -0
  34. package/dist/src/sites/claude.ai/index.js +42 -0
  35. package/dist/src/sites/claude.ai/openapi.yaml +54 -0
  36. package/dist/src/sites/cursor.com/index.js +12 -1
  37. package/dist/src/sites/cursor.com/openapi.yaml +39 -0
  38. package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
  39. package/dist/src/sites/e-zpassny.com/index.js +344 -0
  40. package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
  41. package/dist/src/sites/gemini.google.com/index.d.ts +11 -0
  42. package/dist/src/sites/gemini.google.com/index.js +80 -1
  43. package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
  44. package/dist/src/sites/google.com/google-helpers.js +255 -1
  45. package/dist/src/sites/google.com/index.js +253 -1
  46. package/dist/src/sites/google.com/openapi.yaml +59 -0
  47. package/dist/src/sites/ollama.com/index.js +43 -1
  48. package/dist/src/sites/ollama.com/openapi.yaml +39 -0
  49. package/dist/src/sites/perplexity.ai/index.js +253 -1
  50. package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
  51. package/dist/src/sites/pseg.com/index.js +243 -1
  52. package/dist/src/sites/pseg.com/openapi.yaml +42 -0
  53. package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
  54. package/dist/src/sites/voice.google.com/index.d.ts +2 -0
  55. package/dist/src/sites/voice.google.com/index.js +122 -0
  56. package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
  57. package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
  58. package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
  59. package/dist/src/sites/zillow.com/index.d.ts +2 -0
  60. package/dist/src/sites/zillow.com/index.js +303 -0
  61. package/dist/src/sites/zillow.com/openapi.yaml +55 -0
  62. package/dist/src/types.d.ts +16 -0
  63. package/dist/src/types.js +1 -1
  64. package/dist/src/util/args-parser.js +145 -1
  65. package/dist/src/util/google-json.js +74 -1
  66. package/dist/src/website-api.d.ts +7 -7
  67. package/dist/src/website-api.js +13 -1
  68. package/package.json +37 -10
@@ -0,0 +1,303 @@
1
+ import { solvePerimeterX, waitForManualSolve } from "../../challenges/perimeterx.js";
2
+ import { defineSite } from "../../core/define-site.js";
3
+ const ORIGIN = "https://www.zillow.com";
4
+ /**
5
+ * `getHomeRecs` query (homepage "Trending Homes" / "Selling Soon" collections).
6
+ * NOTE: /graphql enforces a persisted-query safelist (rejects anything else with
7
+ * QUERY_NOT_IN_SAFELIST), so this MUST stay byte-for-byte what Zillow's client
8
+ * sends — do not trim or reformat it.
9
+ */
10
+ const HOME_RECS_QUERY = `query getHomeRecs($placements: [ModulePlacement]) {
11
+ collections(
12
+ cExperienceId: "zillowWebHomePageCollections"
13
+ placements: $placements
14
+ ) {
15
+ collections {
16
+ __typename
17
+ contentType
18
+ recs {
19
+ id {
20
+ type
21
+ value
22
+ __typename
23
+ }
24
+ recsTags {
25
+ type
26
+ value
27
+ __typename
28
+ }
29
+ heroImage {
30
+ id
31
+ defaultLink
32
+ mediumSizeLink
33
+ highResolutionLink
34
+ __typename
35
+ }
36
+ __typename
37
+ }
38
+ name
39
+ placement
40
+ caption
41
+ contentType
42
+ link {
43
+ displayText
44
+ url
45
+ __typename
46
+ }
47
+ trackingTags {
48
+ type
49
+ value
50
+ __typename
51
+ }
52
+ propertyDetails {
53
+ address {
54
+ city
55
+ state
56
+ streetAddress
57
+ zipcode
58
+ __typename
59
+ }
60
+ attributionInfo {
61
+ brokerName
62
+ brokerPhoneNumber
63
+ mlsId
64
+ mlsName
65
+ agentName
66
+ agentPhoneNumber
67
+ providerLogo
68
+ __typename
69
+ }
70
+ bedrooms
71
+ bathrooms
72
+ homeStatus
73
+ resoFacts {
74
+ bathroomsFull
75
+ bathroomsHalf
76
+ bathroomsOneQuarter
77
+ bathroomsThreeQuarter
78
+ __typename
79
+ }
80
+ building {
81
+ buildingName
82
+ __typename
83
+ }
84
+ buildingId
85
+ country
86
+ currency
87
+ hdpUrl
88
+ newConstructionType
89
+ price
90
+ pslHomeType
91
+ pslMarketingStatus
92
+ pslProviderCategory
93
+ pslRawHomeStatus
94
+ videoId
95
+ isZillowOwned
96
+ zestimate
97
+ pslPublicForeclosureStage
98
+ listingStatusChangeDate
99
+ livingAreaValue
100
+ isRecentStatusChange
101
+ isPremierBuilder
102
+ isNonOwnerOccupied
103
+ isSavedByCurrentSignedInUser
104
+ totalRequiredMonthlyMaxFee
105
+ totalRequiredMonthlyMinFee
106
+ listPriceIncludesRequiredMonthlyFees
107
+ baseRent
108
+ photos {
109
+ url
110
+ width
111
+ height
112
+ __typename
113
+ }
114
+ zpid
115
+ listingMetadata {
116
+ isAdsRestricted
117
+ __typename
118
+ }
119
+ __typename
120
+ }
121
+ }
122
+ trackingTags {
123
+ type
124
+ value
125
+ __typename
126
+ }
127
+ __typename
128
+ }
129
+ }`;
130
/**
 * Converts a free-form place name into the region slug used in the
 * `/homes/.../{slug}_rb/` path, e.g. "New York, NY" → "New-York-NY".
 *
 * Every run of commas and/or whitespace collapses into a single hyphen, so
 * input such as "Austin , TX" or "Austin,,TX" yields "Austin-TX" rather than
 * a slug with doubled hyphens.
 *
 * @param query Place name as typed by the user.
 * @returns The hyphen-separated slug (the result is not URL-encoded).
 */
function toRegionSlug(query) {
    return query.trim().replace(/[\s,]+/g, "-");
}
134
/**
 * Normalizes a listing URL to an absolute one.
 *
 * Zillow returns detailUrl as either an absolute URL or a "/homedetails/..."
 * path. Absolute http(s) URLs are returned untouched; everything else is
 * resolved against ORIGIN.
 *
 * @param u URL or path; may be empty/nullish.
 * @returns The absolute URL, or `null` when `u` is falsy.
 */
function absUrl(u) {
    if (!u)
        return null;
    if (/^https?:\/\//.test(u))
        return u;
    // Protocol-relative ("//host/path"): only the scheme is missing.
    if (u.startsWith("//"))
        return `https:${u}`;
    // Guarantee exactly one "/" between the origin and a relative path.
    return u.startsWith("/") ? `${ORIGIN}${u}` : `${ORIGIN}/${u}`;
}
140
+ const STATUS_PATH = {
141
+ for_sale: "for_sale",
142
+ for_rent: "for_rent",
143
+ sold: "recently_sold",
144
+ };
145
/**
 * Gets the page past Zillow's PerimeterX/HUMAN challenge, or throws.
 *
 * First tries the automatic solver. Per the original author's note, the
 * lighter inline press-&-hold is auto-solved, while the hardened iframe
 * variant (what Zillow serves) rejects synthetic holds; in that case we wait
 * for a one-time human hold in the attached browser, which sets the
 * _px3/_pxhd clearance cookies for the session.
 *
 * NOTE(review): only `{ log }` is forwarded to the solver here; the site's
 * `--hold` option is not passed through — confirm whether that is intended.
 *
 * @param page Browser page currently showing (or possibly showing) the challenge.
 * @param log  Logger callback taking a single message string.
 * @throws Error on a PerimeterX hard block, or when the manual hold is not completed.
 */
async function clearChallenge(page, log) {
    const outcome = await solvePerimeterX(page, { log });
    if (!outcome.cleared) {
        // A hard block cannot be solved by anyone; report its reference and stop.
        if (outcome.kind === "hard-block") {
            const reference = outcome.referenceId ?? "?";
            throw new Error(`Zillow served a PerimeterX hard block (reference ${reference}). Wait for the IP to cool.`);
        }
        log("[zillow] press-&-hold needs a human — complete it in the attached Chrome window...");
        const solvedByHuman = await waitForManualSolve(page, { log });
        if (!solvedByHuman) {
            throw new Error("Zillow PerimeterX press-&-hold was not cleared. Complete it in the attached browser, then retry.");
        }
    }
}
164
/**
 * Zillow site definition (browser transport).
 *
 * Modes, checked in this order:
 *   --zpid <id>      → builds the listing URL locally, no network.
 *   --trending       → homepage collections via the `getHomeRecs` GraphQL query.
 *   --search <place> → navigates to the region page, then replays
 *                      `async-create-search-page-state` with the page's own queryState.
 *
 * NOTE(review): `hold` is declared as a parameter but never read in `run`, and
 * `limit` is applied to search results only (not to trending homes) — confirm
 * whether either is intentional.
 */
export default defineSite({
    id: "zillow",
    name: "Zillow",
    domain: "zillow.com",
    description: "Search Zillow listings, list trending homes, and resolve listing URLs (browser transport, auto-handles the PerimeterX challenge).",
    transport: "browser",
    cookies: "optional",
    keepBrowserOpen: true, // keep the px clearance cookie warm for subsequent runs
    parameters: [
        {
            name: "search",
            type: "string",
            description: 'Place to search, e.g. "Austin, TX" or "New York, NY"',
            short: "s",
        },
        {
            name: "status",
            type: "string",
            description: "for_sale | for_rent | sold (default for_sale)",
            default: "for_sale",
        },
        {
            name: "trending",
            type: "boolean",
            description: "List homepage trending / selling-soon collections",
            short: "t",
        },
        { name: "zpid", type: "string", description: "Return the canonical listing URL for a zpid" },
        { name: "limit", type: "number", description: "Cap the number of results returned" },
        { name: "hold", type: "number", description: "PerimeterX press-&-hold duration in ms (default 11000)" },
    ],
    run: async (ctx) => {
        // zpid → URL needs no network.
        if (ctx.options.zpid) {
            const zpid = String(ctx.options.zpid);
            return { zpid, url: `${ORIGIN}/homedetails/${zpid}_zpid/` };
        }
        const page = await ctx.browser();
        const log = (m) => ctx.debug && console.log(m);
        const limit = ctx.options.limit !== undefined ? Number(ctx.options.limit) : undefined;
        await clearChallenge(page, log);
        // ── Trending homes (homepage collections via getHomeRecs) ──
        if (ctx.options.trending) {
            // The GraphQL call is issued from the homepage; navigate there if needed.
            if (!page.url().startsWith(`${ORIGIN}/`) || new URL(page.url()).pathname !== "/") {
                await page.goto(`${ORIGIN}/`, { waitUntil: "domcontentloaded" });
                await clearChallenge(page, log);
            }
            const data = await page.evaluate(async (query) => {
                const res = await fetch("/graphql", {
                    method: "POST",
                    credentials: "include",
                    headers: {
                        "content-type": "application/json",
                        "client-id": "hops-homepage",
                        "x-caller-id": "hops-homepage",
                    },
                    body: JSON.stringify({ operationName: "getHomeRecs", query, variables: {} }),
                });
                // Report a failed request explicitly: a non-JSON error body would
                // otherwise make res.json() reject with an opaque parse error.
                if (!res.ok)
                    return { __httpStatus: res.status };
                return res.json();
            }, HOME_RECS_QUERY);
            if (data?.__httpStatus !== undefined) {
                throw new Error(`Zillow getHomeRecs failed: HTTP ${data.__httpStatus}`);
            }
            const cols = data?.data?.collections?.collections;
            // A GraphQL-level failure (e.g. QUERY_NOT_IN_SAFELIST) must not look
            // like "no trending homes".
            if (!cols && Array.isArray(data?.errors) && data.errors.length > 0) {
                const first = data.errors[0];
                throw new Error(`Zillow getHomeRecs failed: ${first?.message ?? JSON.stringify(first)}`);
            }
            const collections = (cols ?? []).map((c) => ({
                name: c.name,
                caption: c.caption,
                contentType: c.contentType,
                seeMoreUrl: c.link?.url ?? null,
                homes: (c.propertyDetails ?? []).map((p) => ({
                    zpid: p.zpid,
                    url: absUrl(p.hdpUrl),
                    price: p.price,
                    beds: p.bedrooms,
                    baths: p.bathrooms,
                    area: p.livingAreaValue,
                    status: p.homeStatus,
                    address: p.address
                        ? `${p.address.streetAddress}, ${p.address.city}, ${p.address.state} ${p.address.zipcode}`
                        : null,
                })),
            }));
            return { source: "getHomeRecs", collections };
        }
        // ── Search listings (navigate region URL → replay async-create-search-page-state) ──
        if (ctx.options.search) {
            const slug = toRegionSlug(String(ctx.options.search));
            // Own-property lookup only, so inputs like "constructor" cannot resolve
            // to Object.prototype members; unknown values keep falling back to for_sale.
            const requestedStatus = String(ctx.options.status || "for_sale");
            const status = Object.prototype.hasOwnProperty.call(STATUS_PATH, requestedStatus)
                ? STATUS_PATH[requestedStatus]
                : "for_sale";
            await page.goto(`${ORIGIN}/homes/${status}/${slug}_rb/`, { waitUntil: "domcontentloaded" });
            await clearChallenge(page, log);
            const payload = await page.evaluate(async () => {
                const nd = document.getElementById("__NEXT_DATA__");
                if (!nd)
                    return { error: "no __NEXT_DATA__" };
                const j = JSON.parse(nd.textContent || "{}");
                const sps = j?.props?.pageProps?.searchPageState;
                const queryState = sps?.queryState;
                if (!queryState)
                    return { error: "no queryState" };
                const res = await fetch("/async-create-search-page-state", {
                    method: "PUT",
                    credentials: "include",
                    headers: { "content-type": "application/json" },
                    body: JSON.stringify({
                        searchQueryState: queryState,
                        wants: { cat1: ["listResults", "mapResults"] },
                        requestId: 2,
                        isDebugRequest: false,
                    }),
                });
                // Route HTTP failures through the existing { error } channel.
                if (!res.ok)
                    return { error: `HTTP ${res.status} from async-create-search-page-state` };
                const data = await res.json();
                return { queryState, data };
            });
            if (payload.error)
                throw new Error(`Zillow search failed: ${payload.error}`);
            const qs = payload.queryState;
            const sr = payload.data?.cat1?.searchResults ?? {};
            // Prefer list results, but also fall back to map results when the list
            // is present yet empty (a plain `??` would stop at the empty array).
            const listed = sr.listResults;
            let results = (listed?.length ? listed : sr.mapResults) ?? listed ?? [];
            const total = payload.data?.categoryTotals?.cat1?.totalResultCount ?? results.length;
            if (limit && limit > 0)
                results = results.slice(0, limit);
            return {
                query: String(ctx.options.search),
                regionSelection: qs?.regionSelection,
                mapBounds: qs?.mapBounds,
                totalResultCount: total,
                count: results.length,
                results: results.map((r) => ({
                    zpid: r.zpid,
                    url: absUrl(r.detailUrl),
                    address: r.address,
                    price: r.price,
                    beds: r.beds,
                    baths: r.baths,
                    area: r.area,
                    status: r.statusType,
                    latLong: r.latLong,
                })),
            };
        }
        throw new Error("Specify one of: --search <place>, --trending, or --zpid <id>.");
    },
});
@@ -0,0 +1,55 @@
1
+ # Generated by `pnpm generate:openapi` — do not edit by hand.
2
+ openapi: 3.1.0
3
+ info:
4
+ title: Zillow
5
+ description: Search Zillow listings, list trending homes, and resolve listing URLs (browser
6
+ transport, auto-handles the PerimeterX challenge).
7
+ version: 1.1.3
8
+ servers:
9
+ - url: https://zillow.com
10
+ paths: {}
11
+ components:
12
+ securitySchemes:
13
+ chromeSession:
14
+ type: apiKey
15
+ in: cookie
16
+ name: session
17
+ description: "Authenticated via the user's real Chrome session: website-api injects decrypted Chrome
18
+ cookies for zillow.com into every request."
19
+ x-website-api:
20
+ id: zillow
21
+ domain: zillow.com
22
+ cookieDomain: zillow.com
23
+ transport: browser
24
+ cookies: optional
25
+ requiresLogin: false
26
+ imperative: true
27
+ cli:
28
+ command: website-api zillow
29
+ positionals: []
30
+ parameters:
31
+ - flag: --search
32
+ type: string
33
+ description: Place to search, e.g. "Austin, TX" or "New York, NY"
34
+ required: false
35
+ - flag: --status
36
+ type: string
37
+ description: for_sale | for_rent | sold (default for_sale)
38
+ default: for_sale
39
+ required: false
40
+ - flag: --trending
41
+ type: boolean
42
+ description: List homepage trending / selling-soon collections
43
+ required: false
44
+ - flag: --zpid
45
+ type: string
46
+ description: Return the canonical listing URL for a zpid
47
+ required: false
48
+ - flag: --limit
49
+ type: number
50
+ description: Cap the number of results returned
51
+ required: false
52
+ - flag: --hold
53
+ type: number
54
+ description: PerimeterX press-&-hold duration in ms (default 11000)
55
+ required: false
@@ -185,6 +185,13 @@ export interface SiteDef {
185
185
  domain: string;
186
186
  /** Short description of what data this site fetches. */
187
187
  description: string;
188
+ /**
189
+ * Domain to resolve Chrome cookies for, when it differs from `domain`. Use
190
+ * for sites whose auth cookies live on a parent domain — e.g. Gemini runs on
191
+ * `gemini.google.com` but its session cookies are scoped to `google.com`.
192
+ * Defaults to `domain`.
193
+ */
194
+ cookieDomain?: string;
188
195
  /** Transport used to reach the site. Defaults to "http". */
189
196
  transport?: "http" | "browser";
190
197
  /** Whether valid cookies are required. Defaults to "required". */
@@ -216,6 +223,8 @@ export interface Site {
216
223
  id: string;
217
224
  name: string;
218
225
  domain: string;
226
+ /** Domain used for cookie resolution (defaults to `domain`). */
227
+ cookieDomain: string;
219
228
  description: string;
220
229
  transport: "http" | "browser";
221
230
  cookies: "required" | "optional";
@@ -226,6 +235,8 @@ export interface Site {
226
235
  positionals: PositionalDefinition[];
227
236
  /** URL the browser transport lands on (from endpoints[0] or https://domain). */
228
237
  landingUrl: string;
238
+ /** The declared endpoints, preserved for tooling (e.g. OpenAPI generation). */
239
+ endpoints?: Endpoint[];
229
240
  run: (ctx: SiteContext) => Promise<unknown>;
230
241
  /** Set by the loader: where this site was discovered. */
231
242
  origin?: "bundled" | "extension";
@@ -245,6 +256,11 @@ export interface QueryOptions {
245
256
  userAgent?: string;
246
257
  /** When true, print full HTTP request/response details for debugging. */
247
258
  debug?: boolean;
259
+ /**
260
+ * Show the managed Chrome window (browser transport). Defaults to headless;
261
+ * an already-open CDP session is always reused regardless of this flag.
262
+ */
263
+ headed?: boolean;
248
264
  /** Directory to write downloads to. */
249
265
  outDir?: string;
250
266
  /** Allows passing custom site-specific parameters. */
package/dist/src/types.js CHANGED
@@ -1 +1 @@
1
- export{};
1
+ export {};
@@ -1 +1,145 @@
1
- const e=[{name:"profile",type:"string",description:"specific Chrome profile directory (e.g., 'Default')"},{name:"user-agent",type:"string",description:"custom User-Agent header for HTTP requests",short:"u"},{name:"debug",type:"boolean",description:"Print full HTTP request and response bodies for debugging"},{name:"keep-open",type:"boolean",description:"Leave the browser tab open after running (preserve the logged-in session)"},{name:"out",type:"string",description:"Write decoded response JSON/text to file instead of stdout",short:"o"}];export function parseArgsForWebsite(r=[],n=[],o){let i=!1;const s={},a=[...n,...e];for(const e of a){const r=t(e.name);void 0!==e.default?s[r]=e.default:"boolean"===e.type&&(s[r]=!1)}const c={};for(const e of r)c[e.name]=[];let p=0;for(let e=0;e<o.length;e++){const n=o[e];if("--help"!==n&&"-h"!==n)if(n.startsWith("-")){let r;const i=!n.startsWith("--"),c=i?n.slice(1):n.slice(2);if(r=i?a.find(e=>e.short===c):a.find(e=>e.name===c),!r)throw new Error(`Unknown option: ${n}`);const p=t(r.name);if("boolean"===r.type)s[p]=!0;else if("string"===r.type){const t=o[e+1];if(void 0===t||t.startsWith("-"))throw new Error(`Option ${n} requires a value`);s[p]=t,e++}else if("number"===r.type){const t=o[e+1];if(void 0===t||t.startsWith("-"))throw new Error(`Option ${n} requires a numeric value`);const r=Number(t);if(isNaN(r))throw new Error(`Option ${n} requires a valid numeric value, received: "${t}"`);s[p]=r,e++}}else if(p<r.length){const e=r[p];c[e.name].push(n),e.variadic||p++}else{const e=r[r.length-1];if(!e?.variadic)throw new Error(`Unexpected extra argument: "${n}"`);c[e.name].push(n)}else i=!0}for(const e of r){const r=c[e.name];if(e.required&&0===r.length&&!i)throw new Error(`Missing required argument: <${e.name}>`);s[t(e.name)]=r.length>0?r.join(" "):null}for(const e of a){const r=t(e.name);if(e.required&&void 0===s[r]&&!i)throw new Error(`Missing required option: --${e.name}`)}return{options:s,helpRequested:i}}function t(e){return 
e.replace(/-([a-z])/g,(e,t)=>t.toUpperCase())}
1
/**
 * Option flags accepted by every site command. They are appended after the
 * site's own parameter definitions when arguments are parsed.
 */
const GLOBAL_PARAMETERS = [
    // --profile <dir>
    {
        name: "profile",
        type: "string",
        description: "specific Chrome profile directory (e.g., 'Default')",
    },
    // --user-agent / -u <ua>
    {
        name: "user-agent",
        type: "string",
        description: "custom User-Agent header for HTTP requests",
        short: "u",
    },
    // --debug
    {
        name: "debug",
        type: "boolean",
        description: "Print full HTTP request and response bodies for debugging",
    },
    // --keep-open
    {
        name: "keep-open",
        type: "boolean",
        description: "Leave the browser tab open after running (preserve the logged-in session)",
    },
    // --headed
    {
        name: "headed",
        type: "boolean",
        description: "Show the managed Chrome window (default headless; reuses an already-open session)",
    },
    // --out / -o <file>
    {
        name: "out",
        type: "string",
        description: "Write decoded response JSON/text to file instead of stdout",
        short: "o",
    },
];
31
/**
 * Parses raw command-line arguments against a site's positional and option schemas.
 *
 * Site parameters are combined with GLOBAL_PARAMETERS. Option names are
 * exposed under their camelCase form ("keep-open" → `keepOpen`). Booleans
 * default to `false`; other options get their declared `default`, if any.
 * Positional values are joined with a single space (`null` when absent), and
 * a variadic positional absorbs every following bare argument.
 *
 * Numeric options accept negative values ("--offset -5"): the token after a
 * number flag is only treated as "another flag" when it does not parse as a
 * number. Empty or whitespace-only numeric values are rejected rather than
 * being coerced to 0.
 *
 * @param positionalDefs Expected positional arguments schema
 * @param parameterDefs Expected custom parameter option flags schema
 * @param argv Command line arguments (excluding node, cli paths and website id)
 * @returns `{ options, helpRequested }`
 * @throws Error on unknown options, missing/invalid option values, unexpected
 *         extra positionals, or missing required arguments (the "required"
 *         checks are skipped when help was requested).
 */
export function parseArgsForWebsite(positionalDefs = [], parameterDefs = [], argv) {
    let helpRequested = false;
    const options = {};
    // Site-specific parameters take precedence over globals on name clashes
    // because `find` returns the first match.
    const allParameters = [...parameterDefs, ...GLOBAL_PARAMETERS];
    // Initialize defaults
    for (const param of allParameters) {
        const camelName = toCamelCase(param.name);
        if (param.default !== undefined) {
            options[camelName] = param.default;
        }
        else if (param.type === "boolean") {
            options[camelName] = false;
        }
    }
    const positionalValues = {};
    for (const pos of positionalDefs) {
        positionalValues[pos.name] = [];
    }
    let positionalIdx = 0;
    for (let i = 0; i < argv.length; i++) {
        const arg = argv[i];
        if (arg === "--help" || arg === "-h") {
            helpRequested = true;
            continue;
        }
        if (arg.startsWith("-")) {
            // Option/flag: "-x" matches `short`, "--name" matches `name`.
            const isShort = !arg.startsWith("--");
            const flagName = isShort ? arg.slice(1) : arg.slice(2);
            const matchedParam = allParameters.find((p) => (isShort ? p.short : p.name) === flagName);
            if (!matchedParam) {
                throw new Error(`Unknown option: ${arg}`);
            }
            const camelName = toCamelCase(matchedParam.name);
            if (matchedParam.type === "boolean") {
                options[camelName] = true;
            }
            else if (matchedParam.type === "string") {
                const nextVal = argv[i + 1];
                if (nextVal === undefined || nextVal.startsWith("-")) {
                    throw new Error(`Option ${arg} requires a value`);
                }
                options[camelName] = nextVal;
                i++; // Skip the value in the next iteration
            }
            else if (matchedParam.type === "number") {
                const nextVal = argv[i + 1];
                // Number("") and Number("  ") are 0, so blank input is treated as invalid.
                const num = nextVal === undefined || nextVal.trim() === "" ? Number.NaN : Number(nextVal);
                // A leading "-" means "next flag" only if the token is not itself a number.
                if (nextVal === undefined || (nextVal.startsWith("-") && Number.isNaN(num))) {
                    throw new Error(`Option ${arg} requires a numeric value`);
                }
                if (Number.isNaN(num)) {
                    throw new Error(`Option ${arg} requires a valid numeric value, received: "${nextVal}"`);
                }
                options[camelName] = num;
                i++; // Skip the value in the next iteration
            }
        }
        else if (positionalIdx < positionalDefs.length) {
            const currentPos = positionalDefs[positionalIdx];
            positionalValues[currentPos.name].push(arg);
            // A variadic positional keeps collecting; otherwise move on to the next one.
            if (!currentPos.variadic) {
                positionalIdx++;
            }
        }
        else {
            // Extra positional arguments are only legal when the last positional is variadic.
            const lastPos = positionalDefs[positionalDefs.length - 1];
            if (!lastPos?.variadic) {
                throw new Error(`Unexpected extra argument: "${arg}"`);
            }
            positionalValues[lastPos.name].push(arg);
        }
    }
    // Join and validate positionals
    for (const pos of positionalDefs) {
        const vals = positionalValues[pos.name];
        if (pos.required && vals.length === 0 && !helpRequested) {
            throw new Error(`Missing required argument: <${pos.name}>`);
        }
        options[toCamelCase(pos.name)] = vals.length > 0 ? vals.join(" ") : null;
    }
    // Validate required parameters
    for (const param of allParameters) {
        const camelName = toCamelCase(param.name);
        if (param.required && options[camelName] === undefined && !helpRequested) {
            throw new Error(`Missing required option: --${param.name}`);
        }
    }
    return { options, helpRequested };
}
143
/**
 * Turns a kebab-case name into camelCase ("keep-open" → "keepOpen").
 * Only a hyphen directly followed by a lowercase ASCII letter is rewritten.
 */
function toCamelCase(str) {
    const upperAfterHyphen = (_match, letter) => letter.toUpperCase();
    return str.replace(/-([a-z])/g, upperAfterHyphen);
}
@@ -1 +1,74 @@
1
- function o(o,e){let r=o;for(const o of e){if(null==r||!Array.isArray(r))return;r=r[o]}return r}export function parseGoogleJsonWithSchema(e,r){const n={};for(const[t,s]of Object.entries(r)){const r=o(e,s.path);s.items&&Array.isArray(r)?n[t]=r.map(o=>parseGoogleJsonWithSchema(o,s.items)):n[t]=s.transform?s.transform(r):r}return n}export function decodeGoogleJson(o,e){let r=o.trim();r.startsWith(")]}'")&&(r=r.substring(4).trim());const n=r.indexOf("\n");if(-1===n)throw new Error("Invalid Google JSON response format: No newline found");const t=r.substring(0,n).trim(),s=parseInt(t,10);if(isNaN(s))throw new Error(`Invalid Google JSON response chunk length: "${t}"`);let i=r.substring(n+1,n+1+s);const a=i.lastIndexOf("]]");-1!==a&&(i=i.substring(0,a+2));const c=JSON.parse(i).find(o=>Array.isArray(o)&&o[1]===e);if(!c)throw new Error(`RPC ID "${e}" payload not found in Google JSON response`);return JSON.parse(c[2])}export const decode_google_json=decodeGoogleJson;export const parse_google_json_with_schema=parseGoogleJsonWithSchema;export const decodeGoogleJsonWithSchema=(o,e,r)=>parseGoogleJsonWithSchema(decodeGoogleJson(o,e),r);export const decode_google_json_with_schema=decodeGoogleJsonWithSchema;
1
/**
 * Walks a nested array along a list of indices and returns what it finds.
 *
 * @param obj  Root value (expected to be a nested array).
 * @param path Indices to follow, outermost first. An empty path returns `obj`.
 * @returns The value at the path, or `undefined` as soon as a step would have
 *          to index into something that is not an array.
 */
function getValueByPath(obj, path) {
    let node = obj;
    for (const step of path) {
        // Array.isArray is false for null/undefined too, so one check covers all cases.
        if (!Array.isArray(node)) {
            return undefined;
        }
        node = node[step];
    }
    return node;
}
14
/**
 * Maps a decoded Google batchexecute payload (nested arrays) onto a plain
 * object using a schema of array paths.
 *
 * For each schema key the value at `field.path` is looked up. When the field
 * declares `items` and the value is an array, every element is parsed
 * recursively with `field.items`; otherwise the value is passed through
 * `field.transform` when one is given, or used as-is.
 *
 * @param payload The decoded RPC payload array.
 * @param schema  Mapping of output keys to `{ path, items?, transform? }`.
 * @returns The normalized object, with one property per schema key.
 */
export function parseGoogleJsonWithSchema(payload, schema) {
    const resolveField = (field) => {
        const raw = getValueByPath(payload, field.path);
        if (field.items && Array.isArray(raw)) {
            return raw.map((element) => parseGoogleJsonWithSchema(element, field.items));
        }
        if (field.transform) {
            return field.transform(raw);
        }
        return raw;
    };
    const normalized = {};
    Object.entries(schema).forEach(([key, field]) => {
        normalized[key] = resolveField(field);
    });
    return normalized;
}
34
/**
 * Decodes a raw Google batchexecute response envelope and extracts the payload
 * for a given RPC ID.
 *
 * The body is read as an optional ")]}'" guard followed by one or more chunks,
 * each made of a decimal length line and a JSON array. Every chunk is scanned
 * in order until one contains a message whose second element equals `rpcId`,
 * so the payload is found even when it is not in the first chunk.
 *
 * Each chunk is cut at its last "]]" before parsing, which tolerates a length
 * prefix that overshoots the JSON text (e.g. by a trailing newline).
 * NOTE(review): the exact unit of the length prefix is not established by this
 * file — the cursor therefore advances by the text actually parsed, not by the
 * declared length.
 *
 * @param rawResponse The raw HTTP response body from Google's batchexecute endpoint.
 * @param rpcId The Google RPC ID to extract (e.g. 'jSf9Qc').
 * @returns The parsed JSON payload for the requested RPC ID.
 * @throws Error when the first chunk is malformed, or when no chunk carries `rpcId`.
 */
export function decodeGoogleJson(rawResponse, rpcId) {
    let text = rawResponse.trim();
    if (text.startsWith(")]}'")) {
        text = text.substring(4).trim();
    }
    let cursor = 0;
    let isFirstChunk = true;
    do {
        const newline = text.indexOf("\n", cursor);
        if (newline === -1) {
            if (isFirstChunk) {
                throw new Error("Invalid Google JSON response format: No newline found");
            }
            break; // trailing junk after the last chunk: stop scanning
        }
        const prefix = text.substring(cursor, newline).trim();
        const chunkLength = parseInt(prefix, 10);
        if (Number.isNaN(chunkLength)) {
            if (isFirstChunk) {
                throw new Error(`Invalid Google JSON response chunk length: "${prefix}"`);
            }
            break;
        }
        let jsonStr = text.substring(newline + 1, newline + 1 + chunkLength);
        const lastBrackets = jsonStr.lastIndexOf("]]");
        if (lastBrackets !== -1) {
            jsonStr = jsonStr.substring(0, lastBrackets + 2);
        }
        let envelope;
        try {
            envelope = JSON.parse(jsonStr);
        }
        catch (err) {
            // A broken first chunk is a hard error; a broken later chunk just
            // ends the search and falls through to "not found".
            if (isFirstChunk) {
                throw err;
            }
            break;
        }
        const rpcMessage = Array.isArray(envelope)
            ? envelope.find((msg) => Array.isArray(msg) && msg[1] === rpcId)
            : undefined;
        if (rpcMessage) {
            return JSON.parse(rpcMessage[2]);
        }
        // Move past the text just parsed, then past the whitespace before the next length line.
        cursor = newline + 1 + jsonStr.length;
        while (cursor < text.length && /\s/.test(text[cursor])) {
            cursor++;
        }
        isFirstChunk = false;
    } while (cursor < text.length);
    throw new Error(`RPC ID "${rpcId}" payload not found in Google JSON response`);
}
67
+ // Aliases for backward compatibility or snake_case preferences
68
+ export const decode_google_json = decodeGoogleJson;
69
+ export const parse_google_json_with_schema = parseGoogleJsonWithSchema;
70
/**
 * Convenience wrapper: decodes the batchexecute envelope for `rpcId`, then
 * normalizes the resulting payload with `schema`.
 */
export const decodeGoogleJsonWithSchema = (rawResponse, rpcId, schema) => parseGoogleJsonWithSchema(decodeGoogleJson(rawResponse, rpcId), schema);
74
+ export const decode_google_json_with_schema = decodeGoogleJsonWithSchema;
@@ -1,9 +1,9 @@
1
- export { defineSite, isSite } from "./core/define-site.js";
2
- export type { Site, SiteDef, SiteContext, Endpoint, QueryOptions, ParameterDefinition, PositionalDefinition, FingerprintOption, FingerprintConfig, FormLoginConfig, LoginStrategy, LoginContext, Credentials, HttpCapability, SSEResult, } from "./types.js";
3
1
  export { FormLoginStrategy } from "./capabilities/login/login-strategy.js";
4
- export { sites, loadSites, setSites, getSite, queryWebsite, createUniversalSite, } from "./core/runtime.js";
5
- export { discoverSites, extensionRoots, BUNDLED_SITES_DIR } from "./core/loader.js";
6
- export { createContext } from "./core/context.js";
7
2
  export type { ContextProviders, ManagedContext } from "./core/context.js";
8
- export { DEFAULT_REGISTRY, resolveRegistries, addRegistry, removeRegistry, parseRepoSpec, loadIndex, searchRegistries, resolveEntry, installEntry, listInstalled, removeInstalled, configPath, } from "./core/registry.js";
9
- export type { RegistrySource, RegistryFile, RegistrySiteEntry, RegistryIndex, InstalledRecord, FoundEntry, } from "./core/registry.js";
3
+ export { createContext } from "./core/context.js";
4
+ export { defineSite, isSite } from "./core/define-site.js";
5
+ export { BUNDLED_SITES_DIR, discoverSites, extensionRoots } from "./core/loader.js";
6
+ export type { FoundEntry, InstalledRecord, RegistryFile, RegistryIndex, RegistrySiteEntry, RegistrySource, } from "./core/registry.js";
7
+ export { addRegistry, configPath, DEFAULT_REGISTRY, installEntry, listInstalled, loadIndex, parseRepoSpec, removeInstalled, removeRegistry, resolveEntry, resolveRegistries, searchRegistries, } from "./core/registry.js";
8
+ export { createUniversalSite, getSite, loadSites, queryWebsite, runSite, setSites, sites, } from "./core/runtime.js";
9
+ export type { Credentials, Endpoint, FingerprintConfig, FingerprintOption, FormLoginConfig, HttpCapability, LoginContext, LoginStrategy, ParameterDefinition, PositionalDefinition, QueryOptions, Site, SiteContext, SiteDef, SSEResult, } from "./types.js";
@@ -1 +1,13 @@
1
- import{loadEnv as e}from"./env.js";e();export{defineSite,isSite}from"./core/define-site.js";export{FormLoginStrategy}from"./capabilities/login/login-strategy.js";export{sites,loadSites,setSites,getSite,queryWebsite,createUniversalSite}from"./core/runtime.js";export{discoverSites,extensionRoots,BUNDLED_SITES_DIR}from"./core/loader.js";export{createContext}from"./core/context.js";export{DEFAULT_REGISTRY,resolveRegistries,addRegistry,removeRegistry,parseRepoSpec,loadIndex,searchRegistries,resolveEntry,installEntry,listInstalled,removeInstalled,configPath}from"./core/registry.js";
1
+ import { loadEnv } from "./env.js";
2
+ // Load environment variables (.env) on import.
3
+ loadEnv();
4
+ // ── login strategies (for sites that need a custom auth flow) ──
5
+ export { FormLoginStrategy } from "./capabilities/login/login-strategy.js";
6
+ export { createContext } from "./core/context.js";
7
+ // ── public authoring + types ──
8
+ export { defineSite, isSite } from "./core/define-site.js";
9
+ export { BUNDLED_SITES_DIR, discoverSites, extensionRoots } from "./core/loader.js";
10
+ // ── remote site registry (search + install extensions) ──
11
+ export { addRegistry, configPath, DEFAULT_REGISTRY, installEntry, listInstalled, loadIndex, parseRepoSpec, removeInstalled, removeRegistry, resolveEntry, resolveRegistries, searchRegistries, } from "./core/registry.js";
12
+ // ── runtime + registry ──
13
+ export { createUniversalSite, getSite, loadSites, queryWebsite, runSite, setSites, sites, } from "./core/runtime.js";