website-api 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -1
- package/dist/bin/cli.js +204 -1
- package/dist/src/capabilities/browser.d.ts +8 -2
- package/dist/src/capabilities/browser.js +106 -1
- package/dist/src/capabilities/cookies.d.ts +7 -1
- package/dist/src/capabilities/cookies.js +68 -1
- package/dist/src/capabilities/download.js +32 -1
- package/dist/src/capabilities/fingerprint.js +62 -1
- package/dist/src/capabilities/http.js +101 -1
- package/dist/src/capabilities/login/login-helper.js +185 -1
- package/dist/src/capabilities/login/login-strategy.js +36 -1
- package/dist/src/challenges/perimeterx.d.ts +62 -0
- package/dist/src/challenges/perimeterx.js +112 -0
- package/dist/src/cli/ext.js +338 -1
- package/dist/src/core/context.d.ts +2 -2
- package/dist/src/core/context.js +137 -1
- package/dist/src/core/define-site.js +74 -1
- package/dist/src/core/loader.js +142 -1
- package/dist/src/core/registry.js +332 -1
- package/dist/src/core/runtime.d.ts +12 -4
- package/dist/src/core/runtime.js +98 -1
- package/dist/src/env.js +34 -1
- package/dist/src/sites/bloomberg.com/index.d.ts +11 -0
- package/dist/src/sites/bloomberg.com/index.js +49 -0
- package/dist/src/sites/bloomberg.com/openapi.yaml +38 -0
- package/dist/src/sites/chase.com/download-helper.js +266 -1
- package/dist/src/sites/chase.com/index.js +87 -1
- package/dist/src/sites/chase.com/openapi.yaml +76 -0
- package/dist/src/sites/chatgpt.com/index.js +24 -1
- package/dist/src/sites/chatgpt.com/openapi.yaml +29 -0
- package/dist/src/sites/claude.ai/claude-helpers.js +26 -1
- package/dist/src/sites/claude.ai/index.js +42 -1
- package/dist/src/sites/claude.ai/openapi.yaml +54 -0
- package/dist/src/sites/cursor.com/index.js +12 -1
- package/dist/src/sites/cursor.com/openapi.yaml +39 -0
- package/dist/src/sites/e-zpassny.com/index.d.ts +2 -0
- package/dist/src/sites/e-zpassny.com/index.js +344 -0
- package/dist/src/sites/e-zpassny.com/openapi.yaml +68 -0
- package/dist/src/sites/gemini.google.com/index.js +80 -1
- package/dist/src/sites/gemini.google.com/openapi.yaml +39 -0
- package/dist/src/sites/google.com/google-helpers.js +255 -1
- package/dist/src/sites/google.com/index.js +253 -1
- package/dist/src/sites/google.com/openapi.yaml +59 -0
- package/dist/src/sites/ollama.com/index.js +43 -1
- package/dist/src/sites/ollama.com/openapi.yaml +39 -0
- package/dist/src/sites/perplexity.ai/index.js +253 -1
- package/dist/src/sites/perplexity.ai/openapi.yaml +51 -0
- package/dist/src/sites/pseg.com/index.js +243 -1
- package/dist/src/sites/pseg.com/openapi.yaml +42 -0
- package/dist/src/sites/pseg.com/pseg-helpers.js +53 -1
- package/dist/src/sites/voice.google.com/index.d.ts +2 -0
- package/dist/src/sites/voice.google.com/index.js +122 -0
- package/dist/src/sites/voice.google.com/openapi.yaml +67 -0
- package/dist/src/sites/voice.google.com/voice-helpers.d.ts +105 -0
- package/dist/src/sites/voice.google.com/voice-helpers.js +181 -0
- package/dist/src/sites/zillow.com/index.d.ts +2 -0
- package/dist/src/sites/zillow.com/index.js +303 -0
- package/dist/src/sites/zillow.com/openapi.yaml +55 -0
- package/dist/src/types.d.ts +7 -0
- package/dist/src/types.js +1 -1
- package/dist/src/util/args-parser.js +145 -1
- package/dist/src/util/google-json.js +74 -1
- package/dist/src/website-api.d.ts +7 -7
- package/dist/src/website-api.js +13 -1
- package/package.json +37 -10
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Generated by `pnpm generate:openapi` — do not edit by hand.
|
|
2
|
+
openapi: 3.1.0
|
|
3
|
+
info:
|
|
4
|
+
title: Gemini Usage
|
|
5
|
+
description: Fetches Gemini account usage/quota details directly over HTTP (no browser).
|
|
6
|
+
version: 1.1.3
|
|
7
|
+
servers:
|
|
8
|
+
- url: https://gemini.google.com
|
|
9
|
+
paths:
|
|
10
|
+
/usage:
|
|
11
|
+
get:
|
|
12
|
+
summary: "Gemini Usage: GET /usage"
|
|
13
|
+
description: Fetches Gemini account usage/quota details directly over HTTP (no browser).
|
|
14
|
+
operationId: gemini_usage_get__usage
|
|
15
|
+
responses:
|
|
16
|
+
"200":
|
|
17
|
+
description: JSON response body (shape defined by the site, see its transform)
|
|
18
|
+
security:
|
|
19
|
+
- chromeSession: []
|
|
20
|
+
components:
|
|
21
|
+
securitySchemes:
|
|
22
|
+
chromeSession:
|
|
23
|
+
type: apiKey
|
|
24
|
+
in: cookie
|
|
25
|
+
name: session
|
|
26
|
+
description: "Authenticated via the user's real Chrome session: website-api injects decrypted Chrome
|
|
27
|
+
cookies for google.com into every request."
|
|
28
|
+
x-website-api:
|
|
29
|
+
id: gemini-usage
|
|
30
|
+
domain: gemini.google.com
|
|
31
|
+
cookieDomain: google.com
|
|
32
|
+
transport: http
|
|
33
|
+
cookies: required
|
|
34
|
+
requiresLogin: true
|
|
35
|
+
imperative: false
|
|
36
|
+
cli:
|
|
37
|
+
command: website-api gemini-usage
|
|
38
|
+
positionals: []
|
|
39
|
+
parameters: []
|
|
@@ -1 +1,255 @@
|
|
|
1
|
-
|
|
1
|
+
// Pure encoders/decoders for Google's search + batchexecute responses.
|
|
2
|
+
// No Playwright or I/O here, so every function is directly unit-testable.
|
|
3
|
+
/**
 * Encodes a value in `application/x-www-form-urlencoded` style.
 *
 * Spaces become `+`, ASCII letters/digits and `-_.~` pass through unchanged,
 * and every other code point is written as the `%XX` escapes of its UTF-8
 * bytes (upper-case hex).
 *
 * @param {unknown} value - Value to encode; null/undefined are treated as "".
 * @returns {string} The encoded text.
 */
export function formEncode(value) {
  const unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~";
  const pieces = [];
  // Iterating a string yields whole code points, so a surrogate pair arrives
  // as a single two-unit symbol and is escaped as one 4-byte sequence.
  for (const symbol of String(value ?? "")) {
    if (symbol === " ") {
      pieces.push("+");
      continue;
    }
    if (unreserved.includes(symbol)) {
      pieces.push(symbol);
      continue;
    }
    const escapes = utf8Bytes(symbol.codePointAt(0)).map(
      (byte) => `%${byte.toString(16).toUpperCase().padStart(2, "0")}`,
    );
    pieces.push(...escapes);
  }
  return pieces.join("");
}
|
|
26
|
+
/**
 * Returns the UTF-8 byte sequence for a single code point.
 *
 * @param {number} codePoint - Unicode code point to encode.
 * @returns {number[]} One to four byte values.
 */
export function utf8Bytes(codePoint) {
  // Continuation byte carrying the six bits that start at `shift`.
  const continuation = (shift) => 0x80 | ((codePoint >> shift) & 0x3f);

  if (codePoint <= 0x7f) {
    return [codePoint];
  }
  if (codePoint <= 0x7ff) {
    return [0xc0 | (codePoint >> 6), continuation(0)];
  }
  if (codePoint <= 0xffff) {
    return [0xe0 | (codePoint >> 12), continuation(6), continuation(0)];
  }
  return [0xf0 | (codePoint >> 18), continuation(12), continuation(6), continuation(0)];
}
|
|
41
|
+
/**
 * Decodes an array of UTF-8 byte values into a string.
 *
 * Malformed input is rejected instead of being decoded into unrelated
 * characters: a truncated multi-byte sequence, a continuation byte that does
 * not have the `10xxxxxx` shape, or a byte that cannot start a sequence all
 * raise a RangeError.
 *
 * @param {number[]} bytes - Byte values (0-255).
 * @returns {string} The decoded text.
 * @throws {RangeError} On malformed sequences, or when the decoded value is
 *   not a valid code point (raised by `String.fromCodePoint`).
 */
export function utf8String(bytes) {
  let out = "";
  let i = 0;
  while (i < bytes.length) {
    const lead = bytes[i];
    let extra;
    let cp;
    if ((lead & 0x80) === 0) {
      extra = 0;
      cp = lead;
    } else if ((lead & 0xe0) === 0xc0) {
      extra = 1;
      cp = lead & 0x1f;
    } else if ((lead & 0xf0) === 0xe0) {
      extra = 2;
      cp = lead & 0x0f;
    } else if ((lead & 0xf8) === 0xf0) {
      extra = 3;
      cp = lead & 0x07;
    } else {
      // 10xxxxxx (stray continuation) or 11111xxx (never valid in UTF-8).
      throw new RangeError(`Invalid UTF-8 lead byte 0x${Number(lead).toString(16)} at index ${i}`);
    }
    for (let k = 1; k <= extra; k++) {
      const next = bytes[i + k];
      // `undefined` means the sequence runs past the end of the input.
      if (next === undefined || (next & 0xc0) !== 0x80) {
        throw new RangeError(`Invalid UTF-8 continuation byte at index ${i + k}`);
      }
      cp = (cp << 6) | (next & 0x3f);
    }
    out += String.fromCodePoint(cp);
    i += extra + 1;
  }
  return out;
}
|
|
57
|
+
/**
 * Decodes form-style percent-encoding: `+` becomes a space and each run of
 * consecutive `%XX` escapes is decoded together as UTF-8.
 *
 * A `%` that is not followed by two hex digits is kept literally. When a run
 * cannot be decoded (the decoder throws), the run is written back as
 * upper-case `%XX` escapes rather than failing the whole call.
 *
 * @param {unknown} value - Encoded text; null/undefined are treated as "".
 * @returns {string} The decoded text.
 */
export function percentDecode(value) {
  const text = String(value ?? "").replace(/\+/g, " ");
  const isEscapeAt = (pos) => text[pos] === "%" && /[0-9a-fA-F]{2}/.test(text.slice(pos + 1, pos + 3));

  let decoded = "";
  let pos = 0;
  while (pos < text.length) {
    if (!isEscapeAt(pos)) {
      decoded += text[pos];
      pos += 1;
      continue;
    }
    // Gather the whole run so multi-byte characters are decoded as a unit.
    const run = [];
    do {
      run.push(Number.parseInt(text.slice(pos + 1, pos + 3), 16));
      pos += 3;
    } while (isEscapeAt(pos));
    try {
      decoded += utf8String(run);
    }
    catch {
      decoded += run.map((b) => `%${b.toString(16).toUpperCase().padStart(2, "0")}`).join("");
    }
  }
  return decoded;
}
|
|
80
|
+
/**
 * Parses a URL query string into a plain object of decoded key/value pairs.
 *
 * One leading `?` is ignored, empty segments are skipped, a segment without
 * `=` yields an empty-string value, and a repeated key keeps its last value.
 *
 * @param {unknown} search - Query string such as `location.search`.
 * @returns {Record<string, string>} Decoded parameters.
 */
export function parseQueryString(search) {
  const withoutMark = String(search ?? "").replace(/^\?/, "");
  const result = {};
  withoutMark.split("&").forEach((pair) => {
    if (pair === "") {
      return;
    }
    const cut = pair.indexOf("=");
    const [keyPart, valuePart] = cut === -1 ? [pair, ""] : [pair.slice(0, cut), pair.slice(cut + 1)];
    result[percentDecode(keyPart)] = percentDecode(valuePart);
  });
  return result;
}
|
|
94
|
+
/**
 * Serializes an object into a form-encoded query string (without a leading `?`).
 *
 * Entries whose value is null, undefined or the empty string are omitted.
 *
 * @param {Record<string, unknown>} query - Parameters to serialize.
 * @returns {string} `key=value` pairs joined with `&`.
 */
export function buildQueryString(query) {
  const pairs = [];
  for (const [name, raw] of Object.entries(query)) {
    if (raw == null || raw === "") {
      continue;
    }
    pairs.push(`${formEncode(name)}=${formEncode(raw)}`);
  }
  return pairs.join("&");
}
|
|
100
|
+
/**
 * Returns the path of an http(s) URL whose host is `google.<suffix>` or a
 * subdomain of it, or `null` for anything else.
 *
 * Userinfo, port, query and fragment are excluded from the host before it is
 * tested, so text such as `?x=.google.com` or `google.com@other.host` cannot
 * make a foreign host look like Google.
 *
 * @param {unknown} url - Absolute URL; null/undefined yield `null`.
 * @returns {string|null} The path (`"/"` when the URL has none), or `null`.
 */
export function googlePath(url) {
  // Groups: 1 = bare host, 2 = path. Optional userinfo and port are skipped,
  // and the match must stop at `?`, `#` or the end of the input.
  const m = /^https?:\/\/(?:[^/?#@]*@)?([^/?#:]+)(?::\d*)?(\/[^?#]*)?(?:[?#]|$)/i.exec(String(url ?? ""));
  // NOTE(review): the suffix test accepts any labels after "google.", so a
  // host like google.example.org still passes; tighten it before using this
  // for anything stricter than traffic filtering.
  if (!m || !/(^|\.)google\.[^/]+$/i.test(m[1]))
    return null;
  return m[2] || "/";
}
|
|
106
|
+
/**
 * Removes a leading `)]}'` anti-XSSI guard (with surrounding whitespace and
 * one optional newline) from a response body.
 *
 * @param {unknown} text - Response body; null/undefined are treated as "",
 *   matching how the other helpers in this module coerce their input.
 * @returns {string} The body without the guard prefix.
 */
export function stripXssi(text) {
  return String(text ?? "").replace(/^\s*\)\]\}'\s*\n?/, "");
}
|
|
109
|
+
/**
 * Parses JSON without throwing.
 *
 * @param {string} text - Candidate JSON text.
 * @returns {unknown} The parsed value, or `undefined` when `JSON.parse`
 *   rejects the input (a literal `null` document still yields `null`).
 */
export function parseJsonMaybe(text) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  }
  catch {
    parsed = undefined;
  }
  return parsed;
}
|
|
117
|
+
/**
 * Parses a line-oriented `id;payload` record stream.
 *
 * The XSSI guard is stripped first. Each remaining line is trimmed and kept
 * only if it has the form `<id>;<payload>`, where the id consists of letters,
 * digits, `_` or `-`. A payload that parses as JSON is stored parsed;
 * otherwise the trimmed payload text is stored.
 *
 * @param {string} text - Raw response body.
 * @returns {{id: string, value: unknown}[]} Records in input order.
 */
export function parseGoogleRecordStream(text) {
  return stripXssi(text)
    .split(/\r?\n/)
    // Blank lines cannot match (an id needs at least one character), so they
    // drop out together with every other non-record line.
    .map((rawLine) => /^([a-zA-Z0-9_-]+);(.*)$/.exec(rawLine.trim()))
    .filter((hit) => hit !== null)
    .map(([, id, rest]) => {
      const payload = rest.trim();
      return { id, value: parseJsonMaybe(payload) ?? payload };
    });
}
|
|
131
|
+
/**
 * Classifies and decodes a response body.
 *
 * The body is parsed as JSON when, after removing the XSSI guard, it starts
 * with `[` or `{`. If that yields nothing it is parsed as an `id;payload`
 * record stream instead.
 *
 * @param {object} input
 * @param {unknown} input.body - Raw body; null/undefined are treated as "".
 * @param {string|null} [input.contentType=""] - MIME type. A `null` value is
 *   accepted (treated as ""), and matching is case-insensitive.
 * @returns {{format: string, xssiPrefixed: boolean, parsed: unknown, records: Array}}
 *   `format` is one of "text", "html", "json", "google-xssi-json",
 *   "google-record-stream" or "google-xssi-record-stream".
 */
export function decodeGoogleBody({ body, contentType = "" }) {
  const text = String(body ?? "");
  // The parameter default only covers `undefined`; an explicit null would
  // otherwise throw on `.includes`.
  const mime = String(contentType ?? "").toLowerCase();
  const unprefixed = stripXssi(text).trimStart();
  const xssiPrefixed = /^\s*\)\]\}'/.test(text);
  const parsed = /^[[{]/.test(unprefixed) ? parseJsonMaybe(unprefixed) : undefined;
  const records = parsed === undefined ? parseGoogleRecordStream(text) : [];
  let format = "text";
  if (mime.includes("html") || /^\s*</.test(unprefixed))
    format = "html";
  // The structured formats below take precedence over the "html" guess above.
  if (xssiPrefixed && parsed !== undefined)
    format = "google-xssi-json";
  else if (xssiPrefixed && records.length)
    format = "google-xssi-record-stream";
  else if (parsed !== undefined)
    format = "json";
  else if (records.length)
    format = "google-record-stream";
  return { format, xssiPrefixed, parsed, records };
}
|
|
150
|
+
/**
 * Normalizes text for output: drops NUL characters, removes spaces/tabs that
 * sit directly before a newline, trims both ends, then truncates.
 *
 * @param {unknown} s - Input text; null/undefined are treated as "".
 * @param {number} [limit=4000] - Maximum number of UTF-16 units to keep.
 * @returns {string} The cleaned, truncated text.
 */
export function cleanText(s, limit = 4000) {
  const withoutNul = String(s ?? "").replace(/\u0000/g, "");
  const tidied = withoutNul.replace(/[ \t]+\n/g, "\n").trim();
  return tidied.slice(0, limit);
}
|
|
157
|
+
/**
 * Picks the most likely answer line out of plain page text.
 *
 * If a line containing "AI Mode response is ready" exists and is not the
 * first non-empty line, the line just before it is returned. Otherwise the
 * first non-empty line that is not a known navigation/section label wins.
 *
 * @param {unknown} text - Page text; null/undefined are treated as "".
 * @returns {string|null} The chosen line, or `null` when none qualifies.
 */
export function extractAnswerFromText(text) {
  const lines = [];
  for (const raw of String(text ?? "").split("\n")) {
    const trimmed = raw.trim();
    if (trimmed) {
      lines.push(trimmed);
    }
  }

  const readyAt = lines.findIndex((l) => /AI Mode response is ready/i.test(l));
  if (readyAt > 0) {
    return lines[readyAt - 1];
  }

  const pageChrome = /^(Skip to main content|Accessibility help|Accessibility feedback|AI Mode|All|Images|Videos|News|More|Search Results|Sources|Related)$/i;
  for (const candidate of lines) {
    if (!pageChrome.test(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
|
168
|
+
/**
 * Removes `<style>…</style>` and then `<script>…</script>` elements,
 * including their contents, matching tag names case-insensitively.
 *
 * An opening tag with no matching closing tag removes everything from that
 * tag to the end of the input.
 *
 * Matching runs directly on the input string. Searching a lower-cased copy
 * and reusing its offsets is unsafe because lower-casing can change the
 * string's length.
 *
 * @param {string} html - Markup to filter.
 * @returns {string} The markup without style and script elements.
 */
function stripStyleAndScriptTags(html) {
  return html
    .replace(/<style[\s\S]*?(?:<\/style>|$)/gi, "")
    .replace(/<script[\s\S]*?(?:<\/script>|$)/gi, "");
}
|
|
199
|
+
/**
 * Reduces an HTML fragment to plain text.
 *
 * Steps, in order: decode the `&lt;`, `&gt;`, `&nbsp;` and `&amp;` entities,
 * drop style/script elements with their contents, replace every remaining
 * tag with a space, then collapse whitespace and trim.
 *
 * `&amp;` is decoded last so that doubly-escaped text such as `&amp;lt;`
 * ends up as the literal `&lt;` rather than being decoded twice.
 *
 * NOTE(review): entities are decoded before tags are stripped, so
 * entity-escaped markup in the input is treated as real markup and removed.
 * Confirm this is what the upstream payloads need.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} Visible text with single spaces between words.
 */
export function cleanHtml(html) {
  const decoded = html
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&nbsp;/g, " ")
    .replace(/&amp;/g, "&");
  const withoutCode = stripStyleAndScriptTags(decoded);
  return withoutCode
    .replace(/<[^>]+>/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
209
|
+
/**
 * Depth-first search for the first HTML-looking string inside a decoded value.
 *
 * A string qualifies when its trimmed form starts with `<` or it contains
 * `class=` or `id=`. On objects, a string `html` property, then
 * `aimc_block.html`, then an HTML-looking `value` property are preferred
 * before the remaining property values are searched in order.
 *
 * @param {unknown} obj - Any JSON-like value.
 * @returns {string|null} The first matching string, or `null`.
 */
export function findHtmlInObject(obj) {
  if (!obj) {
    return null;
  }
  if (typeof obj === "string") {
    const trimmed = obj.trim();
    const looksLikeHtml = trimmed.startsWith("<") || trimmed.includes("class=") || trimmed.includes("id=");
    return looksLikeHtml ? obj : null;
  }

  let children;
  if (Array.isArray(obj)) {
    children = obj;
  }
  else if (typeof obj === "object") {
    if (typeof obj.html === "string") {
      return obj.html;
    }
    if (typeof obj.aimc_block?.html === "string") {
      return obj.aimc_block.html;
    }
    if (typeof obj.value === "string" && (obj.value.startsWith("<") || obj.value.includes("class="))) {
      return obj.value;
    }
    children = Object.values(obj);
  }
  else {
    return null;
  }

  for (const child of children) {
    const hit = findHtmlInObject(child);
    if (hit) {
      return hit;
    }
  }
  return null;
}
|
|
241
|
+
/**
 * Extracts plain answer text from a response body.
 *
 * When the body (after the XSSI guard) already looks like markup, the whole
 * body is cleaned. Otherwise the body is parsed as a record stream and the
 * first record containing HTML is cleaned and returned.
 *
 * Best-effort: any error raised while decoding is swallowed and reported as
 * `null`.
 *
 * @param {string} text - Raw response body.
 * @returns {string|null} Cleaned answer text, or `null` if nothing was found.
 */
export function extractAnswerFromRecordStream(text) {
  try {
    const body = stripXssi(text).trimStart();
    const bodyIsMarkup = body.startsWith("<") || body.includes("class=") || body.includes("id=");
    if (bodyIsMarkup) {
      return cleanHtml(body);
    }
    for (const { value } of parseGoogleRecordStream(text)) {
      const fragment = findHtmlInObject(value);
      if (fragment) {
        return cleanHtml(fragment);
      }
    }
  }
  catch { }
  return null;
}
|
|
@@ -1 +1,253 @@
|
|
|
1
|
-
import{defineSite
|
|
1
|
+
import { defineSite } from "../../core/define-site.js";
|
|
2
|
+
import { buildQueryString, cleanText, decodeGoogleBody, extractAnswerFromRecordStream, extractAnswerFromText, formEncode, googlePath, parseQueryString, } from "./google-helpers.js";
|
|
3
|
+
/**
 * Polls the page until an AI answer is visible or the time budget is spent.
 *
 * Each round waits 750 ms, then looks in the page for answer text. The budget
 * is `timeout`, capped at 45000 ms.
 *
 * @param {object} page - Object providing `waitForTimeout(ms)` and `evaluate(fn)`.
 * @param {number} timeout - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<string|null>} The answer text, or `null` on timeout.
 */
async function waitForAiAnswer(page, timeout) {
  const budget = Math.min(timeout, 45000);
  const startedAt = Date.now();

  // Passed to page.evaluate, so it must not reference anything from this module.
  const probe = () => {
    const skip = /^(Sources|Related|AI Mode response is ready)$/i;
    for (const el of document.querySelectorAll('[jsname="KFl8ub"], [data-attrid], .kp-wholepage')) {
      const candidate = el.innerText?.trim();
      if (candidate && !skip.test(candidate)) {
        return candidate;
      }
    }
    // Fallback: the line just above the "response is ready" announcement.
    const pageLines = (document.body?.innerText || "")
      .split("\n")
      .map((s) => s.trim())
      .filter(Boolean);
    const readyAt = pageLines.findIndex((l) => /AI Mode response is ready/i.test(l));
    return readyAt > 0 ? pageLines[readyAt - 1] : null;
  };

  for (;;) {
    // Negated "<" so that a NaN budget ends the loop at once, as a plain
    // `while (elapsed < budget)` would.
    if (!(Date.now() - startedAt < budget)) {
      return null;
    }
    await page.waitForTimeout(750);
    const found = await page.evaluate(probe);
    if (found) {
      return found;
    }
  }
}
|
|
27
|
+
/**
 * Captures the title, URL and truncated text/HTML of the current document.
 *
 * @param {object} page - Object providing `evaluate(fn, arg)`.
 * @param {number} limit - Maximum characters kept for `bodyText` and `htmlPrefix`.
 * @returns {Promise<{title: string, url: string, bodyText: string, htmlPrefix: string}>}
 */
async function collectPageState(page, limit) {
  // Passed to page.evaluate; `maxChars` receives `limit`.
  const snapshot = (maxChars) => {
    const visibleText = document.body?.innerText || "";
    const markup = document.documentElement?.outerHTML || "";
    return {
      title: document.title,
      url: document.location.href,
      bodyText: visibleText.slice(0, maxChars),
      htmlPrefix: markup.slice(0, maxChars),
    };
  };
  return page.evaluate(snapshot, limit);
}
|
|
39
|
+
/**
 * Builds the `/async/folwr` URL for the AI Mode page currently loaded.
 *
 * Tokens are read from the element carrying `data-garc`, `data-lro-token`,
 * `data-lro-signature` and `data-ei`. Selected parameters of the current page
 * URL are forwarded, and the tokens are added under their request names.
 *
 * @param {object} page - Object providing `evaluate(fn)`.
 * @returns {Promise<object>} `{ url, tokens }` on success. When the token
 *   container is missing, the in-page result `{ url: null, error }` is
 *   returned unchanged.
 */
async function buildFolwrEndpoint(page) {
  const scraped = await page.evaluate(() => {
    const holder = document.querySelector("[data-garc][data-lro-token][data-lro-signature][data-ei]");
    if (!holder) {
      return { url: null, error: "Missing AI Mode token container" };
    }
    const { ei, garc, lroToken, lroSignature, xsrfFolwrToken, srtst } = holder.dataset;
    return {
      origin: document.location.origin,
      search: document.location.search,
      stkp: document.getElementById("rKxeg")?.getAttribute("data-stkp") || null,
      fmt: document.querySelector("[data-madl]") ? "madl" : "adl",
      tokens: {
        ei,
        garc,
        lroToken,
        lroSignature,
        xsrfFolwrToken: xsrfFolwrToken || null,
        srtst: srtst || null,
      },
    };
  });
  if (!scraped?.origin) {
    return scraped;
  }

  const { origin, search, stkp, fmt, tokens } = scraped;

  // Parameters of the current page URL that are carried over when present.
  const forwarded = [
    "q", "udm", "mstk", "csuir", "mtid", "ved", "vet", "sei",
    "dpr", "hl", "gl", "source", "vsrid", "lns_img", "cinpts",
  ];
  const incoming = parseQueryString(search);
  const query = {};
  forwarded
    .filter((key) => incoming[key])
    .forEach((key) => {
      query[key] = incoming[key];
    });
  // Insertion order determines the order of parameters in the final URL.
  if (tokens.srtst) {
    query.srtst = tokens.srtst;
  }
  Object.assign(query, {
    garc: tokens.garc,
    mlro: tokens.lroToken,
    mlros: tokens.lroSignature,
    ei: tokens.ei,
  });
  if (stkp) {
    query.stkp = stkp;
  }

  const asyncParts = tokens.xsrfFolwrToken
    ? { _fmt: fmt, _xsrf: tokens.xsrfFolwrToken }
    : { _fmt: fmt };
  const asyncString = Object.entries(asyncParts)
    .map(([key, value]) => `${formEncode(key)}:${formEncode(value)}`)
    .join(",");

  return {
    url: `${origin}/async/folwr?${buildQueryString(query)}&async=${asyncString}`,
    tokens,
  };
}
|
|
104
|
+
/**
 * Converts captured network rows into a compact, serializable summary.
 *
 * @param {Array<object>} rows - Captured rows with `method`, `type`, `url`,
 *   `status`, `mimeType` and `body` fields.
 * @param {number} rawLimit - Maximum characters of each body to include.
 * @returns {Array<object>} One summary per row. Rows without a body get
 *   `decodedFormat: null`, `recordCount: 0` and `bodyPrefix: null`.
 */
function summarizeNetwork(rows, rawLimit) {
  return rows.map(({ method, type, url, status, mimeType, body }) => {
    const hasBody = body != null;
    const decoded = hasBody ? decodeGoogleBody({ body, contentType: mimeType }) : null;
    return {
      method,
      type,
      url,
      status,
      mimeType,
      decodedFormat: decoded?.format ?? null,
      recordCount: decoded?.records?.length ?? 0,
      bodyPrefix: hasBody ? body.slice(0, rawLimit) : null,
    };
  });
}
|
|
119
|
+
/**
 * Site definition for Google AI Overview / AI Mode.
 *
 * `run` opens the AI Mode search page for the question, records matching
 * network traffic through a CDP session, waits for an answer in the DOM,
 * additionally queries the discovered `/async/folwr` endpoint from inside the
 * page, and returns the best answer together with diagnostic data.
 */
export default defineSite({
  id: "google-ai",
  name: "Google AI Overview",
  domain: "google.com",
  description: "Fetches Google's AI Overview and AI Mode answers using browser-attached Playwright.",
  transport: "browser",
  cookies: "optional",
  endpoints: [{ url: "https://www.google.com" }],
  positionals: [
    {
      name: "question",
      description: "The search query or question to ask Google",
      required: true,
      variadic: true,
    },
  ],
  parameters: [
    { name: "raw-limit", type: "number", description: "Max raw response chars to include", default: 12000 },
    { name: "timeout", type: "number", description: "Playwright timeout in milliseconds", default: 90000 },
    {
      // NOTE(review): `text` is declared here but never read inside `run`;
      // presumably the CLI layer consumes it. Confirm.
      name: "text",
      type: "boolean",
      description: "Print only the extracted AI Overview answer text",
      short: "t",
    },
  ],
  run: async (ctx) => {
    // A non-numeric option would become NaN, which makes every slice() empty
    // and the wait loop exit at once. Fall back to the default instead.
    const numberOption = (value, fallback) => {
      if (value === undefined) {
        return fallback;
      }
      const parsed = Number(value);
      return Number.isNaN(parsed) ? fallback : parsed;
    };

    // NOTE(review): `question` is a variadic positional; if the runtime hands
    // over an array it is stringified with commas when encoded. Verify.
    const question = ctx.options.question;
    const rawLimit = numberOption(ctx.options.rawLimit, 12000);
    const timeout = numberOption(ctx.options.timeout, 90000);
    const page = await ctx.browser();
    const cdp = await page.context().newCDPSession(page);
    try {
      await cdp.send("Network.enable", { maxTotalBufferSize: 100000000, maxResourceBufferSize: 100000000 });
      const rows = [];
      const byId = new Map(); // requestId -> index into rows
      const pendingBodies = []; // in-flight Network.getResponseBody reads
      const interesting = (url) => {
        const path = googlePath(url);
        return path === "/search" || path?.startsWith("/async/") || path?.includes("batchexecute");
      };
      cdp.on("Network.requestWillBeSent", (p) => {
        const r = p.request || {};
        if (!interesting(r.url))
          return;
        byId.set(p.requestId, rows.length);
        rows.push({
          id: p.requestId,
          type: p.type,
          method: r.method,
          url: r.url,
          postData: r.postData || null,
          status: null,
          mimeType: null,
          body: null,
        });
      });
      cdp.on("Network.responseReceived", (p) => {
        const i = byId.get(p.requestId);
        if (i == null)
          return;
        rows[i].status = p.response.status;
        rows[i].mimeType = p.response.mimeType;
      });
      cdp.on("Network.loadingFinished", (p) => {
        const i = byId.get(p.requestId);
        if (i == null)
          return;
        const mt = rows[i].mimeType || "";
        if (!/text|json|html|javascript|x-protobuf/.test(mt))
          return;
        // Track the read so the summary below can wait for it instead of
        // racing against it.
        pendingBodies.push((async () => {
          try {
            const b = await cdp.send("Network.getResponseBody", { requestId: p.requestId });
            rows[i].body = b.base64Encoded ? null : (b.body || "").slice(0, rawLimit);
          }
          catch {
            // Best-effort capture: a body that cannot be read stays null.
          }
        })());
      });
      const searchUrl = `https://www.google.com/search?${buildQueryString({ q: question, udm: "50" })}`;
      if (ctx.debug)
        console.log(`Navigating to Google Search: ${searchUrl}`);
      await page.goto(searchUrl, { waitUntil: "domcontentloaded" });
      const answer = await waitForAiAnswer(page, timeout);
      const searchPage = await collectPageState(page, rawLimit);
      const endpoint = await buildFolwrEndpoint(page);
      let endpointResult = null;
      if (endpoint?.url) {
        if (ctx.debug)
          console.log(`Discovered folwr endpoint: ${endpoint.url}`);
        try {
          // Fetch from inside the page so the request carries the page's
          // own cookies and origin.
          const responseText = await page.evaluate(async (url) => {
            const res = await fetch(url);
            return res.text();
          }, endpoint.url);
          const decodedAnswer = extractAnswerFromRecordStream(responseText);
          const rawTextAnswer = extractAnswerFromText(responseText);
          // Only trust the plain-text fallback when it is not raw markup.
          const cleanFallbackAnswer = rawTextAnswer && !rawTextAnswer.includes("<") && !rawTextAnswer.includes("class=")
            ? rawTextAnswer
            : null;
          endpointResult = {
            title: searchPage.title,
            url: endpoint.url,
            bodyText: responseText,
            htmlPrefix: responseText.slice(0, rawLimit),
            answer: decodedAnswer || cleanFallbackAnswer,
            decoded: decodeGoogleBody({ body: responseText, contentType: "text/plain" }),
          };
        }
        catch (err) {
          if (ctx.debug)
            console.warn("Failed to query folwr endpoint in-page:", err);
        }
      }
      // Short grace period for late network events, then wait for every
      // body read that has already started.
      await page.waitForTimeout(500);
      await Promise.allSettled(pendingBodies);
      const finalAnswer = endpointResult?.answer || answer || extractAnswerFromText(searchPage.bodyText);
      return {
        question,
        answer: cleanText(finalAnswer, rawLimit) || null,
        finalUrl: page.url(),
        endpoint: endpoint || null,
        searchPage: {
          title: searchPage.title,
          url: searchPage.url,
          bodyText: cleanText(searchPage.bodyText, rawLimit),
        },
        endpointResult: endpointResult
          ? {
            title: endpointResult.title,
            url: endpointResult.url,
            bodyText: cleanText(endpointResult.bodyText, rawLimit),
            htmlPrefix: endpointResult.htmlPrefix,
            decoded: endpointResult.decoded,
          }
          : null,
        requests: summarizeNetwork(rows, rawLimit),
      };
    }
    finally {
      // Release the CDP session and its listeners on every exit path. A
      // failure here (for example, the page is already closed) is ignored.
      await cdp.detach().catch(() => { });
    }
  },
});
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Generated by `pnpm generate:openapi` — do not edit by hand.
|
|
2
|
+
openapi: 3.1.0
|
|
3
|
+
info:
|
|
4
|
+
title: Google AI Overview
|
|
5
|
+
description: Fetches Google's AI Overview and AI Mode answers using browser-attached Playwright.
|
|
6
|
+
version: 1.1.3
|
|
7
|
+
servers:
|
|
8
|
+
- url: https://google.com
|
|
9
|
+
- url: https://www.google.com
|
|
10
|
+
paths:
|
|
11
|
+
/:
|
|
12
|
+
servers:
|
|
13
|
+
- url: https://www.google.com
|
|
14
|
+
get:
|
|
15
|
+
summary: "Google AI Overview: GET /"
|
|
16
|
+
description: Fetches Google's AI Overview and AI Mode answers using browser-attached Playwright.
|
|
17
|
+
operationId: google_ai_get__
|
|
18
|
+
responses:
|
|
19
|
+
"200":
|
|
20
|
+
description: JSON response body (shape defined by the site, see its transform)
|
|
21
|
+
security: []
|
|
22
|
+
components:
|
|
23
|
+
securitySchemes:
|
|
24
|
+
chromeSession:
|
|
25
|
+
type: apiKey
|
|
26
|
+
in: cookie
|
|
27
|
+
name: session
|
|
28
|
+
description: "Authenticated via the user's real Chrome session: website-api injects decrypted Chrome
|
|
29
|
+
cookies for google.com into every request."
|
|
30
|
+
x-website-api:
|
|
31
|
+
id: google-ai
|
|
32
|
+
domain: google.com
|
|
33
|
+
cookieDomain: google.com
|
|
34
|
+
transport: browser
|
|
35
|
+
cookies: optional
|
|
36
|
+
requiresLogin: false
|
|
37
|
+
imperative: false
|
|
38
|
+
cli:
|
|
39
|
+
command: website-api google-ai
|
|
40
|
+
positionals:
|
|
41
|
+
- name: question
|
|
42
|
+
description: The search query or question to ask Google
|
|
43
|
+
required: true
|
|
44
|
+
variadic: true
|
|
45
|
+
parameters:
|
|
46
|
+
- flag: --raw-limit
|
|
47
|
+
type: number
|
|
48
|
+
description: Max raw response chars to include
|
|
49
|
+
default: 12000
|
|
50
|
+
required: false
|
|
51
|
+
- flag: --timeout
|
|
52
|
+
type: number
|
|
53
|
+
description: Playwright timeout in milliseconds
|
|
54
|
+
default: 90000
|
|
55
|
+
required: false
|
|
56
|
+
- flag: --text
|
|
57
|
+
type: boolean
|
|
58
|
+
description: Print only the extracted AI Overview answer text
|
|
59
|
+
required: false
|
|
@@ -1 +1,43 @@
|
|
|
1
|
-
import{defineSite
|
|
1
|
+
import { defineSite } from "../../core/define-site.js";
|
|
2
|
+
/**
 * Escapes regular-expression metacharacters so the text matches literally
 * when embedded in a `RegExp` source.
 *
 * @param {string} value - Literal text.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegex(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (ch) => `\\${ch}`);
}
|
|
5
|
+
/**
 * Reads the plan name from the settings page HTML.
 *
 * Takes the content of the first `<span>` opened after the `</span>` that
 * follows the text "Cloud Usage".
 *
 * @param {string} html - Settings page HTML.
 * @returns {string} The trimmed plan text, or "unknown" when not found.
 */
export function extractPlan(html) {
  const hit = html.match(/Cloud Usage[\s\S]*?<\/span>[\s\S]*?<span[^>]*>([\s\S]*?)<\/span/i);
  const plan = hit?.[1]?.trim();
  return plan ?? "unknown";
}
|
|
9
|
+
/**
 * Extracts one usage meter from the settings page HTML.
 *
 * Looks for a `<span>` whose text is the label, followed by an `aria-label`
 * attribute starting with the same label, followed by a `data-time` attribute.
 *
 * @param {string} html - Settings page HTML.
 * @param {string} label - Meter label, e.g. "Session usage".
 * @returns {{usage: string, reset: string}} `usage` is the rest of the
 *   aria-label with a trailing " used" removed; `reset` is the `data-time`
 *   value. Both are "unknown" when the meter is not found.
 */
export function extractUsage(html, label) {
  const escapedLabel = escapeRegex(label);
  const meter = html.match(new RegExp(`<div[\\s\\S]*?<span[^>]*>\\s*${escapedLabel}\\s*<\\/span>[\\s\\S]*?aria-label="${escapedLabel}\\s+([^"]+)"[\\s\\S]*?data-time="([^"]+)"`, "i"));
  const usedText = meter?.[1];
  const resetText = meter?.[2];
  return {
    usage: usedText?.replace(/\s+used$/i, "").trim() ?? "unknown",
    reset: resetText?.trim() ?? "unknown",
  };
}
|
|
18
|
+
/**
 * Turns the Ollama settings HTML into a usage summary.
 *
 * Pure apart from the `time` field, which is the moment of parsing in
 * ISO-8601 form.
 *
 * @param {string} html - Settings page HTML.
 * @returns {object} Summary with `time`, `Plan`, and the session/weekly
 *   usage and reset values.
 */
export function parseOllamaUsage(html) {
  const { usage: sessionUsed, reset: sessionReset } = extractUsage(html, "Session usage");
  const { usage: weeklyUsed, reset: weeklyReset } = extractUsage(html, "Weekly usage");
  const time = new Date().toISOString();
  return {
    time,
    Plan: extractPlan(html),
    "Session Usage": sessionUsed,
    "Session Reset": sessionReset,
    "Weekly Usage": weeklyUsed,
    "Weekly Reset": weeklyReset,
  };
}
|
|
31
|
+
/**
 * Site definition for Ollama usage: requests the settings page as HTML and
 * converts the response into a usage summary.
 */
export default defineSite({
  id: "ollama-usage",
  name: "Ollama Usage",
  domain: "ollama.com",
  description: "Fetches Ollama plan and usage details from the authenticated settings page.",
  endpoints: [
    {
      url: "https://ollama.com/settings",
      responseType: "html",
      // String() returns a string argument unchanged, so one coercion covers
      // both string and non-string bodies.
      transform: (body) => {
        const html = String(body);
        return parseOllamaUsage(html);
      },
    },
  ],
});
|