omnikey-cli 1.0.24 → 1.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend-dist/agent/agentServer.js +1 -1
- package/backend-dist/agent/utils.js +1 -1
- package/backend-dist/web-search/browser-playwright.js +613 -0
- package/backend-dist/web-search/index.js +17 -0
- package/backend-dist/web-search/llm-auth-check.js +127 -0
- package/backend-dist/{web-search-provider.js → web-search/web-search-provider.js} +98 -16
- package/package.json +3 -2
|
@@ -45,7 +45,7 @@ const subscription_1 = require("../models/subscription");
|
|
|
45
45
|
const subscriptionUsage_1 = require("../models/subscriptionUsage");
|
|
46
46
|
const agentPrompts_1 = require("./agentPrompts");
|
|
47
47
|
const featureRoutes_1 = require("../featureRoutes");
|
|
48
|
-
const web_search_provider_1 = require("../web-search-provider");
|
|
48
|
+
const web_search_provider_1 = require("../web-search/web-search-provider");
|
|
49
49
|
const agentAuth_1 = require("./agentAuth");
|
|
50
50
|
const utils_1 = require("./utils");
|
|
51
51
|
const ai_client_1 = require("../ai-client");
|
|
@@ -4,7 +4,7 @@ exports.buildAvailableTools = buildAvailableTools;
|
|
|
4
4
|
exports.createUserContent = createUserContent;
|
|
5
5
|
exports.sendFinalAnswer = sendFinalAnswer;
|
|
6
6
|
exports.pushToSessionHistory = pushToSessionHistory;
|
|
7
|
-
const web_search_provider_1 = require("../web-search-provider");
|
|
7
|
+
const web_search_provider_1 = require("../web-search/web-search-provider");
|
|
8
8
|
const ai_client_1 = require("../ai-client");
|
|
9
9
|
const config_1 = require("../config");
|
|
10
10
|
/**
|
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.isAnyBrowserRunning = isAnyBrowserRunning;
|
|
40
|
+
exports.isBrowserOpenWithUrl = isBrowserOpenWithUrl;
|
|
41
|
+
exports.fetchWithPlaywright = fetchWithPlaywright;
|
|
42
|
+
const axios_1 = __importDefault(require("axios"));
|
|
43
|
+
/**
 * Races `promise` against a timer.
 *
 * Resolves with the promise's value when it settles first, or with `null`
 * (after logging a warning) when `ms` milliseconds elapse first. A rejection
 * of `promise` is propagated to the caller unchanged.
 *
 * @param {Promise<*>} promise - Work to wait for.
 * @param {number} ms - Timeout in milliseconds.
 * @param {string} label - Identifier included in the timeout warning.
 * @param {{warn: Function}} log - Logger used for the timeout warning.
 * @returns {Promise<*>} The promise's value, or `null` on timeout.
 */
async function withTimeout(promise, ms, label, log) {
    let timeoutId;
    const timedOut = new Promise((resolve) => {
        timeoutId = setTimeout(() => {
            log.warn('browser-playwright: fetch timed out', { label, ms });
            resolve(null);
        }, ms);
    });
    try {
        return await Promise.race([promise, timedOut]);
    }
    finally {
        // Always cancel the timer — including when `promise` rejects — so it
        // cannot fire later, log a bogus timeout, or keep the process alive.
        clearTimeout(timeoutId);
    }
}
|
|
59
|
+
/**
|
|
60
|
+
* Playwright-based web fetching using the user's installed browser profile.
|
|
61
|
+
*
|
|
62
|
+
* Key design decisions:
|
|
63
|
+
* 1. Detects which Chromium browsers are currently RUNNING and tries those
|
|
64
|
+
* first — the active browser is where the authenticated session lives.
|
|
65
|
+
* 2. Discovers the actual profile directory dynamically (Default, Profile 1,
|
|
66
|
+
* Profile 2 …) rather than hardcoding "Default".
|
|
67
|
+
* 3. Checks multiple executable locations (system /Applications and
|
|
68
|
+
* user ~/Applications).
|
|
69
|
+
* 4. Firefox is intentionally excluded from Playwright — headless Firefox
|
|
70
|
+
* on macOS has a known RenderCompositorSWGL rendering bug that causes
|
|
71
|
+
* 30-second timeouts. Cookies from Firefox are still extracted separately
|
|
72
|
+
* by browser-cookies.ts for the plain-HTTP fallback.
|
|
73
|
+
*
|
|
74
|
+
* macOS only. Returns null on other platforms.
|
|
75
|
+
*/
|
|
76
|
+
const child_process_1 = require("child_process");
|
|
77
|
+
const fs = __importStar(require("fs"));
|
|
78
|
+
const os = __importStar(require("os"));
|
|
79
|
+
const path = __importStar(require("path"));
|
|
80
|
+
const playwright_core_1 = __importDefault(require("playwright-core"));
|
|
81
|
+
const home = os.homedir();
/**
 * Builds one catalogue entry for a Chromium-family browser on macOS.
 *
 * @param {string} name - Short browser name used as the lookup key elsewhere in this module.
 * @param {string} appBundle - Name of the `.app` bundle; the executable inside it carries the same name.
 * @param {string} dataDir - Profile root, relative to `~/Library/Application Support`.
 * @returns {{name: string, executablePaths: string[], userDataDir: string}}
 */
function describeMacBrowser(name, appBundle, dataDir) {
    const relativeBinary = `Applications/${appBundle}.app/Contents/MacOS/${appBundle}`;
    return {
        name,
        // System-wide install first, then the per-user ~/Applications install.
        executablePaths: [`/${relativeBinary}`, `${home}/${relativeBinary}`],
        userDataDir: path.join(home, 'Library/Application Support', dataDir),
    };
}
// Browsers this module knows about, in the order they are considered.
const BROWSER_CATALOGUE = [
    ['Chrome', 'Google Chrome', 'Google/Chrome'],
    ['Brave', 'Brave Browser', 'BraveSoftware/Brave-Browser'],
    ['Edge', 'Microsoft Edge', 'Microsoft Edge'],
    ['Arc', 'Arc', 'Arc/User Data'],
    ['Vivaldi', 'Vivaldi', 'Vivaldi'],
    ['Opera', 'Opera', 'com.operasoftware.Opera'],
    ['Chromium', 'Chromium', 'Chromium'],
].map(([name, appBundle, dataDir]) => describeMacBrowser(name, appBundle, dataDir));
|
|
140
|
+
// ─── Running browser detection ────────────────────────────────────────────────
|
|
141
|
+
/**
 * Returns the names of browsers that are currently running.
 *
 * The process list comes from `ps -axco command`, which prints one bare
 * process name per line. A browser counts as running when a process is named
 * exactly like its main executable or like one of its helpers
 * ("<executable> …", e.g. "Google Chrome Helper"). Safari is matched on the
 * exact name only.
 *
 * Names are compared as whole names / prefixes rather than substrings so that
 * unrelated processes (for example "searchpartyd", which contains "arc") do
 * not mark a browser as running.
 *
 * @returns {Set<string>} Browser names in catalogue order; empty if `ps` fails.
 */
function getRunningBrowserNames() {
    const running = new Set();
    // Lower-cased main process name → browser name used by the rest of this module.
    const processMap = {
        'google chrome': 'Chrome',
        'brave browser': 'Brave',
        'microsoft edge': 'Edge',
        arc: 'Arc',
        vivaldi: 'Vivaldi',
        opera: 'Opera',
        chromium: 'Chromium',
        safari: 'Safari',
    };
    let processNames;
    try {
        // ps -axco command lists only the process name (no path, no args)
        processNames = (0, child_process_1.execSync)('ps -axco command', {
            encoding: 'utf8',
            stdio: ['pipe', 'pipe', 'pipe'],
        })
            .toLowerCase()
            .split('\n')
            .map((line) => line.trim());
    }
    catch {
        // ps failed — proceed without running-browser info
        return running;
    }
    for (const [processName, browserName] of Object.entries(processMap)) {
        const helperPrefix = `${processName} `;
        const isMatch = processName === 'safari'
            // Only the main Safari process counts, not its auxiliary agents.
            ? (name) => name === processName
            : (name) => name === processName || name.startsWith(helperPrefix);
        if (processNames.some(isMatch)) {
            running.add(browserName);
        }
    }
    return running;
}
|
|
185
|
+
// ─── Strategy -1: CDP via DevToolsActivePort ─────────────────────────────────
|
|
186
|
+
//
|
|
187
|
+
// When Chrome is launched with --remote-debugging-port (or --remote-debugging-port=0
|
|
188
|
+
// to let it pick a free port), it writes a DevToolsActivePort file to the user data
|
|
189
|
+
// directory containing the actual port. Connecting via CDP gives us direct access
|
|
190
|
+
// to the live, JS-rendered tab content without AppleScript permissions or cookie
|
|
191
|
+
// decryption. This is the fastest and most reliable path when available.
|
|
192
|
+
/**
 * Reads the rendered text of an already-open tab by attaching to a Chromium
 * remote-debugging (CDP) endpoint.
 *
 * Candidate ports are the ones recorded in each matching browser's
 * `DevToolsActivePort` file, followed by the common debug ports 9222, 9229
 * and 9333. Each port is probed over HTTP first; on success the function
 * connects with Playwright and looks for a tab whose URL starts with `url`
 * minus its query string.
 *
 * @param {string} url - URL to look for among the open tabs.
 * @param {Set<string>} browsersWithUrl - Browser names confirmed to have the URL open.
 * @param {{info: Function, warn: Function, debug: Function}} log - Logger.
 * @returns {Promise<{content: string, finalUrl: string} | null>} Trimmed page
 *   text and the tab's URL, or `null` when no endpoint/tab yields content.
 */
async function fetchWithCDP(url, browsersWithUrl, log) {
    const targetBase = url.split('?')[0]; // strip query for prefix match
    // Collect candidate ports:
    //   1. DevToolsActivePort file (written when Chrome was started with --remote-debugging-port)
    //   2. Well-known default ports developers commonly use
    const candidatePorts = [];
    for (const candidate of BROWSER_CATALOGUE) {
        if (!browsersWithUrl.has(candidate.name))
            continue;
        if (candidate.name === 'Safari')
            continue; // CDP is Chromium-only
        const portFile = path.join(candidate.userDataDir, 'DevToolsActivePort');
        if (!fs.existsSync(portFile))
            continue;
        try {
            const raw = fs.readFileSync(portFile, 'utf8');
            // The first line of the file holds the port number.
            const port = Number.parseInt(raw.split('\n')[0].trim(), 10);
            if (!Number.isNaN(port) && port > 0 && !candidatePorts.includes(port)) {
                candidatePorts.push(port);
            }
        }
        catch (err) {
            // Best-effort: an unreadable port file just means this candidate is skipped.
            log.debug('browser-playwright: CDP — could not read DevToolsActivePort', {
                browser: candidate.name,
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    // Always probe the most common debug ports — many developers run Chrome with
    // --remote-debugging-port=9222 and these checks are cheap (instant refusal if closed).
    for (const p of [9222, 9229, 9333]) {
        if (!candidatePorts.includes(p))
            candidatePorts.push(p);
    }
    for (const port of candidatePorts) {
        // Use the IPv4 loopback literal: "localhost" may resolve to ::1 first,
        // while the debug endpoint is normally bound to 127.0.0.1 only.
        const endpoint = `http://127.0.0.1:${port}`;
        // Quick HTTP probe: /json/version returns immediately if the debug endpoint is up.
        let endpointUp = false;
        try {
            const probe = await axios_1.default.get(`${endpoint}/json/version`, { timeout: 800 });
            endpointUp = probe.status === 200;
        }
        catch {
            // Port not listening — skip without logging noise
            continue;
        }
        if (!endpointUp)
            continue;
        log.info('browser-playwright: CDP — debug endpoint found, connecting', { port });
        let cdpBrowser = null;
        try {
            cdpBrowser = await playwright_core_1.default.chromium.connectOverCDP(endpoint, {
                timeout: 5000,
            });
            let matchedPage = null;
            for (const context of cdpBrowser.contexts()) {
                matchedPage = context.pages().find((page) => page.url().startsWith(targetBase)) ?? null;
                if (matchedPage)
                    break;
            }
            if (!matchedPage) {
                log.debug('browser-playwright: CDP — no tab found matching URL', { port, url });
                continue; // the finally block below still disconnects
            }
            log.info('browser-playwright: CDP — tab found, extracting content', {
                port,
                tabUrl: matchedPage.url(),
            });
            try {
                await matchedPage.waitForFunction(() => (document.body?.innerText ?? '').trim().length > 200, { timeout: 5000 });
            }
            catch {
                // Best-effort — extract whatever is rendered so far
            }
            // document.body can be null (e.g. while a navigation is in flight); treat that as empty.
            const content = await matchedPage.evaluate(() => document.body?.innerText ?? document.body?.textContent ?? '');
            const trimmed = content.trim();
            log.info('browser-playwright: CDP — content extracted', {
                port,
                contentLength: trimmed.length,
            });
            return trimmed ? { content: trimmed, finalUrl: matchedPage.url() } : null;
        }
        catch (err) {
            log.warn('browser-playwright: CDP — connection failed', {
                port,
                error: err instanceof Error ? err.message.split('\n')[0] : String(err),
            });
        }
        finally {
            if (cdpBrowser) {
                try {
                    await cdpBrowser.close();
                }
                catch {
                    // Closing is best-effort cleanup; nothing useful to do on failure.
                }
            }
        }
    }
    return null;
}
|
|
290
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
291
|
+
/**
 * Reports whether at least one browser known to `getRunningBrowserNames`
 * currently has a running process.
 *
 * @returns {boolean} `true` when the set of running browser names is non-empty.
 */
function isAnyBrowserRunning() {
    const runningBrowsers = getRunningBrowserNames();
    return runningBrowsers.size > 0;
}
|
|
297
|
+
// AppleScript details per browser: the application name to `tell`, and the
// verb that browser's scripting dictionary uses to run JavaScript in a tab.
const BROWSER_APPLESCRIPT = Object.fromEntries([
    ['Chrome', 'Google Chrome'],
    ['Brave', 'Brave Browser'],
    ['Edge', 'Microsoft Edge'],
    ['Arc', 'Arc'],
    ['Vivaldi', 'Vivaldi'],
    ['Opera', 'Opera'],
    ['Chromium', 'Chromium'],
    ['Safari', 'Safari'],
].map(([browser, appName]) => [
    browser,
    // Safari is the only entry with its own verb; the rest share one.
    { appName, jsVerb: browser === 'Safari' ? 'do JavaScript' : 'execute javascript' },
]));
|
|
307
|
+
// ─── Tab detection ────────────────────────────────────────────────────────────
|
|
308
|
+
/**
 * Finds the running browsers that have a tab on the same hostname as `url`.
 *
 * Each running browser is asked, through `osascript`, for the URL of every
 * tab in every window. A browser is included only when that query succeeds
 * and one of the returned URLs has the target hostname; a browser whose
 * query fails is logged at debug level and left out.
 *
 * @param {string} url - URL whose hostname is searched for.
 * @param {{debug: Function}} log - Logger.
 * @returns {Set<string>} Names of the confirmed browsers; empty when `url`
 *   cannot be parsed or no browser is running.
 */
function getBrowsersWithUrlOpen(url, log) {
    const confirmed = new Set();
    // Hostname of a URL string, or null when the string is not a valid URL.
    const hostnameOf = (candidate) => {
        try {
            return new URL(candidate).hostname;
        }
        catch {
            return null;
        }
    };
    const targetHostname = hostnameOf(url);
    if (targetHostname === null) {
        return confirmed;
    }
    for (const browserName of getRunningBrowserNames()) {
        const info = BROWSER_APPLESCRIPT[browserName];
        if (!info) {
            continue;
        }
        try {
            const command = `osascript -e 'tell application "${info.appName}" to get URL of every tab of every window'`;
            const stdout = (0, child_process_1.execSync)(command, {
                encoding: 'utf8',
                timeout: 5000,
                stdio: ['pipe', 'pipe', 'pipe'],
            });
            // The reply is a comma/newline separated list of tab URLs.
            const found = stdout
                .split(/[,\n]/)
                .some((entry) => hostnameOf(entry.trim()) === targetHostname);
            log.debug('browser-playwright: tab check', { browser: browserName, targetHostname, found });
            if (found) {
                confirmed.add(browserName);
            }
        }
        catch {
            log.debug('browser-playwright: AppleScript tab check failed — skipping browser', {
                browser: browserName,
            });
        }
    }
    return confirmed;
}
|
|
360
|
+
/**
 * Reports whether any running browser is confirmed to have a tab open on the
 * hostname of `url`. Yields `false` both when no browser has it open and when
 * the underlying check produces no confirmed browser.
 *
 * @param {string} url - URL whose hostname is searched for.
 * @param {object} log - Logger forwarded to the tab check.
 * @returns {boolean}
 */
function isBrowserOpenWithUrl(url, log) {
    const browsersWithUrl = getBrowsersWithUrlOpen(url, log);
    return browsersWithUrl.size > 0;
}
|
|
368
|
+
// ─── Strategy 0: Live-tab AppleScript extraction ──────────────────────────────
|
|
369
|
+
//
|
|
370
|
+
// When the user already has the URL open in a browser we can pull the rendered
|
|
371
|
+
// page text directly via AppleScript — no cookie decryption, no profile copying,
|
|
372
|
+
// no headless browser launch needed. This is the most reliable strategy for
|
|
373
|
+
// authenticated pages because the live tab already holds the valid session.
|
|
374
|
+
/**
 * Writes an AppleScript to a temp file, executes it with `osascript`, then
 * deletes the file. Using a temp file avoids the quoting/heredoc problems of
 * passing multi-line scripts inline.
 *
 * `osascript` is started directly (no shell), so the temp path is passed
 * verbatim and cannot be misparsed if it contains quotes, `$` or spaces.
 *
 * @param {string} script - AppleScript source.
 * @param {number} timeoutMs - Maximum run time handed to the child process.
 * @returns {string} Standard output of `osascript`.
 * @throws {Error} When writing or running the script fails. The message is
 *   the original message followed, on a new line, by osascript's stderr when
 *   there is any (e.g. "Allow JavaScript from Apple Events is not enabled");
 *   the original error is attached as `cause`.
 */
function runAppleScript(script, timeoutMs) {
    const tmpPath = path.join(os.tmpdir(), `omnikey-as-${Date.now()}-${Math.random().toString(36).slice(2)}.applescript`);
    try {
        // Owner-only permissions: the script lives in a shared temp directory.
        fs.writeFileSync(tmpPath, script, { encoding: 'utf8', mode: 0o600 });
        return (0, child_process_1.execFileSync)('osascript', [tmpPath], {
            encoding: 'utf8',
            timeout: timeoutMs,
            stdio: ['pipe', 'pipe', 'pipe'],
        });
    }
    catch (err) {
        // Enrich the error with osascript's stderr so callers get the real reason.
        const stderr = err?.stderr?.toString?.().trim() ?? '';
        const base = err instanceof Error ? err.message : String(err);
        throw new Error(stderr ? `${base}\n${stderr}` : base, { cause: err });
    }
    finally {
        try {
            fs.unlinkSync(tmpPath);
        }
        catch {
            // The file may never have been created; nothing to clean up then.
        }
    }
}
|
|
406
|
+
/**
 * Finds the window/tab index of `url` inside `appName` via AppleScript.
 *
 * The match is a prefix match on the URL with its query string removed, so
 * deep links still resolve to their tab.
 *
 * @param {string} appName - Application name used in `tell application "…"`.
 * @param {string} url - URL to locate.
 * @returns {{winIdx: number, tabIdx: number} | null} 1-based indices, or
 *   `null` when no tab matches, the script fails, or its reply is not a pair
 *   of positive integers.
 */
function findTabLocation(appName, url) {
    // Strip query-string for the prefix match, then escape the two characters
    // that are special inside an AppleScript string literal. Escaping (rather
    // than deleting) keeps the prefix identical to the real tab URL.
    const urlBase = url
        .split('?')[0]
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"');
    const script = [
        `tell application "${appName}"`,
        ` repeat with wIdx from 1 to count of windows`,
        ` repeat with tIdx from 1 to count of tabs of window wIdx`,
        ` if URL of tab tIdx of window wIdx starts with "${urlBase}" then`,
        ` return (wIdx as string) & ":" & (tIdx as string)`,
        ` end if`,
        ` end repeat`,
        ` end repeat`,
        ` return ""`,
        `end tell`,
    ].join('\n');
    let result;
    try {
        result = runAppleScript(script, 5000).trim();
    }
    catch {
        // Treated the same as "not found": callers simply move on to the next browser.
        return null;
    }
    if (!result)
        return null;
    const [winIdx, tabIdx] = result.split(':').map(Number);
    // The indices are later interpolated into other scripts, so accept only
    // genuine 1-based positions.
    const isPosition = (n) => Number.isInteger(n) && n >= 1;
    if (!isPosition(winIdx) || !isPosition(tabIdx))
        return null;
    return { winIdx, tabIdx };
}
|
|
438
|
+
/**
 * Pulls the text of `url` out of a tab that is already open, by scripting the
 * browser with AppleScript. Browsers are tried in the iteration order of
 * `browsersWithUrl`; the first one that yields more than 100 characters wins.
 *
 * Per browser:
 *   A. Run JavaScript in the tab to read `document.body` text. For every
 *      browser except Safari the tab is first made the active tab of its
 *      window, because the script uses the `tell tab` block form.
 *   B. Safari only: if A produced nothing usable, read the document's HTML
 *      source and reduce it to plain text.
 *
 * @param {string} url - URL expected to be open in a tab.
 * @param {Set<string>} browsersWithUrl - Browser names to try.
 * @param {{info: Function, warn: Function, debug: Function}} log - Logger.
 * @returns {Promise<string | null>} Extracted text, or `null` when not on
 *   macOS, when the set is empty, or when no browser yields enough text.
 */
async function fetchFromRunningBrowserTab(url, browsersWithUrl, log) {
    const onMac = process.platform === 'darwin';
    if (!onMac || browsersWithUrl.size === 0) {
        return null;
    }
    // The first line of a failed command's message is "Command failed: osascript ...";
    // the line after it (osascript's stderr) carries the real reason, so prefer that.
    const describeFailure = (err) => {
        const messageLines = (err instanceof Error ? err.message : String(err)).split('\n');
        const meaningful = messageLines.find((line) => line.trim() && !line.startsWith('Command failed'));
        return (meaningful ?? messageLines[0]).trim();
    };
    // AppleScript string literal holding the JavaScript evaluated inside the tab.
    const jsExpression = `"document.body.innerText || document.body.textContent || ''"`;
    for (const browserName of browsersWithUrl) {
        const info = BROWSER_APPLESCRIPT[browserName];
        if (!info) {
            continue;
        }
        const location = findTabLocation(info.appName, url);
        if (!location) {
            log.debug('browser-playwright: tab location not found', { browser: browserName, url });
            continue;
        }
        const { winIdx, tabIdx } = location;
        const isSafari = browserName === 'Safari';
        log.info('browser-playwright: extracting content from live tab', {
            browser: browserName,
            winIdx,
            tabIdx,
            url,
        });
        // ── Attempt A: JavaScript execution inside the tab ────────────────────────
        // Needs "Allow JavaScript from Apple Events" to be enabled in the browser.
        const jsScriptLines = [`tell application "${info.appName}"`];
        if (isSafari) {
            jsScriptLines.push(` ${info.jsVerb} ${jsExpression} in tab ${tabIdx} of window ${winIdx}`);
        }
        else {
            // Activate the tab first, then address it with the `tell tab` block form.
            jsScriptLines.push(` set active tab index of window ${winIdx} to ${tabIdx}`, ` tell tab ${tabIdx} of window ${winIdx}`, ` execute javascript ${jsExpression}`, ` end tell`);
        }
        jsScriptLines.push(`end tell`);
        try {
            const rendered = runAppleScript(jsScriptLines.join('\n'), 10000).trim();
            if (rendered.length > 100) {
                log.info('browser-playwright: live tab JS content extracted', {
                    browser: browserName,
                    url,
                    contentLength: rendered.length,
                });
                return rendered;
            }
            log.debug('browser-playwright: live tab JS content too short or empty', {
                browser: browserName,
                url,
                contentLength: rendered.length,
            });
        }
        catch (err) {
            log.warn('browser-playwright: live tab JS extraction failed — falling back to page source', {
                browser: browserName,
                url,
                reason: describeFailure(err),
            });
        }
        // ── Attempt B: page source (Safari only) ─────────────────────────────────
        // The other browsers get no source-based fallback here, so all that is
        // left for them is to point the user at the required setting.
        if (!isSafari) {
            log.info('browser-playwright: live tab JS execution failed — ensure "Allow JavaScript from Apple Events" is enabled (Chrome: View → Developer → Allow JavaScript from Apple Events) and restart Chrome after enabling it', {
                browser: browserName,
                url,
            });
            continue;
        }
        // `source` is read from a document, addressed by a running count of tabs
        // across all windows up to the target window/tab.
        const sourceScript = [
            `tell application "${info.appName}"`,
            ` set docIdx to 0`,
            ` repeat with w from 1 to count of windows`,
            ` repeat with t from 1 to count of tabs of window w`,
            ` set docIdx to docIdx + 1`,
            ` if w = ${winIdx} and t = ${tabIdx} then`,
            ` return source of document docIdx`,
            ` end if`,
            ` end repeat`,
            ` end repeat`,
            ` return ""`,
            `end tell`,
        ].join('\n');
        try {
            const html = runAppleScript(sourceScript, 10000).trim();
            if (html.length > 200) {
                // Drop script/style bodies, then all remaining tags, then collapse whitespace.
                const withoutScripts = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
                const withoutStyles = withoutScripts.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
                const text = withoutStyles.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
                if (text.length > 100) {
                    log.info('browser-playwright: live tab page source extracted', {
                        browser: browserName,
                        url,
                        contentLength: text.length,
                    });
                    return text;
                }
            }
            log.debug('browser-playwright: live tab page source too short or empty', {
                browser: browserName,
                url,
            });
        }
        catch (err) {
            log.warn('browser-playwright: live tab page source extraction failed', {
                browser: browserName,
                url,
                reason: describeFailure(err),
            });
        }
    }
    return null;
}
|
|
574
|
+
/**
 * Fetches the text of a URL from the user's own browser session.
 *
 * Only browsers confirmed to have the URL's hostname open in a tab are
 * considered. This function runs two strategies, in order:
 *   -1. CDP — attach to a remote-debugging endpoint and read the live tab
 *       (attempted only when at least one browser has the URL open).
 *    0. Live-tab extraction — read the open tab through AppleScript.
 *
 * @param {string} url - URL to fetch.
 * @param {{info: Function, warn: Function, debug: Function}} log - Logger.
 * @returns {Promise<string | null>} Page text from the first strategy that
 *   produces any, or `null` (after a warning) when none does.
 */
async function fetchWithPlaywright(url, log) {
    // Which browsers have the URL open right now?
    const browsersWithUrl = getBrowsersWithUrlOpen(url, log);
    log.info('browser-playwright: browsers with URL open', {
        url,
        browsers: Array.from(browsersWithUrl),
    });
    // ── Strategy -1: CDP ──────────────────────────────────────────────────────
    const viaCdp = browsersWithUrl.size > 0
        ? await fetchWithCDP(url, browsersWithUrl, log)
        : null;
    if (viaCdp) {
        return viaCdp.content;
    }
    // ── Strategy 0: live tab via AppleScript ──────────────────────────────────
    const viaLiveTab = await fetchFromRunningBrowserTab(url, browsersWithUrl, log);
    if (!viaLiveTab) {
        log.warn('browser-playwright: all strategies exhausted', { url });
        return null;
    }
    return viaLiveTab;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./web-search-provider"), exports);
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isPageAuthenticated = isPageAuthenticated;
|
|
4
|
+
const ai_client_1 = require("../ai-client");
|
|
5
|
+
const ai_client_2 = require("../ai-client");
|
|
6
|
+
// System prompt for the page-authentication check: the model is asked for a
// single-word "yes"/"no" verdict on whether a page shows the requested content.
// Fragments are concatenated without a separator, so each carries its own trailing space.
const SYSTEM_PROMPT = [
    'You are an expert at detecting whether a web page is showing the real requested content. ',
    'Given a URL and the visible text content of a web page, answer "yes" if EITHER: ',
    '(1) The URL looks like a public resource that does not require authentication — such as documentation sites, ',
    'public wikis, news articles, open-source repos, package registries, developer references, or any URL whose ',
    'hostname/path strongly suggests publicly accessible content (e.g. docs.*, developer.*, wikipedia.org, github.com public repos, ',
    'stackoverflow.com, npmjs.com, medium.com, reddit.com, youtube.com, etc.). ',
    '(2) The page is showing the actual content that an authenticated user would see at that URL. ',
    'Answer "no" if the page is: a login/sign-in page, an access denied or unauthorized page, a redirect away from the requested URL, ',
    'a generic 404/not-found or error page that could be an auth redirect in disguise (e.g. shows a not-found message but ',
    'the URL was a valid authenticated route), or any page that does not correspond to the requested resource. ',
    'When in doubt about whether a URL is public, lean towards "yes". Reply with only one word: "yes" or "no".',
].join('');
|
|
17
|
+
// Lookahead asserting that the hostname ends here (path, query, fragment,
// port, or end of string). Without it "npmjs.com" would also match
// "npmjs.com.evil.example".
const HOST_BOUNDARY = '(?=[/?#:]|$)';
/** Escapes regex metacharacters so a literal host or path can be embedded in a pattern. */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Builds a pattern matching http(s) URLs on exactly `host`.
 *
 * @param {string} host - Literal hostname, e.g. "github.com".
 * @param {{ www?: boolean, privatePaths?: string[] }} [options]
 *   `www` (default true) also accepts a "www." prefix. `privatePaths` lists
 *   path fragments that disqualify the URL when they appear after any "/".
 * @returns {RegExp}
 */
function publicHost(host, { www = true, privatePaths = [] } = {}) {
    const wwwPrefix = www ? '(www\\.)?' : '';
    // The exclusion is evaluated at the end of the hostname, BEFORE the first
    // "/", so a private first path segment (e.g. github.com/settings) is
    // rejected as well as deeper ones.
    const exclusion = privatePaths.length > 0
        ? `(?!.*\\/(?:${privatePaths.map(escapeRegExp).join('|')}))`
        : '';
    return new RegExp(`^https?:\\/\\/${wwwPrefix}${escapeRegExp(host)}${HOST_BOUNDARY}${exclusion}`);
}
// URLs matching any of these patterns are treated as public and skip the LLM check.
const PUBLIC_URL_PATTERNS = [
    publicHost('github.com', { privatePaths: ['settings', 'account'] }),
    publicHost('stackoverflow.com'),
    publicHost('wikipedia.org'),
    /^https?:\/\/docs\./,
    /^https?:\/\/developer\./,
    publicHost('npmjs.com'),
    publicHost('pypi.org'),
    publicHost('medium.com'),
    publicHost('reddit.com'),
    publicHost('youtube.com'),
    publicHost('news.ycombinator.com'),
    // Package registries & language docs
    publicHost('crates.io'),
    publicHost('rubygems.org'),
    publicHost('packagist.org'),
    publicHost('pkg.go.dev'),
    publicHost('hex.pm'),
    publicHost('nuget.org'),
    publicHost('maven.apache.org'),
    publicHost('central.sonatype.com', { www: false }),
    // Official language & runtime docs
    publicHost('python.org'),
    publicHost('rust-lang.org'),
    publicHost('golang.org'),
    publicHost('go.dev'),
    publicHost('ruby-lang.org'),
    publicHost('php.net'),
    publicHost('kotlinlang.org'),
    publicHost('swift.org'),
    publicHost('learn.microsoft.com', { www: false }),
    publicHost('msdn.microsoft.com', { www: false }),
    publicHost('devblogs.microsoft.com', { www: false }),
    publicHost('w3.org'),
    publicHost('w3schools.com'),
    /^https?:\/\/(www\.)?mdn\./,
    publicHost('developer.mozilla.org', { www: false }),
    // Source code & open-source platforms
    publicHost('gitlab.com', { privatePaths: ['-/settings'] }),
    publicHost('bitbucket.org', { privatePaths: ['admin'] }),
    publicHost('sourceforge.net'),
    publicHost('codepen.io'),
    publicHost('jsfiddle.net'),
    publicHost('codesandbox.io'),
    // Q&A, forums & community sites
    publicHost('stackexchange.com'),
    publicHost('superuser.com'),
    publicHost('serverfault.com'),
    publicHost('askubuntu.com'),
    publicHost('quora.com'),
    publicHost('dev.to'),
    publicHost('hashnode.com'),
    publicHost('lobste.rs'),
    // News & tech media
    publicHost('techcrunch.com'),
    publicHost('theverge.com'),
    publicHost('wired.com'),
    publicHost('arstechnica.com'),
    publicHost('thenextweb.com'),
    publicHost('infoq.com'),
    publicHost('smashingmagazine.com'),
    publicHost('css-tricks.com'),
    // Reference & encyclopedias
    /^https?:\/\/[a-z-]+\.wikipedia\.org(?=[/?#:]|$)/,
    publicHost('wikidata.org'),
    publicHost('wikimedia.org'),
    publicHost('archive.org'),
    // Cloud provider public docs
    publicHost('cloud.google.com', { www: false, privatePaths: ['console'] }),
    publicHost('aws.amazon.com', { www: false, privatePaths: ['console', 'signin'] }),
    publicHost('azure.microsoft.com'),
    /^https?:\/\/registry\./,
];
// Matches a path/query/fragment segment that names an authentication flow.
const AUTH_PATH_PATTERN = /[/?#](login|log-in|signin|sign-in|signup|sign-up|register|auth|authenticate|oauth|sso|saml|forgot-password|reset-password|verify|two-factor|2fa|mfa)([/?#]|$)/i;
/**
 * Reports whether `url` is on the allow-list of public sites.
 *
 * @param {string} url - Absolute URL to classify.
 * @returns {boolean} false when the URL contains an auth-flow segment;
 *   otherwise true when any entry of PUBLIC_URL_PATTERNS matches.
 */
function isPublicUrl(url) {
    if (AUTH_PATH_PATTERN.test(url))
        return false;
    return PUBLIC_URL_PATTERNS.some((pattern) => pattern.test(url));
}
|
|
96
|
+
/**
 * Decides whether fetched page text is the real content for `url`.
 *
 * Order of checks:
 *   1. If `finalUrl` is given and differs from `url` (ignoring the fragment
 *      and one trailing slash), the fetch was redirected: returns false.
 *   2. If `isPublicUrl(url)` is true: returns true without calling the LLM.
 *   3. Otherwise asks the 'fast' default model for a one-word verdict.
 *
 * @param {string} content - Visible page text sent to the model.
 * @param {string} url - URL that was requested.
 * @param {{ info: Function, error: Function }} log - Logger.
 * @param {string} [finalUrl] - URL the request ended on, if known.
 * @returns {Promise<boolean>} true only when the page is accepted; false on a
 *   redirect, a non-"yes" answer, or a failed LLM call.
 */
async function isPageAuthenticated(content, url, log, finalUrl) {
    const canonical = (address) => address.replace(/#.*$/, '').replace(/\/$/, '');
    const wasRedirected = Boolean(finalUrl) && canonical(finalUrl) !== canonical(url);
    if (wasRedirected) {
        log.info('llm-auth-check: redirect detected, treating as not authenticated', {
            requestUrl: url,
            finalUrl,
        });
        return false;
    }
    if (isPublicUrl(url)) {
        log.info('llm-auth-check: public URL, skipping auth check', { url });
        return true;
    }
    const provider = ai_client_1.aiClient.getProvider();
    const model = (0, ai_client_2.getDefaultModel)(provider, 'fast');
    const systemMessage = { role: 'system', content: SYSTEM_PROMPT };
    const userMessage = { role: 'user', content: `URL: ${url}\n\nPage content:\n${content}` };
    try {
        // temperature 0 and maxTokens 1: the prompt demands a single word.
        const { content: reply } = await ai_client_1.aiClient.complete(model, [systemMessage, userMessage], { temperature: 0, maxTokens: 1 });
        const answer = reply.trim().toLowerCase();
        log.info('llm-auth-check: LLM response', { url, answer });
        return answer === 'yes';
    }
    catch (err) {
        log.error('llm-auth-check: LLM call failed', { url, error: String(err) });
        // A failed LLM call is treated as "not authorized".
        return false;
    }
}
|
|
@@ -7,7 +7,9 @@ exports.MAX_TOOL_CONTENT_CHARS = exports.MAX_WEB_FETCH_BYTES = exports.WEB_SEARC
|
|
|
7
7
|
exports.executeWebSearch = executeWebSearch;
|
|
8
8
|
exports.executeTool = executeTool;
|
|
9
9
|
const axios_1 = __importDefault(require("axios"));
|
|
10
|
-
const config_1 = require("
|
|
10
|
+
const config_1 = require("../config");
|
|
11
|
+
const browser_playwright_1 = require("./browser-playwright");
|
|
12
|
+
const llm_auth_check_1 = require("./llm-auth-check");
|
|
11
13
|
exports.WEB_FETCH_TOOL = {
|
|
12
14
|
name: 'web_fetch',
|
|
13
15
|
description: "Fetch the text content of any publicly accessible URL. Use this to retrieve documentation, error references, API guides, release notes, or any web resource that would help answer the user's question.",
|
|
@@ -134,27 +136,107 @@ async function executeWebSearch(query, log) {
|
|
|
134
136
|
log.info('web_search: using DuckDuckGo (free fallback)', { query });
|
|
135
137
|
return formatSearchResults(await searchWithDuckDuckGo(query));
|
|
136
138
|
}
|
|
139
|
+
/**
 * Reduces an HTML string to plain text using regex substitutions.
 *
 * @param {string} raw - HTML source.
 * @returns {string} Text with <script>/<style> elements removed, remaining
 *   tags replaced by spaces, whitespace runs collapsed, and ends trimmed.
 */
function stripHtml(raw) {
    // Drop script and style elements together with their contents first, so
    // their bodies are not left behind as text.
    const withoutScripts = raw.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
    const withoutStyles = withoutScripts.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
    // Every remaining tag becomes a space so adjacent words stay separated.
    const tagsAsSpaces = withoutStyles.replace(/<[^>]+>/g, ' ');
    const collapsed = tagsAsSpaces.replace(/\s+/g, ' ');
    return collapsed.trim();
}
|
|
147
|
+
// Headers sent with every plain HTTP fetch.
const BASE_FETCH_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (compatible; OmniKeyAgent/1.0)',
};
// ── Step 1: plain HTTP fetch ──────────────────────────────────────────────────
/**
 * Fetches `url` with a plain HTTP GET (15 s timeout, text response, body size
 * capped at MAX_WEB_FETCH_BYTES).
 *
 * @param {string} url - URL to fetch.
 * @param {{ warn: Function }} log - Logger.
 * @returns {Promise<{ html: string|null, authBlocked: boolean, finalUrl: string }>}
 *   On success the body and the post-redirect URL (falling back to `url`).
 *   On failure `{ html: null, authBlocked: true, finalUrl: url }` when the
 *   failure is treated as auth-related (see below).
 * @throws Rethrows the original error for any other failure.
 */
async function fetchPlainHttp(url, log) {
    try {
        const requestOptions = {
            timeout: 15000,
            responseType: 'text',
            maxContentLength: exports.MAX_WEB_FETCH_BYTES,
            headers: BASE_FETCH_HEADERS,
        };
        const response = await axios_1.default.get(url, requestOptions);
        const landedOn = response.request?.res?.responseUrl ?? url;
        return { html: String(response.data), authBlocked: false, finalUrl: landedOn };
    }
    catch (err) {
        let status;
        if (axios_1.default.isAxiosError(err)) {
            status = err.response?.status;
        }
        const message = err instanceof Error ? err.message : String(err);
        log.warn('Initial fetch failed', {
            url,
            error: message,
            status,
        });
        // With a browser session available, any failure may be auth-related:
        // sites answer with redirects, custom error pages or soft-blocks rather
        // than a clean 401/403, so the status code alone is not trusted.
        // NOTE(review): isBrowserOpenWithUrl's result is used synchronously as
        // a boolean — confirm it does not return a Promise.
        const treatAsAuthBlocked = (isSelfHostedMacOS && (0, browser_playwright_1.isBrowserOpenWithUrl)(url, log)) ||
            status === 401 ||
            status === 403;
        if (treatAsAuthBlocked) {
            return { html: null, authBlocked: true, finalUrl: url };
        }
        throw err;
    }
}
|
|
182
|
+
// ── Step 2: LLM auth check on plain response ──────────────────────────────────
|
|
183
|
+
/**
 * Runs the auth check on the first 5000 characters of a plain HTTP response.
 *
 * @param {string} plainText - Tag-stripped page text.
 * @param {string} url - URL that was requested.
 * @param {{ info: Function }} log - Logger.
 * @param {string} [finalUrl] - URL the request ended on.
 * @returns {Promise<boolean>} The result of isPageAuthenticated.
 */
async function checkPlainResponseAuth(plainText, url, log, finalUrl) {
    const excerpt = plainText.slice(0, 5000);
    const isAuthenticated = await (0, llm_auth_check_1.isPageAuthenticated)(excerpt, url, log, finalUrl);
    if (isAuthenticated) {
        return isAuthenticated;
    }
    log.info('web_fetch: plain response failed auth check — trying active-tab strategy', { url });
    return isAuthenticated;
}
|
|
190
|
+
// ── Step 3: active-tab extraction (self-hosted macOS only) ───────────────────
|
|
191
|
+
/**
 * Logs the fallback and delegates to fetchWithPlaywright.
 *
 * @param {string} url - URL to extract.
 * @param {{ info: Function }} log - Logger.
 * @returns {Promise<*>} Whatever fetchWithPlaywright resolves to.
 */
async function fetchFromActiveTab(url, log) {
    log.info('web_fetch: falling back to active-tab extraction', { url });
    const extracted = await (0, browser_playwright_1.fetchWithPlaywright)(url, log);
    return extracted;
}
|
|
195
|
+
// Truthy only for a self-hosted install whose terminal platform is macOS;
// gates the browser-session strategies below.
const isSelfHostedMacOS = config_1.config.isSelfHosted && config_1.config.terminalPlatform === 'macos';
/**
 * Implements the web_fetch tool.
 *
 * Strategy:
 *   1. Plain HTTP fetch, stripped to text.
 *   2. (self-hosted macOS only) LLM auth check on that text; if it passes,
 *      the text is returned.
 *   3. (self-hosted macOS only) If the fetch was auth-blocked or failed the
 *      check, try active-tab extraction.
 * When nothing better is available the plain text is returned, or
 * 'No content retrieved' when it is empty. Results are truncated to
 * MAX_TOOL_CONTENT_CHARS.
 *
 * @param {string} url - URL to fetch.
 * @param {{ info: Function, warn: Function }} log - Logger.
 * @returns {Promise<string>} Page text or the 'No content retrieved' placeholder.
 * @throws Propagates errors rethrown by fetchPlainHttp.
 */
async function executeWebFetch(url, log) {
    log.info('Executing web_fetch tool', { url });
    const plain = await fetchPlainHttp(url, log);
    const plainText = plain.html ? stripHtml(plain.html) : '';
    const plainResult = () => plainText.slice(0, exports.MAX_TOOL_CONTENT_CHARS) || 'No content retrieved';
    if (!isSelfHostedMacOS) {
        // NOTE(review): the authentication hint is only logged; the caller
        // still receives 'No content retrieved'. Confirm that is intended.
        if (plain.authBlocked) {
            log.warn('Error: page requires authentication. Run OmniKey in self-hosted mode on macOS to enable browser-session access.');
        }
        return plainResult();
    }
    // Browser fallback is attempted only with evidence that auth is required:
    // either the fetch itself was blocked, or the text failed the LLM check.
    let needsAuth = plain.authBlocked;
    if (!plain.authBlocked && plainText) {
        log.info('web_fetch: performing LLM auth check on plain HTTP response', { url });
        const passedCheck = await checkPlainResponseAuth(plainText, url, log, plain.finalUrl);
        if (passedCheck) {
            return plainResult();
        }
        needsAuth = true;
    }
    if (needsAuth) {
        log.info('web_fetch: evidence of authentication requirement, attempting active-tab extraction', { url });
        const tabText = await fetchFromActiveTab(url, log);
        if (tabText) {
            return tabText.slice(0, exports.MAX_TOOL_CONTENT_CHARS);
        }
    }
    // Every strategy has been tried; fall back to whatever the plain fetch gave.
    if (plain.authBlocked) {
        log.warn('Error: page requires authentication. Open the page in Chrome and ensure "Allow JavaScript from Apple Events" is enabled (View → Developer → Allow JavaScript from Apple Events).');
    }
    return plainResult();
}
|
|
137
233
|
async function executeTool(name, args, log) {
|
|
138
234
|
if (name === 'web_fetch') {
|
|
139
235
|
const url = args.url;
|
|
140
236
|
if (!url)
|
|
141
237
|
return 'Error: url parameter is required';
|
|
142
238
|
try {
|
|
143
|
-
|
|
144
|
-
const response = await axios_1.default.get(url, {
|
|
145
|
-
timeout: 15000,
|
|
146
|
-
responseType: 'text',
|
|
147
|
-
maxContentLength: exports.MAX_WEB_FETCH_BYTES,
|
|
148
|
-
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; OmniKeyAgent/1.0)' },
|
|
149
|
-
});
|
|
150
|
-
const text = String(response.data)
|
|
151
|
-
.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
|
|
152
|
-
.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
|
|
153
|
-
.replace(/<[^>]+>/g, ' ')
|
|
154
|
-
.replace(/\s+/g, ' ')
|
|
155
|
-
.trim()
|
|
156
|
-
.slice(0, exports.MAX_TOOL_CONTENT_CHARS);
|
|
157
|
-
return text || 'No content retrieved';
|
|
239
|
+
return await executeWebFetch(url, log);
|
|
158
240
|
}
|
|
159
241
|
catch (err) {
|
|
160
242
|
log.warn('web_fetch tool failed', {
|
package/package.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"access": "public",
|
|
5
5
|
"registry": "https://registry.npmjs.org/"
|
|
6
6
|
},
|
|
7
|
-
"version": "1.0.
|
|
7
|
+
"version": "1.0.25",
|
|
8
8
|
"description": "CLI for onboarding users to Omnikey AI and configuring OPENAI_API_KEY. Use Yarn for install/build.",
|
|
9
9
|
"engines": {
|
|
10
10
|
"node": ">=14.0.0",
|
|
@@ -44,7 +44,8 @@
|
|
|
44
44
|
"sqlite3": "^5.1.6",
|
|
45
45
|
"winston": "^3.19.0",
|
|
46
46
|
"ws": "^8.18.0",
|
|
47
|
-
"zod": "^4.3.6"
|
|
47
|
+
"zod": "^4.3.6",
|
|
48
|
+
"playwright-core": "^1.50.0"
|
|
48
49
|
},
|
|
49
50
|
"devDependencies": {
|
|
50
51
|
"@types/inquirer": "^9.0.9",
|