omnikey-cli 1.0.24 → 1.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend-dist/agent/agentAuth.js +4 -0
- package/backend-dist/agent/agentServer.js +253 -9
- package/backend-dist/agent/utils.js +4 -3
- package/backend-dist/authMiddleware.js +4 -0
- package/backend-dist/config.js +1 -0
- package/backend-dist/db.js +7 -0
- package/backend-dist/index.js +5 -24
- package/backend-dist/models/agentSession.js +80 -0
- package/backend-dist/subscriptionRoutes.js +4 -0
- package/backend-dist/taskInstructionRoutes.js +11 -8
- package/backend-dist/web-search/browser-playwright.js +613 -0
- package/backend-dist/web-search/index.js +17 -0
- package/backend-dist/web-search/llm-auth-check.js +127 -0
- package/backend-dist/{web-search-provider.js → web-search/web-search-provider.js} +98 -16
- package/package.json +3 -2
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isPageAuthenticated = isPageAuthenticated;
|
|
4
|
+
const ai_client_1 = require("../ai-client");
|
|
5
|
+
const ai_client_2 = require("../ai-client");
|
|
6
|
+
// System prompt for the auth-check LLM call. The model is asked to reply with a
// single word ("yes" or "no") saying whether the fetched page shows the real
// requested content. The fragments are joined with no separator; each fragment
// carries its own trailing space.
const SYSTEM_PROMPT = [
    'You are an expert at detecting whether a web page is showing the real requested content. ',
    'Given a URL and the visible text content of a web page, answer "yes" if EITHER: ',
    '(1) The URL looks like a public resource that does not require authentication — such as documentation sites, ',
    'public wikis, news articles, open-source repos, package registries, developer references, or any URL whose ',
    'hostname/path strongly suggests publicly accessible content (e.g. docs.*, developer.*, wikipedia.org, github.com public repos, ',
    'stackoverflow.com, npmjs.com, medium.com, reddit.com, youtube.com, etc.). ',
    '(2) The page is showing the actual content that an authenticated user would see at that URL. ',
    'Answer "no" if the page is: a login/sign-in page, an access denied or unauthorized page, a redirect away from the requested URL, ',
    'a generic 404/not-found or error page that could be an auth redirect in disguise (e.g. shows a not-found message but ',
    'the URL was a valid authenticated route), or any page that does not correspond to the requested resource. ',
    'When in doubt about whether a URL is public, lean towards "yes". Reply with only one word: "yes" or "no".',
].join('');
|
|
17
|
+
/**
 * Regular expressions for URLs that are treated as publicly readable, so the
 * LLM auth check can be skipped for them.
 *
 * Two kinds of entries:
 *  - exact sites, built by `site()`: the hostname must END right after the
 *    listed host (optional numeric port, then `/`, `?`, `#` or end of string).
 *    Without that boundary `https://npmjs.com.evil.example/` would be accepted
 *    as npmjs.com. Some sites additionally list private path segments
 *    (settings, console, ...) that must not appear anywhere in the path; the
 *    guard is evaluated from the end of the hostname so that top-level routes
 *    such as `github.com/settings/...` are excluded too.
 *  - prefix heuristics (`docs.`, `developer.`, `mdn.`, `registry.`): any host
 *    starting with that label is accepted, by design.
 *
 * All patterns are case-insensitive because scheme and hostname are.
 */
const PUBLIC_URL_PATTERNS = (() => {
    // End of hostname: optional ":port", then the start of path/query/fragment or end of input.
    const HOST_END = String.raw`(?=(?::\d+)?(?:[/?#]|$))`;
    const OPTIONAL_WWW = String.raw`(?:www\.)?`;
    const NO_PREFIX = '';
    // Language-edition subdomains such as "en." or "zh-yue.".
    const LANGUAGE_SUBDOMAIN = String.raw`[a-z-]+\.`;
    // Listed hosts contain only letters, digits, '-' and '.', so dots are the only regex metacharacter.
    const escapeDots = (host) => host.replace(/\./g, '\\.');
    // Rejects the URL when any listed segment appears as a whole path segment (query/fragment are ignored).
    const privatePathGuard = (segments) => (segments.length === 0
        ? ''
        : `(?![^?#]*/(?:${segments.join('|')})(?:[/?#]|$))`);
    const site = (host, { prefix = OPTIONAL_WWW, privatePaths = [] } = {}) => new RegExp(`^https?://${prefix}${escapeDots(host)}${HOST_END}${privatePathGuard(privatePaths)}`, 'i');
    return [
        site('github.com', { privatePaths: ['settings', 'account'] }),
        site('stackoverflow.com'),
        site('wikipedia.org'),
        /^https?:\/\/docs\./i,
        /^https?:\/\/developer\./i,
        site('npmjs.com'),
        site('pypi.org'),
        site('medium.com'),
        site('reddit.com'),
        site('youtube.com'),
        site('news.ycombinator.com'),
        // Package registries & language docs
        site('crates.io'),
        site('rubygems.org'),
        site('packagist.org'),
        site('pkg.go.dev'),
        site('hex.pm'),
        site('nuget.org'),
        site('maven.apache.org'),
        site('central.sonatype.com', { prefix: NO_PREFIX }),
        // Official language & runtime docs
        site('python.org'),
        site('rust-lang.org'),
        site('golang.org'),
        site('go.dev'),
        site('ruby-lang.org'),
        site('php.net'),
        site('kotlinlang.org'),
        site('swift.org'),
        site('learn.microsoft.com', { prefix: NO_PREFIX }),
        site('msdn.microsoft.com', { prefix: NO_PREFIX }),
        site('devblogs.microsoft.com', { prefix: NO_PREFIX }),
        site('w3.org'),
        site('w3schools.com'),
        /^https?:\/\/(www\.)?mdn\./i,
        site('developer.mozilla.org', { prefix: NO_PREFIX }),
        // Source code & open-source platforms
        site('gitlab.com', { privatePaths: ['-/settings'] }),
        site('bitbucket.org', { privatePaths: ['admin'] }),
        site('sourceforge.net'),
        site('codepen.io'),
        site('jsfiddle.net'),
        site('codesandbox.io'),
        // Q&A, forums & community sites
        site('stackexchange.com'),
        site('superuser.com'),
        site('serverfault.com'),
        site('askubuntu.com'),
        site('quora.com'),
        site('dev.to'),
        site('hashnode.com'),
        site('lobste.rs'),
        // News & tech media
        site('techcrunch.com'),
        site('theverge.com'),
        site('wired.com'),
        site('arstechnica.com'),
        site('thenextweb.com'),
        site('infoq.com'),
        site('smashingmagazine.com'),
        site('css-tricks.com'),
        // Reference & encyclopedias
        site('wikipedia.org', { prefix: LANGUAGE_SUBDOMAIN }),
        site('wikidata.org'),
        site('wikimedia.org'),
        site('archive.org'),
        // Cloud provider public docs
        site('cloud.google.com', { prefix: NO_PREFIX, privatePaths: ['console'] }),
        site('aws.amazon.com', { prefix: NO_PREFIX, privatePaths: ['console', 'signin'] }),
        site('azure.microsoft.com'),
        /^https?:\/\/registry\./i,
    ];
})();
|
|
90
|
+
// Matches URLs that contain an authentication-flow keyword (login, signin, oauth,
// sso, 2fa, ...) as a whole component: the keyword must be preceded by '/', '?'
// or '#' and followed by one of those characters or the end of the string.
// Case-insensitive. Used by isPublicUrl to veto an otherwise public-looking URL.
const AUTH_PATH_PATTERN = /[/?#](login|log-in|signin|sign-in|signup|sign-up|register|auth|authenticate|oauth|sso|saml|forgot-password|reset-password|verify|two-factor|2fa|mfa)([/?#]|$)/i;
|
|
91
|
+
/**
 * Decide whether a URL can be treated as public without consulting the LLM.
 *
 * A URL that looks like an auth route (per AUTH_PATH_PATTERN) is never public;
 * otherwise it is public when at least one entry of PUBLIC_URL_PATTERNS matches.
 *
 * @param {string} url - URL to classify.
 * @returns {boolean} true when the URL matches the public allow-list.
 */
function isPublicUrl(url) {
    const looksLikeAuthRoute = AUTH_PATH_PATTERN.test(url);
    return !looksLikeAuthRoute && PUBLIC_URL_PATTERNS.some((candidate) => candidate.test(url));
}
|
|
96
|
+
/**
 * Reduce a URL to the parts that identify the requested resource, so that
 * harmless canonicalisation redirects do not look like auth redirects.
 *
 * Ignored: scheme (http -> https upgrades), a leading "www.", default ports
 * (dropped by the URL parser), trailing slashes on the path, and the fragment.
 * Kept: host, path and query string.
 *
 * Input that the URL parser rejects falls back to stripping the fragment and a
 * single trailing slash from the raw string.
 */
function comparableUrl(rawUrl) {
    try {
        const parsed = new URL(rawUrl);
        const host = parsed.host.replace(/^www\./, '');
        const path = parsed.pathname.replace(/\/+$/, '');
        return `${host}${path}${parsed.search}`;
    }
    catch (parseError) {
        return rawUrl.replace(/#.*$/, '').replace(/\/$/, '');
    }
}
/**
 * Decide whether fetched page content is the real content for `url`, as
 * opposed to a login wall, access-denied page or auth redirect.
 *
 * Checks, in order:
 *  1. If `finalUrl` is given and points at a different resource than `url`
 *     (see comparableUrl), the fetch was redirected: returns false.
 *  2. If `url` is on the public allow-list (isPublicUrl): returns true.
 *  3. Otherwise asks the 'fast' default model of the configured provider, using
 *     SYSTEM_PROMPT, and returns true only when the reply is exactly "yes"
 *     (after trimming and lower-casing).
 *
 * @param {string} content - Visible text of the fetched page.
 * @param {string} url - URL that was requested.
 * @param {object} log - Logger; `info` and `error` are called with (message, meta).
 * @param {string} [finalUrl] - URL the request ended at after redirects, if known.
 * @returns {Promise<boolean>} false when the LLM call fails (fail closed).
 */
async function isPageAuthenticated(content, url, log, finalUrl) {
    if (finalUrl && comparableUrl(finalUrl) !== comparableUrl(url)) {
        log.info('llm-auth-check: redirect detected, treating as not authenticated', {
            requestUrl: url,
            finalUrl,
        });
        return false;
    }
    if (isPublicUrl(url)) {
        log.info('llm-auth-check: public URL, skipping auth check', { url });
        return true;
    }
    const model = (0, ai_client_2.getDefaultModel)(ai_client_1.aiClient.getProvider(), 'fast');
    const messages = [
        { role: 'system', content: SYSTEM_PROMPT },
        { role: 'user', content: `URL: ${url}\n\nPage content:\n${content}` },
    ];
    try {
        const result = await ai_client_1.aiClient.complete(model, messages, { temperature: 0, maxTokens: 1 });
        const answer = result.content.trim().toLowerCase();
        log.info('llm-auth-check: LLM response', { url, answer });
        return answer === 'yes';
    }
    catch (err) {
        log.error('llm-auth-check: LLM call failed', { url, error: String(err) });
        // If LLM call fails, default to not authorized
        return false;
    }
}
|
|
@@ -7,7 +7,9 @@ exports.MAX_TOOL_CONTENT_CHARS = exports.MAX_WEB_FETCH_BYTES = exports.WEB_SEARC
|
|
|
7
7
|
exports.executeWebSearch = executeWebSearch;
|
|
8
8
|
exports.executeTool = executeTool;
|
|
9
9
|
const axios_1 = __importDefault(require("axios"));
|
|
10
|
-
const config_1 = require("./config");
|
|
10
|
+
const config_1 = require("../config");
|
|
11
|
+
const browser_playwright_1 = require("./browser-playwright");
|
|
12
|
+
const llm_auth_check_1 = require("./llm-auth-check");
|
|
11
13
|
exports.WEB_FETCH_TOOL = {
|
|
12
14
|
name: 'web_fetch',
|
|
13
15
|
description: "Fetch the text content of any publicly accessible URL. Use this to retrieve documentation, error references, API guides, release notes, or any web resource that would help answer the user's question.",
|
|
@@ -134,27 +136,107 @@ async function executeWebSearch(query, log) {
|
|
|
134
136
|
log.info('web_search: using DuckDuckGo (free fallback)', { query });
|
|
135
137
|
return formatSearchResults(await searchWithDuckDuckGo(query));
|
|
136
138
|
}
|
|
139
|
+
/**
 * Convert an HTML document to plain text.
 *
 * Removes <script> and <style> elements together with their contents, removes
 * HTML comments, replaces every remaining tag with a space, then collapses
 * runs of whitespace and trims. HTML entities are left as-is.
 *
 * @param {string} raw - HTML markup.
 * @returns {string} Whitespace-normalised text content.
 */
function stripHtml(raw) {
    return raw
        // End tags may legally contain whitespace before '>' (e.g. "</script >"),
        // so allow it; otherwise the script/style body would leak into the text.
        .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, '')
        .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, '')
        // Comments can contain '>', which the generic tag pattern below would
        // stop at, leaving the rest of the comment in the output.
        .replace(/<!--[\s\S]*?-->/g, ' ')
        .replace(/<[^>]+>/g, ' ')
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
147
|
+
// Request headers sent with the plain HTTP fetch in fetchPlainHttp.
const BASE_FETCH_HEADERS = { 'User-Agent': 'Mozilla/5.0 (compatible; OmniKeyAgent/1.0)' };
|
|
150
|
+
// ── Step 1: plain HTTP fetch ──────────────────────────────────────────────────
|
|
151
|
+
/**
 * Step 1 of web_fetch: request the URL with a plain HTTP GET.
 *
 * @param {string} url - URL to fetch.
 * @param {object} log - Logger; `warn` is called when the request fails.
 * @returns {Promise<{html: (string|null), authBlocked: boolean, finalUrl: string}>}
 *   On success: the response body as a string, `authBlocked: false`, and the
 *   post-redirect URL reported by the underlying request (or `url` when not
 *   available). On an auth-looking failure: `html: null`, `authBlocked: true`.
 * @throws Re-throws the original error for failures that do not look auth-related.
 */
async function fetchPlainHttp(url, log) {
    const requestOptions = {
        timeout: 15000,
        responseType: 'text',
        maxContentLength: exports.MAX_WEB_FETCH_BYTES,
        headers: BASE_FETCH_HEADERS,
    };
    try {
        const { data, request } = await axios_1.default.get(url, requestOptions);
        return {
            html: String(data),
            authBlocked: false,
            finalUrl: request?.res?.responseUrl ?? url,
        };
    }
    catch (err) {
        const status = axios_1.default.isAxiosError(err) ? err.response?.status : undefined;
        const reason = err instanceof Error ? err.message : String(err);
        log.warn('Initial fetch failed', { url, error: reason, status });
        const blockedResult = { html: null, authBlocked: true, finalUrl: url };
        // On self-hosted macOS, when the browser reports this URL as open, treat ANY
        // failure as potentially auth-related: sites use redirects, custom error
        // pages or soft-blocks rather than a clean 401/403, so the status code alone
        // is unreliable. The caller then falls through to the browser-session path.
        const openInBrowser = isSelfHostedMacOS && (0, browser_playwright_1.isBrowserOpenWithUrl)(url, log);
        if (openInBrowser || status === 401 || status === 403) {
            return blockedResult;
        }
        throw err;
    }
}
|
|
182
|
+
// ── Step 2: LLM auth check on plain response ──────────────────────────────────
|
|
183
|
+
/**
 * Step 2 of web_fetch: ask the LLM auth check whether the plain HTTP response
 * is the real page.
 *
 * Only the first 5000 characters of the text are submitted.
 *
 * @param {string} plainText - Text extracted from the plain HTTP response.
 * @param {string} url - URL that was requested.
 * @param {object} log - Logger; `info` is called when the check fails.
 * @param {string} finalUrl - URL the request ended at after redirects.
 * @returns {Promise<boolean>} Result of isPageAuthenticated.
 */
async function checkPlainResponseAuth(plainText, url, log, finalUrl) {
    const excerpt = plainText.slice(0, 5000);
    const verdict = await (0, llm_auth_check_1.isPageAuthenticated)(excerpt, url, log, finalUrl);
    if (verdict) {
        return verdict;
    }
    log.info('web_fetch: plain response failed auth check — trying active-tab strategy', { url });
    return verdict;
}
|
|
190
|
+
// ── Step 3: active-tab extraction (self-hosted macOS only) ───────────────────
|
|
191
|
+
/**
 * Step 3 of web_fetch: obtain the page through the browser-based fetcher
 * (fetchWithPlaywright) instead of a plain HTTP request.
 *
 * @param {string} url - URL to extract.
 * @param {object} log - Logger; `info` is called before the extraction starts.
 * @returns {Promise<*>} Whatever fetchWithPlaywright resolves to; the caller
 *   treats a falsy value as "nothing extracted".
 */
async function fetchFromActiveTab(url, log) {
    log.info('web_fetch: falling back to active-tab extraction', { url });
    const extracted = await (0, browser_playwright_1.fetchWithPlaywright)(url, log);
    return extracted;
}
|
|
195
|
+
// True only when the config reports a self-hosted install whose terminal platform
// is 'macos'. Evaluated once at module load. Gates the browser-session
// (active-tab) fallback used by fetchPlainHttp and executeWebFetch.
const isSelfHostedMacOS = config_1.config.isSelfHosted && config_1.config.terminalPlatform === 'macos';
|
|
196
|
+
/**
 * Fetch a URL for the web_fetch tool and return its text content.
 *
 * Strategy:
 *  1. Plain HTTP request (fetchPlainHttp); HTML is reduced to text by stripHtml.
 *  2. Self-hosted macOS only: the LLM auth check decides whether the plain
 *     response is the real page; if so it is returned.
 *  3. Self-hosted macOS only: when the request was auth-blocked or the plain
 *     response looks unauthenticated, try active-tab extraction.
 *
 * When the page turned out to require authentication and nothing could be
 * retrieved, the "Error: page requires authentication ..." message is both
 * logged and returned, so the caller sees why there is no content instead of
 * the generic 'No content retrieved'.
 *
 * @param {string} url - URL to fetch.
 * @param {object} log - Logger with `info` and `warn`.
 * @returns {Promise<string>} Page text truncated to MAX_TOOL_CONTENT_CHARS, an
 *   "Error: ..." explanation, or 'No content retrieved'.
 * @throws Propagates errors from fetchPlainHttp that are not auth-related.
 */
async function executeWebFetch(url, log) {
    // Truncate to the tool limit; empty text becomes the generic placeholder.
    const toToolResult = (text) => text.slice(0, exports.MAX_TOOL_CONTENT_CHARS) || 'No content retrieved';
    // Log the auth failure and hand the same message back as the tool result.
    const reportAuthRequired = (message) => {
        log.warn(message);
        return message;
    };
    log.info('Executing web_fetch tool', { url });
    // ── Step 1: plain HTTP request ────────────────────────────────────────────
    const { html, authBlocked, finalUrl } = await fetchPlainHttp(url, log);
    const plainText = html ? stripHtml(html) : '';
    if (!isSelfHostedMacOS) {
        if (authBlocked) {
            // html is null whenever authBlocked is set, so there is no content to lose here.
            return reportAuthRequired('Error: page requires authentication. Run OmniKey in self-hosted mode on macOS to enable browser-session access.');
        }
        return toToolResult(plainText);
    }
    // ── Step 2 (self-hosted macOS only): LLM auth check on plain response ─────
    let looksUnauthenticated = false;
    if (!authBlocked && plainText) {
        log.info('web_fetch: performing LLM auth check on plain HTTP response', { url });
        const authenticated = await checkPlainResponseAuth(plainText, url, log, finalUrl);
        if (authenticated) {
            return toToolResult(plainText);
        }
        looksUnauthenticated = true;
    }
    // ── Step 3 (self-hosted macOS only): active-tab extraction ───────────────
    // Only attempted when there is evidence authentication is required.
    const needsAuth = authBlocked || looksUnauthenticated;
    if (needsAuth) {
        log.info('web_fetch: evidence of authentication requirement, attempting active-tab extraction', { url });
        const activeTabText = await fetchFromActiveTab(url, log);
        if (activeTabText) {
            return activeTabText.slice(0, exports.MAX_TOOL_CONTENT_CHARS);
        }
    }
    // All strategies exhausted.
    if (authBlocked) {
        return reportAuthRequired('Error: page requires authentication. Open the page in Chrome and ensure "Allow JavaScript from Apple Events" is enabled (View → Developer → Allow JavaScript from Apple Events).');
    }
    // Best effort: return whatever the plain request produced, even if it looked unauthenticated.
    return toToolResult(plainText);
}
|
|
137
233
|
async function executeTool(name, args, log) {
|
|
138
234
|
if (name === 'web_fetch') {
|
|
139
235
|
const url = args.url;
|
|
140
236
|
if (!url)
|
|
141
237
|
return 'Error: url parameter is required';
|
|
142
238
|
try {
|
|
143
|
-
|
|
144
|
-
const response = await axios_1.default.get(url, {
|
|
145
|
-
timeout: 15000,
|
|
146
|
-
responseType: 'text',
|
|
147
|
-
maxContentLength: exports.MAX_WEB_FETCH_BYTES,
|
|
148
|
-
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; OmniKeyAgent/1.0)' },
|
|
149
|
-
});
|
|
150
|
-
const text = String(response.data)
|
|
151
|
-
.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
|
|
152
|
-
.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
|
|
153
|
-
.replace(/<[^>]+>/g, ' ')
|
|
154
|
-
.replace(/\s+/g, ' ')
|
|
155
|
-
.trim()
|
|
156
|
-
.slice(0, exports.MAX_TOOL_CONTENT_CHARS);
|
|
157
|
-
return text || 'No content retrieved';
|
|
239
|
+
return await executeWebFetch(url, log);
|
|
158
240
|
}
|
|
159
241
|
catch (err) {
|
|
160
242
|
log.warn('web_fetch tool failed', {
|
package/package.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"access": "public",
|
|
5
5
|
"registry": "https://registry.npmjs.org/"
|
|
6
6
|
},
|
|
7
|
-
"version": "1.0.24",
|
|
7
|
+
"version": "1.0.26",
|
|
8
8
|
"description": "CLI for onboarding users to Omnikey AI and configuring OPENAI_API_KEY. Use Yarn for install/build.",
|
|
9
9
|
"engines": {
|
|
10
10
|
"node": ">=14.0.0",
|
|
@@ -44,7 +44,8 @@
|
|
|
44
44
|
"sqlite3": "^5.1.6",
|
|
45
45
|
"winston": "^3.19.0",
|
|
46
46
|
"ws": "^8.18.0",
|
|
47
|
-
"zod": "^4.3.6"
|
|
47
|
+
"zod": "^4.3.6",
|
|
48
|
+
"playwright-core": "^1.50.0"
|
|
48
49
|
},
|
|
49
50
|
"devDependencies": {
|
|
50
51
|
"@types/inquirer": "^9.0.9",
|