@gajae-code/coding-agent 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/dist/types/cli/mcp-cli.d.ts +25 -0
- package/dist/types/cli/notify-cli.d.ts +2 -0
- package/dist/types/cli.d.ts +6 -0
- package/dist/types/commands/mcp.d.ts +70 -0
- package/dist/types/config/keybindings.d.ts +2 -2
- package/dist/types/config/settings-schema.d.ts +39 -2
- package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
- package/dist/types/extensibility/shared-events.d.ts +1 -0
- package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
- package/dist/types/lsp/types.d.ts +2 -0
- package/dist/types/modes/components/custom-editor.d.ts +1 -1
- package/dist/types/modes/components/model-selector.d.ts +2 -0
- package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
- package/dist/types/modes/theme/defaults/index.d.ts +99 -0
- package/dist/types/notifications/attachment-registry.d.ts +17 -0
- package/dist/types/notifications/chat-adapters.d.ts +9 -0
- package/dist/types/notifications/config.d.ts +9 -1
- package/dist/types/notifications/engine.d.ts +59 -0
- package/dist/types/notifications/managed-daemon.d.ts +48 -0
- package/dist/types/notifications/operator-runtime.d.ts +52 -0
- package/dist/types/notifications/telegram-daemon.d.ts +73 -16
- package/dist/types/notifications/threaded-inbound.d.ts +19 -0
- package/dist/types/notifications/threaded-render.d.ts +6 -1
- package/dist/types/notifications/topic-registry.d.ts +2 -0
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/composer-bash-policy.d.ts +14 -0
- package/dist/types/tools/fetch.d.ts +23 -0
- package/dist/types/tools/index.d.ts +1 -0
- package/dist/types/tools/telegram-send.d.ts +32 -0
- package/dist/types/web/insane/bridge.d.ts +103 -0
- package/dist/types/web/insane/url-guard.d.ts +25 -0
- package/dist/types/web/scrapers/types.d.ts +5 -0
- package/dist/types/web/scrapers/utils.d.ts +7 -1
- package/dist/types/web/search/provider.d.ts +18 -1
- package/dist/types/web/search/providers/insane.d.ts +53 -0
- package/dist/types/web/search/providers/text-citations.d.ts +23 -0
- package/dist/types/web/search/types.d.ts +12 -4
- package/package.json +10 -8
- package/scripts/verify-insane-vendor.ts +132 -0
- package/src/cli/args.ts +1 -1
- package/src/cli/fast-help.ts +1 -1
- package/src/cli/mcp-cli.ts +272 -0
- package/src/cli/notify-cli.ts +152 -5
- package/src/cli.ts +6 -2
- package/src/commands/mcp.ts +117 -0
- package/src/commands/team.ts +1 -1
- package/src/config/keybindings.ts +2 -2
- package/src/config/settings-schema.ts +30 -1
- package/src/deep-interview/plaintext-gate-guard.ts +94 -0
- package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
- package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
- package/src/defaults/gjc/skills/team/SKILL.md +3 -2
- package/src/extensibility/extensions/runner.ts +1 -0
- package/src/extensibility/shared-events.ts +1 -0
- package/src/gjc-runtime/launch-tmux.ts +17 -3
- package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
- package/src/gjc-runtime/ralplan-runtime.ts +2 -2
- package/src/gjc-runtime/tmux-common.ts +3 -1
- package/src/gjc-runtime/ultragoal-guard.ts +25 -8
- package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
- package/src/gjc-runtime/workflow-manifest.ts +7 -2
- package/src/hooks/skill-state.ts +57 -0
- package/src/internal-urls/docs-index.generated.ts +14 -11
- package/src/lsp/config.ts +16 -3
- package/src/lsp/defaults.json +7 -0
- package/src/lsp/types.ts +2 -0
- package/src/modes/bridge/bridge-mode.ts +11 -0
- package/src/modes/components/custom-editor.ts +2 -0
- package/src/modes/components/footer.ts +2 -3
- package/src/modes/components/model-selector.ts +12 -0
- package/src/modes/components/status-line/git-utils.ts +25 -0
- package/src/modes/components/status-line.ts +10 -11
- package/src/modes/components/welcome.ts +2 -3
- package/src/modes/controllers/event-controller.ts +15 -0
- package/src/modes/controllers/selector-controller.ts +3 -0
- package/src/modes/interactive-mode.ts +48 -3
- package/src/modes/shared/agent-wire/scopes.ts +1 -1
- package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
- package/src/modes/theme/defaults/index.ts +2 -0
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/notifications/attachment-registry.ts +23 -0
- package/src/notifications/chat-adapters.ts +147 -0
- package/src/notifications/config.ts +23 -2
- package/src/notifications/engine.ts +100 -0
- package/src/notifications/index.ts +180 -38
- package/src/notifications/managed-daemon.ts +163 -0
- package/src/notifications/operator-runtime.ts +171 -0
- package/src/notifications/telegram-daemon.ts +553 -236
- package/src/notifications/threaded-inbound.ts +60 -4
- package/src/notifications/threaded-render.ts +20 -2
- package/src/notifications/topic-registry.ts +5 -0
- package/src/session/agent-session.ts +82 -51
- package/src/slash-commands/helpers/parse.ts +2 -1
- package/src/tools/bash.ts +9 -0
- package/src/tools/composer-bash-policy.ts +96 -0
- package/src/tools/fetch.ts +94 -1
- package/src/tools/index.ts +3 -0
- package/src/tools/telegram-send.ts +137 -0
- package/src/web/insane/bridge.ts +350 -0
- package/src/web/insane/url-guard.ts +159 -0
- package/src/web/scrapers/types.ts +143 -45
- package/src/web/scrapers/utils.ts +70 -19
- package/src/web/search/provider.ts +77 -18
- package/src/web/search/providers/anthropic.ts +70 -3
- package/src/web/search/providers/codex.ts +1 -119
- package/src/web/search/providers/gemini.ts +99 -0
- package/src/web/search/providers/insane.ts +551 -0
- package/src/web/search/providers/openai-compatible.ts +66 -32
- package/src/web/search/providers/text-citations.ts +111 -0
- package/src/web/search/types.ts +13 -2
- package/vendor/insane-search/LICENSE +21 -0
- package/vendor/insane-search/MANIFEST.json +24 -0
- package/vendor/insane-search/engine/__init__.py +23 -0
- package/vendor/insane-search/engine/__main__.py +128 -0
- package/vendor/insane-search/engine/bias_check.py +183 -0
- package/vendor/insane-search/engine/executor.py +254 -0
- package/vendor/insane-search/engine/fetch_chain.py +725 -0
- package/vendor/insane-search/engine/learning.py +175 -0
- package/vendor/insane-search/engine/phase0.py +214 -0
- package/vendor/insane-search/engine/safety.py +91 -0
- package/vendor/insane-search/engine/templates/package.json +11 -0
- package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
- package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
- package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
- package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
- package/vendor/insane-search/engine/tests/test_u1.py +200 -0
- package/vendor/insane-search/engine/tests/test_u4.py +131 -0
- package/vendor/insane-search/engine/tests/test_u5.py +163 -0
- package/vendor/insane-search/engine/tests/test_u7.py +124 -0
- package/vendor/insane-search/engine/transport.py +211 -0
- package/vendor/insane-search/engine/url_transforms.py +98 -0
- package/vendor/insane-search/engine/validators.py +331 -0
- package/vendor/insane-search/engine/waf_detector.py +214 -0
- package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
|
@@ -0,0 +1,551 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Insane Search Provider
|
|
3
|
+
*
|
|
4
|
+
* Native TypeScript, fail-closed adaptation of the MIT-licensed upstream
|
|
5
|
+
* fivetaku/insane-search public-route strategy. This ports only safe Phase 0
|
|
6
|
+
* concepts: deterministic no-auth public endpoints plus route-attempt tracing.
|
|
7
|
+
*
|
|
8
|
+
* Deliberately excluded from upstream: TLS impersonation, browser/cookie warming,
|
|
9
|
+
* CAPTCHA/paywall/login bypasses, credential storage, Playwright automation, and
|
|
10
|
+
* auto dependency installation. Unsupported or terminal auth/paywall/block states
|
|
11
|
+
* throw instead of pretending a shallow fetch succeeded.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { AuthStorage } from "@gajae-code/ai";
|
|
15
|
+
|
|
16
|
+
import type { SearchResponse, SearchSource } from "../../../web/search/types";
|
|
17
|
+
import { SearchProviderError } from "../../../web/search/types";
|
|
18
|
+
import { clampNumResults } from "../utils";
|
|
19
|
+
import type { SearchParams } from "./base";
|
|
20
|
+
import { SearchProvider } from "./base";
|
|
21
|
+
import { searchDuckDuckGo } from "./duckduckgo";
|
|
22
|
+
import { withHardTimeout } from "./utils";
|
|
23
|
+
|
|
24
|
+
/** Result count used when the caller does not ask for a specific number. */
const DEFAULT_NUM_RESULTS = 10;
/** Upper bound handed to clampNumResults for the requested result count. */
const MAX_NUM_RESULTS = 20;
/** Hard timeout, in milliseconds, applied to every public-route fetch. */
const PUBLIC_ROUTE_TIMEOUT_MS = 15_000;
/** Maximum number of candidate results requested from the DuckDuckGo discovery search. */
const DISCOVERY_LIMIT = 8;

/** User-Agent header sent with every public-route request. */
const USER_AGENT = "Gajae-Code insane-search safe-public-routes/1.0 (+https://github.com/Yeachan-Heo/gajae-code)";
|
|
30
|
+
|
|
31
|
+
/**
 * Lowercase substrings that mark a response body as a block, challenge, login,
 * or paywall page. isBlockedBody lowercases the body before comparing, so every
 * entry here must stay lowercase.
 */
const BLOCK_MARKERS = [
	"access denied",
	"attention required! | cloudflare",
	"captcha",
	"cf-chl-bypass",
	"checking your browser",
	"datadome",
	"just a moment...",
	"login required",
	"paywall",
	"please enable js",
	"request unsuccessful. incapsula",
	"sec-if-cpt-container",
	"sign in to continue",
	"the requested url was rejected",
] as const;
|
|
47
|
+
|
|
48
|
+
/**
 * Named character references understood by decodeEntities, keyed by the
 * lowercase entity name without the surrounding `&` and `;`.
 */
const HTML_ENTITY_MAP: Record<string, string> = {
	amp: "&",
	apos: "'",
	gt: ">",
	lt: "<",
	nbsp: " ",
	quot: '"',
};
|
|
56
|
+
|
|
57
|
+
/** Trace record for a single public-route request, successful or not. */
export interface InsaneRouteAttempt {
	/** Platform the route belongs to. */
	platform: InsanePlatform;
	/** Short route identifier, e.g. "rss", "oembed", "firebase-item". */
	route: string;
	/** Whether the route produced usable content. */
	ok: boolean;
	/** HTTP status of the response; 0 when the request threw before a response was read. */
	status: number;
	/** UTF-8 byte length of the response body (0 when the request threw). */
	bytes: number;
	/** Optional outcome label or the name of the thrown error. */
	note?: string;
}
|
|
65
|
+
|
|
66
|
+
/** Platforms that have a dedicated public-route handler in this module. */
type InsanePlatform = "reddit" | "x" | "youtube" | "hackernews";

/** Outcome of a handler that obtained content from one of its routes. */
interface RouteSuccess {
	platform: InsanePlatform;
	/** Identifier of the route that succeeded. */
	route: string;
	/** URL that was actually fetched for the successful route. */
	finalUrl: string;
	/** Sources extracted from the response. */
	sources: SearchSource[];
	/** Every attempt made up to and including the successful one. */
	attempts: InsaneRouteAttempt[];
}

/** Outcome of a handler whose routes all failed (or that had no applicable route). */
interface RouteFailure {
	platform: InsanePlatform;
	/** Every attempt made; may be empty when no route applied to the URL. */
	attempts: InsaneRouteAttempt[];
}

/** Handler result; `null` means the input was not a supported http(s) platform URL. */
type RouteResult = RouteSuccess | RouteFailure | null;
|
|
82
|
+
|
|
83
|
+
/**
 * Decode HTML/XML character references and normalize whitespace.
 *
 * Handles decimal (`&#65;`), hexadecimal (`&#x41;` / `&#X41;`) and the named
 * references listed in HTML_ENTITY_MAP. Anything that cannot be decoded is
 * left untouched, so hostile or malformed input never throws. Runs of
 * whitespace are then collapsed to a single space and the result is trimmed.
 *
 * @param input Text that may contain character references.
 * @returns The decoded, whitespace-normalized text.
 */
function decodeEntities(input: string): string {
	return input
		.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match, entity: string) => {
			if (entity.startsWith("#")) {
				// The pattern is case-insensitive, so the hex marker may be "x" or "X".
				const isHex = entity[1] === "x" || entity[1] === "X";
				const codePoint = Number.parseInt(entity.slice(isHex ? 2 : 1), isHex ? 16 : 10);
				// String.fromCodePoint throws RangeError outside 0..0x10FFFF; keep the raw text instead.
				return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
					? String.fromCodePoint(codePoint)
					: match;
			}
			const key = entity.toLowerCase();
			// Own-property check so names like "constructor" never resolve to Object.prototype members.
			return Object.prototype.hasOwnProperty.call(HTML_ENTITY_MAP, key) ? HTML_ENTITY_MAP[key] : match;
		})
		.replace(/\s+/g, " ")
		.trim();
}
|
|
93
|
+
|
|
94
|
+
/**
 * Reduce an HTML/XML fragment to plain text.
 *
 * CDATA sections are unwrapped first: the generic tag pattern would otherwise
 * treat `<![CDATA[...]]>` as one tag and delete its content, which empties
 * CDATA-wrapped RSS titles and links. Script and style elements are dropped
 * with their content, remaining tags become spaces, and the result goes
 * through decodeEntities (entity decoding plus whitespace normalization).
 *
 * @param input Markup fragment.
 * @returns Plain text with tags removed.
 */
function stripTags(input: string): string {
	return decodeEntities(
		input
			.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, "$1")
			.replace(/<script\b[\s\S]*?<\/script>/gi, " ")
			.replace(/<style\b[\s\S]*?<\/style>/gi, " ")
			.replace(/<[^>]+>/g, " "),
	);
}
|
|
102
|
+
|
|
103
|
+
/** Return the URL's hostname in lowercase, with one leading "www." label removed. */
function hostnameWithoutWww(url: URL): string {
	const prefix = "www.";
	const lowered = url.hostname.toLowerCase();
	if (!lowered.startsWith(prefix)) return lowered;
	return lowered.slice(prefix.length);
}
|
|
107
|
+
|
|
108
|
+
/**
 * Parse a string as an absolute http(s) URL.
 *
 * @param raw Candidate URL; surrounding whitespace is ignored.
 * @returns The parsed URL, or `null` when the input is not parseable, uses
 *          another scheme, or embeds a username/password.
 */
function parseHttpUrl(raw: string): URL | null {
	let parsed: URL;
	try {
		parsed = new URL(raw.trim());
	} catch {
		return null;
	}
	const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
	if (!isHttp) return null;
	const hasCredentials = Boolean(parsed.username || parsed.password);
	return hasCredentials ? null : parsed;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Map a URL's host to the platform whose public routes can serve it.
 *
 * @returns The platform, or `null` when the host is not one of the supported sites.
 */
function detectPlatform(url: URL): InsanePlatform | null {
	const host = hostnameWithoutWww(url);
	// True for the bare domain and for any of its subdomains.
	const onDomain = (domain: string): boolean => host === domain || host.endsWith(`.${domain}`);

	if (host === "redd.it" || onDomain("reddit.com")) return "reddit";
	if (onDomain("x.com") || onDomain("twitter.com")) return "x";
	if (host === "youtu.be" || onDomain("youtube.com")) return "youtube";
	if (["news.ycombinator.com", "hn.algolia.com"].includes(host)) return "hackernews";
	return null;
}
|
|
128
|
+
|
|
129
|
+
/** Report whether the body contains any BLOCK_MARKERS entry (case-insensitive substring match). */
function isBlockedBody(text: string): boolean {
	const haystack = text.toLowerCase();
	for (const marker of BLOCK_MARKERS) {
		if (haystack.includes(marker)) return true;
	}
	return false;
}
|
|
133
|
+
|
|
134
|
+
/** Type guard: true when the route result carries extracted sources. */
function isSuccess(result: RouteResult): result is RouteSuccess {
	if (result === null) return false;
	return "sources" in result;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Build a trace record for one route request.
 *
 * @param body Response body; only its UTF-8 byte length is recorded.
 * @param note Optional outcome label or error name.
 */
function attempt(
	platform: InsanePlatform,
	route: string,
	ok: boolean,
	status: number,
	body: string,
	note?: string,
): InsaneRouteAttempt {
	const encoded = new TextEncoder().encode(body);
	const bytes = encoded.byteLength;
	return { platform, route, ok, status, bytes, note };
}
|
|
148
|
+
|
|
149
|
+
/**
 * GET a URL and read the whole body as text.
 *
 * Sends the module's User-Agent and an Accept header that prefers JSON and
 * feed formats, follows redirects, and bounds the request with
 * PUBLIC_ROUTE_TIMEOUT_MS combined with the caller's signal.
 *
 * @returns HTTP status, body text, and the content-type header ("" when absent).
 */
async function fetchText(
	url: string,
	signal?: AbortSignal,
): Promise<{ status: number; text: string; contentType: string }> {
	const headers = {
		Accept: "application/json, application/atom+xml, application/rss+xml, text/xml, text/html;q=0.8, */*;q=0.5",
		"User-Agent": USER_AGENT,
	};
	const timedSignal = withHardTimeout(signal, PUBLIC_ROUTE_TIMEOUT_MS);
	const response = await fetch(url, { headers, redirect: "follow", signal: timedSignal });
	const text = await response.text();
	const contentType = response.headers.get("content-type") ?? "";
	return { status: response.status, text, contentType };
}
|
|
164
|
+
|
|
165
|
+
/**
 * Extract the value of an attribute from a tag's source text.
 *
 * The name must stand alone, so asking for `href` does not match `data-href`.
 * The value must be closed by the same quote character that opened it, so
 * `title="it's"` yields `it's` rather than a truncated `it`. Empty values are
 * treated as absent. The value is returned raw, without entity decoding.
 *
 * @param input Source text of a single tag, e.g. `<link href="...">`.
 * @param name Attribute name, matched case-insensitively.
 * @returns The raw attribute value, or `undefined` when not present.
 */
function attr(input: string, name: string): string | undefined {
	// Escape the name so it is always matched literally inside the pattern.
	const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const re = new RegExp(`(?<![\\w:.-])${escaped}\\s*=\\s*(?:"([^"]+)"|'([^']+)')`, "i");
	const found = re.exec(input);
	if (found === null) return undefined;
	return found[1] ?? found[2];
}
|
|
169
|
+
|
|
170
|
+
/**
 * Return the raw inner markup of the first `<tag>...</tag>` element in the input.
 *
 * @param tag Element name, matched case-insensitively and interpolated into the pattern as-is.
 * @returns The text between the opening and closing tags, or `undefined` when there is no such element.
 */
function firstTag(input: string, tag: string): string | undefined {
	const pattern = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)<\\/${tag}>`, "i");
	const found = pattern.exec(input);
	return found === null ? undefined : found[1];
}
|
|
173
|
+
|
|
174
|
+
/** Plain-text content of the first `<tag>` element, or `undefined` when the element is missing. */
function tagText(input: string, tag: string): string | undefined {
	const inner = firstTag(input, tag);
	if (inner === undefined) return undefined;
	return stripTags(inner);
}
|
|
178
|
+
|
|
179
|
+
/**
 * Extract sources from an Atom (`<entry>`) and/or RSS (`<item>`) document.
 *
 * Atom entries are emitted first, then RSS items. Entries without a title, or
 * Atom entries without a link, are skipped.
 *
 * @param xml Feed document text.
 * @param fallbackUrl URL used for RSS items whose `<link>` is missing or empty.
 * @returns Sources in document order within each format.
 */
function parseFeedEntries(xml: string, fallbackUrl: string): SearchSource[] {
	const sources: SearchSource[] = [];
	for (const entry of xml.matchAll(/<entry\b[\s\S]*?<\/entry>/gi)) {
		const chunk = entry[0];
		const linkTag = chunk.match(/<link\b[^>]*>/i)?.[0] ?? "";
		const rawHref = attr(linkTag, "href");
		// Attribute values are XML-escaped (e.g. "&amp;" between query parameters); decode before use as a URL.
		const href = rawHref === undefined ? tagText(chunk, "link") : decodeEntities(rawHref);
		const title = tagText(chunk, "title");
		if (!title || !href) continue;
		sources.push({
			title,
			url: href,
			snippet: tagText(chunk, "summary") ?? tagText(chunk, "content"),
			publishedDate: tagText(chunk, "updated") ?? tagText(chunk, "published"),
			author: tagText(chunk, "name"),
		});
	}
	for (const item of xml.matchAll(/<item\b[\s\S]*?<\/item>/gi)) {
		const chunk = item[0];
		const title = tagText(chunk, "title");
		// tagText yields "" (not undefined) for an empty <link>, so `||` is needed for the fallback to apply.
		const link = tagText(chunk, "link") || fallbackUrl;
		if (!title) continue;
		sources.push({
			title,
			url: link,
			snippet: tagText(chunk, "description"),
			publishedDate: tagText(chunk, "pubDate"),
		});
	}
	return sources;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Build the candidate RSS feed URLs for a Reddit page, most likely first.
 *
 * The query string and fragment are dropped; only origin and path are used.
 *
 * - Root path: only `<origin>/.rss`. Appending ".rss" to the bare origin
 *   would produce a different hostname (`https://www.reddit.com.rss`).
 * - Path already ending in ".rss": returned unchanged instead of suffixed twice.
 * - Comment threads (`/comments/` in the path): `<path>.rss`.
 * - Anything else: `<path>/.rss`, then `<path>.rss`.
 *
 * @param url Reddit URL.
 * @returns Feed URLs to try in order.
 */
function redditFeedUrls(url: URL): string[] {
	const path = url.pathname.replace(/\/+$/, "");
	if (path === "") return [`${url.origin}/.rss`];
	const base = `${url.origin}${path}`;
	if (/\.rss$/i.test(path)) return [base];
	if (/\/comments\//.test(url.pathname)) return [`${base}.rss`];
	return [`${base}/.rss`, `${base}.rss`];
}
|
|
215
|
+
|
|
216
|
+
/**
 * Resolve a Reddit URL through its public RSS feed.
 *
 * Tries each candidate from redditFeedUrls in order and returns on the first
 * 200 response that contains a feed/rss root tag and no block marker. Every
 * request, including ones that throw, is recorded as an attempt.
 *
 * @returns A success carrying the parsed feed entries, or a failure listing all attempts.
 */
async function routeReddit(url: URL, signal?: AbortSignal): Promise<RouteResult> {
	const attempts: InsaneRouteAttempt[] = [];
	for (const feedUrl of redditFeedUrls(url)) {
		try {
			const { status, text } = await fetchText(feedUrl, signal);
			const isFeed = status === 200 && /<(feed|rss)\b/i.test(text) && !isBlockedBody(text);
			const note = isFeed ? "feed" : "no-feed-markers";
			attempts.push(attempt("reddit", "rss", isFeed, status, text, note));
			if (!isFeed) continue;
			return {
				platform: "reddit",
				route: "rss",
				finalUrl: feedUrl,
				sources: parseFeedEntries(text, feedUrl),
				attempts,
			};
		} catch (error) {
			const errorName = error instanceof Error ? error.name : "fetch_error";
			attempts.push(attempt("reddit", "rss", false, 0, "", errorName));
		}
	}
	return { platform: "reddit", attempts };
}
|
|
237
|
+
|
|
238
|
+
/** Numeric status id from a `/status/<id>` or `/statuses/<id>` path, or `null` when absent. */
function tweetId(url: URL): string | null {
	const found = /\/status(?:es)?\/(\d+)/.exec(url.pathname);
	return found === null ? null : found[1];
}
|
|
241
|
+
|
|
242
|
+
/**
 * Account handle taken from the first path segment of an X/Twitter URL.
 *
 * A leading "@" is removed. Returns `null` when the path is empty or the
 * segment is one of the site's reserved top-level routes.
 */
function xHandle(url: URL): string | null {
	const [firstSegment] = url.pathname.split("/").filter(Boolean);
	const handle = firstSegment?.replace(/^@/, "");
	if (!handle) return null;
	const reservedRoutes = ["explore", "hashtag", "home", "i", "messages", "notifications", "search", "settings"];
	if (reservedRoutes.includes(handle.toLowerCase())) return null;
	return handle;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Convert a tweet-result JSON payload into a search source.
 *
 * Payloads that are not objects (e.g. the JSON literal `null`) or that lack a
 * non-empty string `text` yield `null` instead of raising a TypeError.
 *
 * @param raw JSON text of the response body.
 * @param url URL to attribute the source to.
 * @returns The source, or `null` when the payload has no usable text.
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function sourceFromTweetJson(raw: string, url: string): SearchSource | null {
	const data = JSON.parse(raw) as {
		text?: string;
		user?: { name?: string; screen_name?: string };
		created_at?: string;
	} | null;
	// JSON.parse can legitimately return null, a number, a string, or an array.
	if (data === null || typeof data !== "object") return null;
	if (typeof data.text !== "string" || !data.text) return null;
	const author = data.user?.screen_name ? `@${data.user.screen_name}` : data.user?.name;
	return {
		title: author ? `${author}: ${data.text.slice(0, 80)}` : data.text.slice(0, 80),
		url,
		snippet: data.text,
		publishedDate: data.created_at,
		author,
	};
}
|
|
265
|
+
|
|
266
|
+
/**
 * Convert an oEmbed JSON payload into a search source.
 *
 * The title falls back from `title` to the first 100 characters of the
 * tag-stripped `html`, then to `author_name`. Empty strings count as missing
 * at every step, so an empty `title` no longer blocks the fallbacks.
 * Non-object payloads (e.g. JSON `null`) yield `null` instead of a TypeError.
 *
 * @param raw JSON text of the response body.
 * @param url URL used when the payload carries no `url` of its own.
 * @returns The source, or `null` when no title can be derived.
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function sourceFromOEmbed(raw: string, url: string): SearchSource | null {
	const data = JSON.parse(raw) as { title?: string; author_name?: string; html?: string; url?: string } | null;
	if (data === null || typeof data !== "object") return null;
	const snippet = typeof data.html === "string" && data.html ? stripTags(data.html) : undefined;
	const title = data.title || snippet?.slice(0, 100) || data.author_name;
	if (!title) return null;
	return { title, url: data.url ?? url, snippet, author: data.author_name };
}
|
|
273
|
+
|
|
274
|
+
/**
 * Resolve an X/Twitter URL through unauthenticated public endpoints.
 *
 * For status URLs the syndication tweet-result endpoint is tried first, then
 * publish.twitter.com oEmbed. If neither yields a source (or the URL has no
 * status id) and the first path segment is a handle, the syndication profile
 * timeline is tried. Every request, including ones that throw, is recorded.
 *
 * The oEmbed fallback URL is the tweet URL the caller supplied, matching
 * routeYouTube; the API endpoint URL is never reported as the source's URL.
 *
 * @returns A success for the first route that produced content, otherwise a
 *          failure listing all attempts (possibly none).
 */
async function routeX(url: URL, signal?: AbortSignal): Promise<RouteResult> {
	const attempts: InsaneRouteAttempt[] = [];
	const id = tweetId(url);
	if (id) {
		const tweetResultUrl = `https://cdn.syndication.twimg.com/tweet-result?id=${encodeURIComponent(id)}&token=a`;
		try {
			const response = await fetchText(tweetResultUrl, signal);
			const source =
				response.status === 200 && !isBlockedBody(response.text)
					? sourceFromTweetJson(response.text, url.toString())
					: null;
			attempts.push(
				attempt("x", "tweet-result", !!source, response.status, response.text, source ? "has-text" : "no-text"),
			);
			if (source)
				return { platform: "x", route: "tweet-result", finalUrl: tweetResultUrl, sources: [source], attempts };
		} catch (error) {
			attempts.push(attempt("x", "tweet-result", false, 0, "", error instanceof Error ? error.name : "fetch_error"));
		}
		const oembedUrl = `https://publish.twitter.com/oembed?url=${encodeURIComponent(`https://twitter.com/i/status/${id}`)}&omit_script=1`;
		try {
			const response = await fetchText(oembedUrl, signal);
			const source =
				response.status === 200 && !isBlockedBody(response.text)
					? // Fall back to the tweet URL, not the oEmbed endpoint, when the payload has no `url`.
						sourceFromOEmbed(response.text, url.toString())
					: null;
			attempts.push(
				attempt("x", "oembed", !!source, response.status, response.text, source ? "has-html" : "no-html"),
			);
			if (source) return { platform: "x", route: "oembed", finalUrl: oembedUrl, sources: [source], attempts };
		} catch (error) {
			attempts.push(attempt("x", "oembed", false, 0, "", error instanceof Error ? error.name : "fetch_error"));
		}
	}
	const handle = xHandle(url);
	if (handle) {
		const timelineUrl = `https://syndication.twitter.com/srv/timeline-profile/screen-name/${encodeURIComponent(handle)}`;
		try {
			const response = await fetchText(timelineUrl, signal);
			// The timeline page is only considered usable when it embeds its __NEXT_DATA__ payload.
			const ok = response.status === 200 && response.text.includes("__NEXT_DATA__") && !isBlockedBody(response.text);
			attempts.push(
				attempt("x", "syndication-timeline", ok, response.status, response.text, ok ? "timeline" : "no-next-data"),
			);
			if (ok)
				return {
					platform: "x",
					route: "syndication-timeline",
					finalUrl: timelineUrl,
					sources: parseTimelineHtml(response.text, timelineUrl),
					attempts,
				};
		} catch (error) {
			attempts.push(
				attempt("x", "syndication-timeline", false, 0, "", error instanceof Error ? error.name : "fetch_error"),
			);
		}
	}
	return { platform: "x", attempts };
}
|
|
333
|
+
|
|
334
|
+
/**
 * Summarize a syndication timeline page as a single source.
 *
 * The title comes from the page's `<title>` (or a fixed default) and the
 * snippet is the first 500 characters of the page's tag-stripped text.
 */
function parseTimelineHtml(html: string, url: string): SearchSource[] {
	const pageTitle = tagText(html, "title");
	const preview = stripTags(html).slice(0, 500);
	const source: SearchSource = {
		title: pageTitle ?? "X public timeline",
		url,
		snippet: preview || undefined,
	};
	return [source];
}
|
|
339
|
+
|
|
340
|
+
/**
 * Extract a video id from a YouTube URL.
 *
 * youtu.be links use the first path segment; other hosts use the `v` query
 * parameter and, failing that, a `/shorts/<id>` path.
 *
 * @returns The id, or `null` when none of the forms match.
 */
function youtubeVideoId(url: URL): string | null {
	if (hostnameWithoutWww(url) === "youtu.be") {
		const [firstSegment] = url.pathname.split("/").filter(Boolean);
		return firstSegment ?? null;
	}
	const fromQuery = url.searchParams.get("v");
	if (fromQuery !== null) return fromQuery;
	const shorts = /\/shorts\/([^/?#]+)/.exec(url.pathname);
	return shorts === null ? null : shorts[1];
}
|
|
344
|
+
|
|
345
|
+
/**
 * Resolve a YouTube URL through public metadata endpoints.
 *
 * Video URLs are looked up via oEmbed. If that yields nothing (or the URL is
 * not a video) and the path contains `/channel/<id>`, the channel's Atom feed
 * is fetched. Every request, including ones that throw, is recorded.
 *
 * @returns A success for the first route that produced content, otherwise a
 *          failure listing all attempts (possibly none).
 */
async function routeYouTube(url: URL, signal?: AbortSignal): Promise<RouteResult> {
	const attempts: InsaneRouteAttempt[] = [];
	const errorNote = (error: unknown): string => (error instanceof Error ? error.name : "fetch_error");

	const videoId = youtubeVideoId(url);
	if (videoId) {
		const watchUrl = `https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}`;
		const oembedUrl = `https://www.youtube.com/oembed?url=${encodeURIComponent(watchUrl)}&format=json`;
		try {
			const { status, text } = await fetchText(oembedUrl, signal);
			const usable = status === 200 && !isBlockedBody(text);
			const source = usable ? sourceFromOEmbed(text, watchUrl) : null;
			const note = source ? "metadata" : "no-metadata";
			attempts.push(attempt("youtube", "oembed", !!source, status, text, note));
			if (source) {
				return { platform: "youtube", route: "oembed", finalUrl: oembedUrl, sources: [source], attempts };
			}
		} catch (error) {
			attempts.push(attempt("youtube", "oembed", false, 0, "", errorNote(error)));
		}
	}

	const channelMatch = url.pathname.match(/\/channel\/([^/?#]+)/);
	const channelId = channelMatch?.[1];
	if (channelId) {
		const feedUrl = `https://www.youtube.com/feeds/videos.xml?channel_id=${encodeURIComponent(channelId)}`;
		try {
			const { status, text } = await fetchText(feedUrl, signal);
			const isFeed = status === 200 && /<feed\b/i.test(text) && !isBlockedBody(text);
			attempts.push(attempt("youtube", "feed", isFeed, status, text, isFeed ? "feed" : "no-feed"));
			if (isFeed) {
				return {
					platform: "youtube",
					route: "feed",
					finalUrl: feedUrl,
					sources: parseFeedEntries(text, feedUrl),
					attempts,
				};
			}
		} catch (error) {
			attempts.push(attempt("youtube", "feed", false, 0, "", errorNote(error)));
		}
	}

	return { platform: "youtube", attempts };
}
|
|
384
|
+
|
|
385
|
+
/**
 * Extract a Hacker News item id from the URL's `id` query parameter.
 *
 * Only all-digit ids are accepted, so arbitrary text is never forwarded into
 * the item API path. The former `pathname.match(/item\?id=.../)` fallback is
 * gone because `URL.pathname` never contains the query string, which made
 * that branch unreachable.
 *
 * @returns The numeric id as a string, or `null` when absent or not numeric.
 */
function hnItemId(url: URL): string | null {
	const id = url.searchParams.get("id");
	return id !== null && /^\d+$/.test(id) ? id : null;
}
|
|
388
|
+
|
|
389
|
+
/**
 * Convert a Hacker News item JSON payload into a search source.
 *
 * The JSON literal `null` and other non-object payloads yield `null` instead
 * of raising a TypeError. The title falls back to the first 100 characters of
 * the tag-stripped text when `title` is missing or empty. The source URL is
 * the item's external `url` when present, otherwise its discussion page. An
 * out-of-range `time` is ignored rather than letting `toISOString` throw.
 *
 * @param raw JSON text of the response body.
 * @returns The source, or `null` when the item has neither title nor text.
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function hnSourceFromItem(raw: string): SearchSource | null {
	const data = JSON.parse(raw) as {
		by?: string;
		descendants?: number;
		id?: number;
		score?: number;
		text?: string;
		time?: number;
		title?: string;
		url?: string;
	} | null;
	if (data === null || typeof data !== "object") return null;
	if (!data.title && !data.text) return null;
	const discussionUrl = data.id ? `https://news.ycombinator.com/item?id=${data.id}` : "https://news.ycombinator.com/";
	const parts = [
		data.score === undefined ? undefined : `${data.score} points`,
		data.descendants === undefined ? undefined : `${data.descendants} comments`,
	].filter(Boolean);
	// `time` is multiplied by 1000 before constructing the Date, i.e. it is treated as seconds.
	const published = typeof data.time === "number" && data.time ? new Date(data.time * 1000) : undefined;
	return {
		title: data.title || stripTags(data.text ?? "").slice(0, 100),
		url: data.url ?? discussionUrl,
		snippet: [stripTags(data.text ?? ""), parts.join(" · ")].filter(Boolean).join(" — ") || undefined,
		// An invalid Date has a NaN timestamp; toISOString would throw RangeError on it.
		publishedDate: published && !Number.isNaN(published.getTime()) ? published.toISOString() : undefined,
		author: data.by,
	};
}
|
|
414
|
+
|
|
415
|
+
/**
 * Resolve a Hacker News item URL through the public Firebase item API.
 *
 * URLs without an item id return a failure with no attempts. The single
 * request is recorded as an attempt whether it succeeds, fails, or throws.
 *
 * @returns A success carrying the item as one source, or a failure.
 */
async function routeHackerNews(url: URL, signal?: AbortSignal): Promise<RouteResult> {
	const attempts: InsaneRouteAttempt[] = [];
	const id = hnItemId(url);
	if (!id) return { platform: "hackernews", attempts };

	const itemUrl = `https://hacker-news.firebaseio.com/v0/item/${encodeURIComponent(id)}.json`;
	try {
		const { status, text } = await fetchText(itemUrl, signal);
		const usable = status === 200 && !isBlockedBody(text);
		const source = usable ? hnSourceFromItem(text) : null;
		const note = source ? "item" : "no-item";
		attempts.push(attempt("hackernews", "firebase-item", !!source, status, text, note));
		if (source) {
			return { platform: "hackernews", route: "firebase-item", finalUrl: itemUrl, sources: [source], attempts };
		}
	} catch (error) {
		const errorName = error instanceof Error ? error.name : "fetch_error";
		attempts.push(attempt("hackernews", "firebase-item", false, 0, "", errorName));
	}
	return { platform: "hackernews", attempts };
}
|
|
435
|
+
|
|
436
|
+
/**
 * Dispatch a URL to the public-route handler for its platform.
 *
 * @param rawUrl Candidate URL; anything that is not a credential-free http(s) URL is rejected.
 * @returns The handler's result, or `null` when the input is not a URL or its host is unsupported.
 */
export async function routeInsanePublicUrl(rawUrl: string, signal?: AbortSignal): Promise<RouteResult> {
	const url = parseHttpUrl(rawUrl);
	if (url === null) return null;
	switch (detectPlatform(url)) {
		case "reddit":
			return routeReddit(url, signal);
		case "x":
			return routeX(url, signal);
		case "youtube":
			return routeYouTube(url, signal);
		case "hackernews":
			return routeHackerNews(url, signal);
		default:
			return null;
	}
}
|
|
446
|
+
|
|
447
|
+
/**
 * Append a `site:` restriction when the query names a supported platform.
 *
 * Rules are checked in order (reddit, x/twitter, youtube, hacker news) against
 * the lowercased query; the first whole-word match wins. Queries that name no
 * platform are returned unchanged.
 */
function discoveryQuery(query: string): string {
	const lowered = query.toLowerCase();
	const rules: Array<[RegExp, string]> = [
		[/\breddit\b/, "site:reddit.com"],
		[/\b(x|twitter)\b/, "site:x.com OR site:twitter.com"],
		[/\byoutube\b/, "site:youtube.com OR site:youtu.be"],
		[/\b(hacker news|hn)\b/, "site:news.ycombinator.com"],
	];
	for (const [pattern, restriction] of rules) {
		if (pattern.test(lowered)) return `${query} ${restriction}`;
	}
	return query;
}
|
|
455
|
+
|
|
456
|
+
/**
 * Render the attempt trace as a one-line summary.
 *
 * Each attempt becomes `platform/route:<status>`, using the note when the
 * status is 0 and the literal "error" when both are missing.
 */
function routeSummary(attempts: InsaneRouteAttempt[]): string {
	const parts: string[] = [];
	for (const { platform, route, status, note } of attempts) {
		parts.push(`${platform}/${route}:${status || note || "error"}`);
	}
	const tried = parts.join(", ");
	if (!tried) return "insane public-route attempts: none";
	return `insane public-route attempts: ${tried}`;
}
|
|
462
|
+
|
|
463
|
+
/** Copy a source, prefixing its snippet with the `via <platform>/<route>` provenance note. */
function withRouteSnippet(source: SearchSource, route: RouteSuccess): SearchSource {
	const provenance = `via ${route.platform}/${route.route}`;
	let snippet = provenance;
	if (source.snippet) {
		snippet = `${provenance}: ${source.snippet}`;
	}
	return { ...source, snippet };
}
|
|
470
|
+
|
|
471
|
+
/**
 * Execute safe Insane Search public-route discovery.
 *
 * 1. If the query itself is a supported platform URL, route it directly and
 *    return its sources; a direct route that yields nothing fails closed.
 * 2. If the query is some other http(s) URL, fail: no supported route exists.
 * 3. Otherwise run a DuckDuckGo discovery search, route each candidate URL in
 *    order, and collect de-duplicated sources until the limit is reached.
 *
 * @returns A response whose `searchQueries` holds the route-attempt summary.
 * @throws SearchProviderError when no route produced any source.
 */
export async function searchInsane(params: {
	query: string;
	num_results?: number;
	recency?: "day" | "week" | "month" | "year";
	signal?: AbortSignal;
}): Promise<SearchResponse> {
	const { query, recency, signal } = params;
	const numResults = clampNumResults(params.num_results, DEFAULT_NUM_RESULTS, MAX_NUM_RESULTS);

	// Step 1: the query may itself be a routable URL.
	const direct = await routeInsanePublicUrl(query, signal);
	if (isSuccess(direct) && direct.sources.length > 0) {
		const sources = direct.sources.slice(0, numResults).map(source => withRouteSnippet(source, direct));
		return { provider: "insane", sources, searchQueries: [routeSummary(direct.attempts)] };
	}
	if (direct && "attempts" in direct) {
		throw new SearchProviderError("insane", `insane: public routes failed closed (${routeSummary(direct.attempts)})`);
	}

	// Step 2: a URL on an unsupported host is refused rather than fetched shallowly.
	if (parseHttpUrl(query)) {
		throw new SearchProviderError(
			"insane",
			"insane: no supported public route found; unsafe upstream TLS/browser/auth bypasses are intentionally disabled",
		);
	}

	// Step 3: discover candidate URLs, then route each one sequentially.
	const discoveryCount = Math.min(DISCOVERY_LIMIT, Math.max(numResults, 3));
	const discovery = await searchDuckDuckGo({
		query: discoveryQuery(query),
		num_results: discoveryCount,
		recency,
		signal,
	});

	const attempts: InsaneRouteAttempt[] = [];
	const routedSources: SearchSource[] = [];
	const seenUrls = new Set<string>();
	for (const candidate of discovery.sources) {
		if (routedSources.length >= numResults) break;
		const routed = await routeInsanePublicUrl(candidate.url, signal);
		if (routed && "attempts" in routed) {
			attempts.push(...routed.attempts);
		}
		if (!isSuccess(routed)) continue;
		for (const source of routed.sources) {
			if (routedSources.length >= numResults) break;
			const isNew = !seenUrls.has(source.url);
			if (isNew) {
				seenUrls.add(source.url);
				routedSources.push(withRouteSnippet(source, routed));
			}
		}
	}

	if (routedSources.length > 0) {
		return { provider: "insane", sources: routedSources, searchQueries: [routeSummary(attempts)] };
	}
	if (attempts.length > 0) {
		throw new SearchProviderError("insane", `insane: public routes failed closed (${routeSummary(attempts)})`);
	}
	throw new SearchProviderError(
		"insane",
		"insane: no supported public route found; unsafe upstream TLS/browser/auth bypasses are intentionally disabled",
	);
}
|
|
533
|
+
|
|
534
|
+
/**
 * Keyless search provider backed by searchInsane.
 *
 * It needs no credentials, so it always reports itself as available.
 */
export class InsaneProvider extends SearchProvider {
	readonly id = "insane";
	readonly label = "Insane";

	/** Always available: no stored credentials are consulted. */
	isAvailable(_authStorage: AuthStorage): boolean {
		return true;
	}

	/** Forward the generic search parameters to searchInsane. */
	search(params: SearchParams): Promise<SearchResponse> {
		const { query, recency, signal } = params;
		// `numSearchResults` takes precedence over `limit` when both are set.
		const num_results = params.numSearchResults ?? params.limit;
		return searchInsane({ query, num_results, recency, signal });
	}
}
|
|
@@ -2,8 +2,22 @@ import type { SearchCitation, SearchResponse, SearchSource } from "../types";
|
|
|
2
2
|
import { SearchProviderError } from "../types";
|
|
3
3
|
import type { SearchParams } from "./base";
|
|
4
4
|
import { SearchProvider } from "./base";
|
|
5
|
+
import { extractTextSources } from "./text-citations";
|
|
5
6
|
import { classifyProviderHttpError, withHardTimeout } from "./utils";
|
|
6
7
|
|
|
8
|
+
/**
 * Reports whether the response carries independent proof that a web search
 * ran. Two signals count:
 *   - `output` is an array containing an item whose `type` is "web_search_call";
 *   - `tool_usage.web_search.num_requests` is a number greater than zero.
 *
 * Used to gate inline-citation recovery so that a stray prose URL in a
 * non-search answer is never promoted to a citation.
 */
function webSearchPerformed(json: any): boolean {
  const outputItems = json?.output;
  if (Array.isArray(outputItems)) {
    for (const entry of outputItems) {
      if (entry?.type === "web_search_call") return true;
    }
  }

  // No search call in the output: fall back to the usage counter.
  const requestCount = json?.tool_usage?.web_search?.num_requests;
  if (typeof requestCount !== "number") return false;
  return requestCount > 0;
}
|
|
20
|
+
|
|
7
21
|
function endpoint(baseUrl: string, api: string): string {
|
|
8
22
|
const base = baseUrl.replace(/\/+$/, "");
|
|
9
23
|
return api === "openai-completions" ? `${base}/chat/completions` : `${base}/responses`;
|
|
@@ -94,35 +108,44 @@ export class OpenAICompatibleSearchProvider extends SearchProvider {
|
|
|
94
108
|
});
|
|
95
109
|
if (!apiKey) throw new SearchProviderError(this.id, `No credentials for ${ctx.provider}`, 401);
|
|
96
110
|
const model = ctx.wireModelId ?? ctx.modelId;
|
|
111
|
+
const baseUrl = ctx.baseUrl ?? "";
|
|
97
112
|
const headers = { ...(ctx.headers ?? {}), Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" };
|
|
98
|
-
const
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
113
|
+
const messages = [
|
|
114
|
+
{ role: "system", content: params.systemPrompt },
|
|
115
|
+
{ role: "user", content: params.query },
|
|
116
|
+
];
|
|
117
|
+
const responsesBody = {
|
|
118
|
+
model,
|
|
119
|
+
input: messages,
|
|
120
|
+
tools: [{ type: "web_search" }],
|
|
121
|
+
temperature: params.temperature,
|
|
122
|
+
max_output_tokens: params.maxOutputTokens,
|
|
123
|
+
};
|
|
124
|
+
const chatBody = {
|
|
125
|
+
model,
|
|
126
|
+
messages,
|
|
127
|
+
web_search_options: {},
|
|
128
|
+
temperature: params.temperature,
|
|
129
|
+
max_tokens: params.maxOutputTokens,
|
|
130
|
+
};
|
|
131
|
+
|
|
132
|
+
const post = (api: "openai-responses" | "openai-completions", payload: unknown) =>
|
|
133
|
+
fetch(endpoint(baseUrl, api), {
|
|
134
|
+
method: "POST",
|
|
135
|
+
headers,
|
|
136
|
+
body: JSON.stringify(payload),
|
|
137
|
+
signal: withHardTimeout(params.signal),
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
// Web search is a Responses-API capability: many OpenAI-compatible
|
|
141
|
+
// endpoints (incl. proxies fronting chat-only models) only ground search
|
|
142
|
+
// through `/responses`, while `/chat/completions` answers from the model's
|
|
143
|
+
// stale knowledge. Prefer `/responses` regardless of the model's chat wire,
|
|
144
|
+
// and fall back to `/chat/completions` only when `/responses` is absent.
|
|
145
|
+
let response = await post("openai-responses", responsesBody);
|
|
146
|
+
if (response.status === 404 || response.status === 405) {
|
|
147
|
+
response = await post("openai-completions", chatBody);
|
|
148
|
+
}
|
|
126
149
|
const text = await response.text();
|
|
127
150
|
if (!response.ok) {
|
|
128
151
|
const classified = classifyProviderHttpError(this.id, response.status, text);
|
|
@@ -135,14 +158,25 @@ export class OpenAICompatibleSearchProvider extends SearchProvider {
|
|
|
135
158
|
}
|
|
136
159
|
const json = text ? JSON.parse(text) : {};
|
|
137
160
|
const citations = parseCitations(json);
|
|
138
|
-
|
|
161
|
+
const answer = textFromResponse(json);
|
|
162
|
+
const limit = params.limit ?? params.numSearchResults ?? 10;
|
|
163
|
+
let sources = toSources(citations, limit);
|
|
164
|
+
const searched = webSearchPerformed(json);
|
|
165
|
+
// Recover inline-cited sources only when a search demonstrably ran
|
|
166
|
+
// (Responses `web_search_call` / `tool_usage.web_search`). This refuses to
|
|
167
|
+
// promote a model's guessed prose URLs from a non-search answer — exactly
|
|
168
|
+
// what a chat endpoint that ignores `web_search_options` returns.
|
|
169
|
+
if (sources.length === 0 && searched && answer) {
|
|
170
|
+
sources = extractTextSources(answer).slice(0, limit);
|
|
171
|
+
}
|
|
172
|
+
if (sources.length === 0 && !searched) {
|
|
139
173
|
throw new SearchProviderError(this.id, "OpenAI-compatible web search returned no citations", 424);
|
|
140
174
|
}
|
|
141
175
|
return {
|
|
142
176
|
provider: this.id,
|
|
143
|
-
answer
|
|
144
|
-
sources
|
|
145
|
-
citations,
|
|
177
|
+
answer,
|
|
178
|
+
sources,
|
|
179
|
+
citations: citations.length > 0 ? citations : undefined,
|
|
146
180
|
model,
|
|
147
181
|
requestId: json.id,
|
|
148
182
|
authMode: "api-key",
|