@gajae-code/coding-agent 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/dist/types/cli/notify-cli.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +39 -2
- package/dist/types/extensibility/shared-events.d.ts +1 -0
- package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
- package/dist/types/lsp/types.d.ts +2 -0
- package/dist/types/notifications/attachment-registry.d.ts +17 -0
- package/dist/types/notifications/chat-adapters.d.ts +9 -0
- package/dist/types/notifications/config.d.ts +9 -1
- package/dist/types/notifications/engine.d.ts +59 -0
- package/dist/types/notifications/managed-daemon.d.ts +48 -0
- package/dist/types/notifications/telegram-daemon.d.ts +19 -0
- package/dist/types/notifications/threaded-inbound.d.ts +19 -0
- package/dist/types/notifications/threaded-render.d.ts +6 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/fetch.d.ts +23 -0
- package/dist/types/tools/index.d.ts +1 -0
- package/dist/types/tools/telegram-send.d.ts +32 -0
- package/dist/types/web/insane/bridge.d.ts +103 -0
- package/dist/types/web/insane/url-guard.d.ts +22 -0
- package/dist/types/web/search/provider.d.ts +18 -1
- package/dist/types/web/search/providers/insane.d.ts +53 -0
- package/dist/types/web/search/providers/text-citations.d.ts +23 -0
- package/dist/types/web/search/types.d.ts +12 -4
- package/package.json +10 -8
- package/scripts/verify-insane-vendor.ts +132 -0
- package/src/cli/args.ts +1 -1
- package/src/cli/fast-help.ts +1 -1
- package/src/cli/notify-cli.ts +152 -5
- package/src/commands/team.ts +1 -1
- package/src/config/settings-schema.ts +30 -1
- package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
- package/src/extensibility/shared-events.ts +1 -0
- package/src/gjc-runtime/launch-tmux.ts +17 -3
- package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
- package/src/gjc-runtime/ralplan-runtime.ts +2 -2
- package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
- package/src/gjc-runtime/workflow-manifest.ts +7 -2
- package/src/internal-urls/docs-index.generated.ts +7 -7
- package/src/lsp/config.ts +16 -3
- package/src/lsp/defaults.json +7 -0
- package/src/lsp/types.ts +2 -0
- package/src/modes/controllers/event-controller.ts +15 -0
- package/src/modes/interactive-mode.ts +46 -2
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/notifications/attachment-registry.ts +23 -0
- package/src/notifications/chat-adapters.ts +147 -0
- package/src/notifications/config.ts +23 -2
- package/src/notifications/engine.ts +100 -0
- package/src/notifications/index.ts +180 -38
- package/src/notifications/managed-daemon.ts +163 -0
- package/src/notifications/telegram-daemon.ts +235 -14
- package/src/notifications/threaded-inbound.ts +60 -4
- package/src/notifications/threaded-render.ts +20 -2
- package/src/session/agent-session.ts +82 -51
- package/src/tools/fetch.ts +78 -1
- package/src/tools/index.ts +3 -0
- package/src/tools/telegram-send.ts +137 -0
- package/src/web/insane/bridge.ts +350 -0
- package/src/web/insane/url-guard.ts +155 -0
- package/src/web/search/provider.ts +77 -18
- package/src/web/search/providers/anthropic.ts +70 -3
- package/src/web/search/providers/codex.ts +1 -119
- package/src/web/search/providers/gemini.ts +99 -0
- package/src/web/search/providers/insane.ts +551 -0
- package/src/web/search/providers/openai-compatible.ts +66 -32
- package/src/web/search/providers/text-citations.ts +111 -0
- package/src/web/search/types.ts +13 -2
- package/vendor/insane-search/LICENSE +21 -0
- package/vendor/insane-search/MANIFEST.json +24 -0
- package/vendor/insane-search/engine/__init__.py +23 -0
- package/vendor/insane-search/engine/__main__.py +128 -0
- package/vendor/insane-search/engine/bias_check.py +183 -0
- package/vendor/insane-search/engine/executor.py +254 -0
- package/vendor/insane-search/engine/fetch_chain.py +725 -0
- package/vendor/insane-search/engine/learning.py +175 -0
- package/vendor/insane-search/engine/phase0.py +214 -0
- package/vendor/insane-search/engine/safety.py +91 -0
- package/vendor/insane-search/engine/templates/package.json +11 -0
- package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
- package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
- package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
- package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
- package/vendor/insane-search/engine/tests/test_u1.py +200 -0
- package/vendor/insane-search/engine/tests/test_u4.py +131 -0
- package/vendor/insane-search/engine/tests/test_u5.py +163 -0
- package/vendor/insane-search/engine/tests/test_u7.py +124 -0
- package/vendor/insane-search/engine/transport.py +211 -0
- package/vendor/insane-search/engine/url_transforms.py +98 -0
- package/vendor/insane-search/engine/validators.py +331 -0
- package/vendor/insane-search/engine/waf_detector.py +214 -0
- package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bridge from TypeScript to the vendored insane-search Python engine.
|
|
3
|
+
*
|
|
4
|
+
* Invokes `python3 -m engine "<url>" --json` per fallback attempt (cwd + PYTHONPATH
|
|
5
|
+
* pointed at the vendored engine), validates the JSON envelope, and maps it onto a
|
|
6
|
+
* discriminated result. Hardened: clamped timeout, AbortSignal propagation that
|
|
7
|
+
* kills+reaps the child, bounded stdout/stderr capture, and a per-process
|
|
8
|
+
* concurrency cap so blocked reads cannot fork-storm.
|
|
9
|
+
*
|
|
10
|
+
* Fail-closed: missing dependencies / bad output / auth-required never throw past
|
|
11
|
+
* the caller and never auto-install anything; they return ok:false with a stable,
|
|
12
|
+
* bounded note so `read` can continue with its normal degraded result.
|
|
13
|
+
*/
|
|
14
|
+
import { type ChildProcess, spawn as nodeSpawn } from "node:child_process";
|
|
15
|
+
import * as path from "node:path";
|
|
16
|
+
import { fileURLToPath } from "node:url";
|
|
17
|
+
import { $which } from "@gajae-code/utils";
|
|
18
|
+
|
|
19
|
+
const HERE = path.dirname(fileURLToPath(import.meta.url));
|
|
20
|
+
/** packages/coding-agent/vendor/insane-search */
|
|
21
|
+
export const INSANE_VENDOR_DIR = path.resolve(HERE, "../../../vendor/insane-search");
|
|
22
|
+
const TEMPLATES_DIR = path.join(INSANE_VENDOR_DIR, "engine", "templates");
|
|
23
|
+
|
|
24
|
+
const MAX_STDOUT_BYTES = 8 * 1024 * 1024;
|
|
25
|
+
const MAX_STDERR_BYTES = 64 * 1024;
|
|
26
|
+
const DEFAULT_TIMEOUT_MS = 25_000;
|
|
27
|
+
const MIN_TIMEOUT_MS = 1_000;
|
|
28
|
+
const MAX_TIMEOUT_MS = 120_000;
|
|
29
|
+
const DEFAULT_CONCURRENCY = 2;
|
|
30
|
+
const KILL_GRACE_MS = 2_000;
|
|
31
|
+
|
|
32
|
+
/** Stable note prefixes — tests assert on these without depending on full stderr. */
|
|
33
|
+
export const INSANE_NOTES = {
|
|
34
|
+
guardBlocked: (reason: string) => `insane fallback blocked: target URL is not public HTTP(S): ${reason}`,
|
|
35
|
+
vendorMissing: `insane fallback unavailable: vendor engine missing at packages/coding-agent/vendor/insane-search`,
|
|
36
|
+
noPython: `insane fallback unavailable: python3 not found; install python3 and curl_cffi, then retry with web.insaneFallback=true`,
|
|
37
|
+
noCurlCffi: `insane fallback unavailable: python3 cannot import curl_cffi; install curl_cffi for Phase 0-2`,
|
|
38
|
+
noBrowser: `insane fallback unavailable: node/playwright/stealth dependencies missing for Phase 3; install dependencies under packages/coding-agent/vendor/insane-search/engine/templates`,
|
|
39
|
+
timeout: (seconds: number) => `insane fallback timed out after ${seconds}s; normal read fallback preserved`,
|
|
40
|
+
invalidJson: `insane fallback failed: engine returned invalid JSON`,
|
|
41
|
+
authRequired: `insane fallback stopped: authentication required`,
|
|
42
|
+
verdict: (verdict: string) => `insane fallback failed: engine returned verdict=${verdict}`,
|
|
43
|
+
untried: (routes: string) => `insane fallback routes not tried: ${routes}`,
|
|
44
|
+
mustBrowserMcp: `insane fallback requires browser MCP/manual phase: must_invoke_playwright_mcp=true`,
|
|
45
|
+
concurrency: `insane fallback skipped: max concurrent engine attempts reached`,
|
|
46
|
+
emptyContent: `insane fallback failed: engine reported ok but returned no content`,
|
|
47
|
+
} as const;
|
|
48
|
+
|
|
49
|
+
/** Raw JSON envelope produced by `python3 -m engine --json`. */
|
|
50
|
+
export interface InsaneFetchResultRaw {
|
|
51
|
+
ok?: boolean;
|
|
52
|
+
verdict?: string;
|
|
53
|
+
content?: string;
|
|
54
|
+
profile_used?: string;
|
|
55
|
+
trace?: unknown;
|
|
56
|
+
untried_routes?: string[];
|
|
57
|
+
must_invoke_playwright_mcp?: boolean;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export interface InsaneSuccess {
|
|
61
|
+
ok: true;
|
|
62
|
+
content: string;
|
|
63
|
+
profileUsed?: string;
|
|
64
|
+
notes: string[];
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export interface InsaneFailure {
|
|
68
|
+
ok: false;
|
|
69
|
+
reason: string;
|
|
70
|
+
verdict?: string;
|
|
71
|
+
notes: string[];
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
export type InsaneBridgeResult = InsaneSuccess | InsaneFailure;
|
|
75
|
+
|
|
76
|
+
export interface EngineInvocation {
|
|
77
|
+
url: string;
|
|
78
|
+
timeoutMs: number;
|
|
79
|
+
signal?: AbortSignal;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export interface EngineRawOutput {
|
|
83
|
+
code: number | null;
|
|
84
|
+
stdout: string;
|
|
85
|
+
stderr: string;
|
|
86
|
+
timedOut: boolean;
|
|
87
|
+
aborted: boolean;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** Seam: run the engine subprocess. Default spawns python3. */
|
|
91
|
+
export type EngineRunner = (inv: EngineInvocation) => Promise<EngineRawOutput>;
|
|
92
|
+
|
|
93
|
+
export interface InsaneDependencyStatus {
|
|
94
|
+
vendorPresent: boolean;
|
|
95
|
+
python: boolean;
|
|
96
|
+
curlCffi: boolean;
|
|
97
|
+
browser: boolean;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/** Seam: probe dependencies. Default probes the real environment (cached). */
|
|
101
|
+
export type DependencyProber = () => Promise<InsaneDependencyStatus>;
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// Subprocess runner
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
type SpawnImpl = typeof nodeSpawn;
|
|
108
|
+
|
|
109
|
+
/**
 * Normalize a caller-supplied timeout.
 *
 * A missing or non-finite value yields DEFAULT_TIMEOUT_MS; anything else is
 * floored to a whole millisecond count and clamped into
 * [MIN_TIMEOUT_MS, MAX_TIMEOUT_MS].
 */
function clampTimeoutMs(timeoutMs: number | undefined): number {
	const requested = timeoutMs ?? DEFAULT_TIMEOUT_MS;
	if (!Number.isFinite(requested)) {
		return DEFAULT_TIMEOUT_MS;
	}
	const whole = Math.floor(requested);
	if (whole < MIN_TIMEOUT_MS) return MIN_TIMEOUT_MS;
	if (whole > MAX_TIMEOUT_MS) return MAX_TIMEOUT_MS;
	return whole;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Append `chunk` to `buffer` without letting the result grow past `cap`
 * (measured with `String.prototype.length`). Once the buffer has reached the
 * cap it is returned unchanged; a chunk that would overflow is cut to fit.
 */
function appendCapped(buffer: string, chunk: string, cap: number): string {
	const room = cap - buffer.length;
	if (buffer.length >= cap) {
		return buffer;
	}
	if (chunk.length > room) {
		return buffer + chunk.slice(0, room);
	}
	return buffer + chunk;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Terminate `child`: send SIGTERM immediately and escalate to SIGKILL if no
 * `exit` event has been observed within KILL_GRACE_MS.
 *
 * Only the direct child is signalled (`child.kill`); this function does not
 * signal a process group, so processes started by the child are not reached.
 *
 * @param child The subprocess to terminate.
 */
function killChild(child: ChildProcess): void {
	// A process that has already exited needs neither a signal nor an
	// escalation timer. Loose `!= null` keeps test doubles that omit these
	// fields (undefined) on the normal signalling path.
	if (child.exitCode != null || child.signalCode != null) return;

	try {
		child.kill("SIGTERM");
	} catch {
		// already gone
	}

	const escalation = setTimeout(() => {
		try {
			child.kill("SIGKILL");
		} catch {
			// already gone
		}
	}, KILL_GRACE_MS);
	// Never keep the event loop alive just for the escalation.
	escalation.unref?.();
	child.once("exit", () => clearTimeout(escalation));
}
|
|
138
|
+
|
|
139
|
+
/**
 * Real engine runner: `python3 -m engine "<url>" --json`, with cwd and
 * PYTHONPATH pointed at the vendored engine directory.
 *
 * The returned promise always resolves (never rejects):
 * - a signal that is already aborted short-circuits without spawning;
 * - a synchronous spawn failure or an `error` event resolves with `code: null`;
 * - on timeout or abort the child is killed and the result carries
 *   `timedOut` / `aborted`.
 *
 * stdout/stderr are captured as UTF-8 text, capped at MAX_STDOUT_BYTES /
 * MAX_STDERR_BYTES (the cap is applied to the decoded string length).
 *
 * @param inv URL, timeout (ms) and optional AbortSignal for this attempt.
 * @param options.spawnImpl Test seam replacing `child_process.spawn`.
 */
export function runEngineSubprocess(
	inv: EngineInvocation,
	options: { spawnImpl?: SpawnImpl } = {},
): Promise<EngineRawOutput> {
	const spawnImpl = options.spawnImpl ?? nodeSpawn;

	// Already cancelled: report the abort without paying for a spawn + kill.
	if (inv.signal?.aborted) {
		return Promise.resolve({ code: null, stdout: "", stderr: "", timedOut: false, aborted: true });
	}

	return new Promise<EngineRawOutput>(resolve => {
		let stdout = "";
		let stderr = "";
		let settled = false;
		let timedOut = false;
		let aborted = false;

		let child: ChildProcess;
		try {
			child = spawnImpl("python3", ["-m", "engine", inv.url, "--json"], {
				cwd: INSANE_VENDOR_DIR,
				env: { ...process.env, PYTHONPATH: INSANE_VENDOR_DIR },
				stdio: ["ignore", "pipe", "pipe"],
			});
		} catch {
			// spawn() can throw synchronously for some failures; keep the
			// "always resolves" contract instead of rejecting.
			resolve({ code: null, stdout, stderr, timedOut, aborted });
			return;
		}

		const finish = (code: number | null): void => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			inv.signal?.removeEventListener("abort", onAbort);
			resolve({ code, stdout, stderr, timedOut, aborted });
		};

		const timer = setTimeout(() => {
			timedOut = true;
			killChild(child);
		}, inv.timeoutMs);
		timer.unref?.();

		const onAbort = (): void => {
			aborted = true;
			killChild(child);
		};
		inv.signal?.addEventListener("abort", onAbort, { once: true });

		// Decode at the stream level so a multi-byte UTF-8 sequence that is
		// split across two chunks is reassembled instead of being turned into
		// replacement characters. Optional call: spawnImpl test doubles may
		// expose bare emitters without setEncoding.
		child.stdout?.setEncoding?.("utf8");
		child.stderr?.setEncoding?.("utf8");
		const asText = (chunk: Buffer | string): string =>
			typeof chunk === "string" ? chunk : chunk.toString("utf8");

		child.stdout?.on("data", (chunk: Buffer | string) => {
			stdout = appendCapped(stdout, asText(chunk), MAX_STDOUT_BYTES);
		});
		child.stderr?.on("data", (chunk: Buffer | string) => {
			stderr = appendCapped(stderr, asText(chunk), MAX_STDERR_BYTES);
		});
		child.on("error", () => finish(null));
		child.on("exit", code => {
			// Normal completion waits for "close" so all output is collected.
			// After a timeout/abort the output is not needed, and "close" may
			// never arrive if another process still holds the pipes, so settle
			// as soon as the child itself has exited.
			if (!timedOut && !aborted) return;
			child.stdout?.destroy();
			child.stderr?.destroy();
			finish(code);
		});
		child.on("close", code => finish(code));
	});
}
|
|
191
|
+
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Dependency probes (cached)
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
|
|
196
|
+
/** Memoized in-flight/finished dependency probe; `null` until the first probe is requested. */
let probeCache: Promise<InsaneDependencyStatus> | null = null;

/**
 * Test hook: drop the memoized probe so the next call to the cached prober
 * runs a fresh probe and no state leaks between tests.
 */
export function resetInsaneProbeCacheForTest(): void {
	probeCache = null;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Run a short probe command with all stdio ignored and report whether it
 * exited with status 0.
 *
 * Resolves `false` (never rejects) when the command cannot be spawned, emits
 * `error`, exits non-zero, or is still running after 10 seconds (in which case
 * it is sent SIGKILL).
 *
 * @param cmd Executable to run.
 * @param args Arguments passed to the executable.
 * @param cwd Optional working directory for the probe.
 */
function runProbeCommand(cmd: string, args: string[], cwd?: string): Promise<boolean> {
	return new Promise<boolean>(resolve => {
		let child: ChildProcess;
		try {
			child = nodeSpawn(cmd, args, { cwd, stdio: "ignore" });
		} catch {
			// spawn() throws synchronously for some failures (e.g. invalid
			// arguments); a probe that cannot start is simply "not available".
			resolve(false);
			return;
		}

		let settled = false;
		const done = (ok: boolean): void => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			resolve(ok);
		};
		const timer = setTimeout(() => {
			try {
				child.kill("SIGKILL");
			} catch {
				// gone
			}
			done(false);
		}, 10_000);
		timer.unref?.();
		child.on("error", () => done(false));
		child.on("close", code => done(code === 0));
	});
}
|
|
226
|
+
|
|
227
|
+
/**
 * Probe the real environment for everything the engine needs.
 *
 * Checks, in order of cost: the vendored `engine/__main__.py` exists; `python3`
 * and `node` are resolvable via `$which`; python can `import curl_cffi`; node
 * (run from the templates directory) can resolve playwright, playwright-extra
 * and puppeteer-extra-plugin-stealth. When the vendor directory is missing
 * nothing else is probed and every flag is `false`.
 */
async function probeRealDependencies(): Promise<InsaneDependencyStatus> {
	const { existsSync } = await import("node:fs");
	const vendorPresent = existsSync(path.join(INSANE_VENDOR_DIR, "engine", "__main__.py"));
	if (!vendorPresent) {
		return { vendorPresent: false, python: false, curlCffi: false, browser: false };
	}

	const python = Boolean($which("python3"));
	const node = Boolean($which("node"));

	// The two subprocess probes are independent; run them concurrently so the
	// worst case is one probe timeout rather than the sum of both.
	const [curlCffi, browser] = await Promise.all([
		python ? runProbeCommand("python3", ["-c", "import curl_cffi"]) : false,
		node
			? runProbeCommand(
					"node",
					[
						"-e",
						"require.resolve('playwright');require.resolve('playwright-extra');require.resolve('puppeteer-extra-plugin-stealth')",
					],
					TEMPLATES_DIR,
				)
			: false,
	]);
	return { vendorPresent, python, curlCffi, browser };
}
|
|
248
|
+
|
|
249
|
+
/**
 * Return the dependency status for the insane-search runtime, running the real
 * probe on first use and handing back the same promise on every later call
 * until the cache is reset.
 */
export function probeInsaneDependencies(): Promise<InsaneDependencyStatus> {
	if (probeCache) {
		return probeCache;
	}
	const started = probeRealDependencies();
	probeCache = started;
	return started;
}
|
|
254
|
+
|
|
255
|
+
// ---------------------------------------------------------------------------
|
|
256
|
+
// Concurrency gate
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
|
|
259
|
+
/** Number of engine attempts currently running in this process. */
let inFlight = 0;

/** Test hook: zero the in-flight counter so a failed test cannot starve later ones. */
export function resetInsaneConcurrencyForTest(): void {
	inFlight = 0;
}
|
|
264
|
+
|
|
265
|
+
// ---------------------------------------------------------------------------
|
|
266
|
+
// High-level bridge
|
|
267
|
+
// ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
export interface TryInsaneFetchOptions {
|
|
270
|
+
timeoutMs?: number;
|
|
271
|
+
signal?: AbortSignal;
|
|
272
|
+
concurrencyLimit?: number;
|
|
273
|
+
/** Seam: dependency prober (default real, cached). */
|
|
274
|
+
prober?: DependencyProber;
|
|
275
|
+
/** Seam: engine runner (default real subprocess). */
|
|
276
|
+
runner?: EngineRunner;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
 * Translate the raw subprocess outcome into the bridge's discriminated result.
 *
 * Precedence: aborted, then timed out, then invalid JSON, then auth-required
 * verdict, then success / empty-content / engine-failed. The parsed envelope is
 * treated as untrusted: anything that is not a JSON object is "invalid-json",
 * and individual fields are only used when they have the expected runtime type,
 * so a malformed envelope can never make this function throw.
 *
 * @param raw Captured output and flags from the engine runner.
 * @param timeoutMs The timeout that was applied; only used for the timeout note.
 */
function mapEngineOutput(raw: EngineRawOutput, timeoutMs: number): InsaneBridgeResult {
	const notes: string[] = [];
	if (raw.aborted) {
		return { ok: false, reason: "aborted", notes };
	}
	if (raw.timedOut) {
		notes.push(INSANE_NOTES.timeout(Math.round(timeoutMs / 1000)));
		return { ok: false, reason: "timeout", notes };
	}

	let decoded: unknown;
	try {
		decoded = JSON.parse(raw.stdout);
	} catch {
		decoded = undefined;
	}
	// `null`, primitives and arrays are valid JSON but not an engine envelope.
	if (typeof decoded !== "object" || decoded === null || Array.isArray(decoded)) {
		notes.push(INSANE_NOTES.invalidJson);
		return { ok: false, reason: "invalid-json", notes };
	}
	const parsed = decoded as InsaneFetchResultRaw;

	const verdict = typeof parsed.verdict === "string" ? parsed.verdict.trim() : undefined;
	// The engine emits the Verdict enum value `auth_required` (401/407); also tolerate
	// the human-readable phrase defensively. Either is a terminal public-content boundary.
	if (verdict && /^(?:auth_required|authentication required)$/i.test(verdict)) {
		notes.push(INSANE_NOTES.authRequired);
		return { ok: false, reason: "auth-required", verdict, notes };
	}

	const untried: unknown[] = Array.isArray(parsed.untried_routes) ? parsed.untried_routes : [];
	const untriedNames = untried.filter((route): route is string => typeof route === "string");
	if (untriedNames.length > 0) {
		// Keep the note bounded: at most eight route names.
		notes.push(INSANE_NOTES.untried(untriedNames.slice(0, 8).join(", ")));
	}
	if (parsed.must_invoke_playwright_mcp) {
		notes.push(INSANE_NOTES.mustBrowserMcp);
	}

	if (parsed.ok && typeof parsed.content === "string" && parsed.content.trim().length > 0) {
		const profileUsed = typeof parsed.profile_used === "string" ? parsed.profile_used : undefined;
		return { ok: true, content: parsed.content, profileUsed, notes };
	}
	if (parsed.ok) {
		notes.push(INSANE_NOTES.emptyContent);
		return { ok: false, reason: "empty-content", notes };
	}
	notes.push(INSANE_NOTES.verdict(verdict || "unknown"));
	return { ok: false, reason: "engine-failed", verdict, notes };
}
|
|
321
|
+
|
|
322
|
+
/**
 * Attempt to read `url` through the insane-search engine. The caller is
 * responsible for the opt-in gate, raw-mode skip, and the public-URL guard
 * (which MUST run before this is called). Never throws; always returns a result.
 *
 * Order of checks: dependency probe (vendor, python, curl_cffi, browser), then
 * the per-process concurrency cap, then one engine attempt with a clamped
 * timeout. A prober or runner that rejects is converted into an `ok: false`
 * result so the never-throws contract also holds for injected seams.
 *
 * @param url Target URL handed to the engine as-is.
 * @param options Timeout, abort signal, concurrency limit and test seams.
 */
export async function tryInsaneFetch(url: string, options: TryInsaneFetchOptions = {}): Promise<InsaneBridgeResult> {
	const prober = options.prober ?? probeInsaneDependencies;
	const runner = options.runner ?? (inv => runEngineSubprocess(inv));
	const limit = options.concurrencyLimit ?? DEFAULT_CONCURRENCY;

	let deps: InsaneDependencyStatus;
	try {
		deps = await prober();
	} catch {
		return {
			ok: false,
			reason: "probe-error",
			notes: ["insane fallback unavailable: dependency probe failed"],
		};
	}
	if (!deps.vendorPresent) return { ok: false, reason: "vendor-missing", notes: [INSANE_NOTES.vendorMissing] };
	if (!deps.python) return { ok: false, reason: "no-python", notes: [INSANE_NOTES.noPython] };
	if (!deps.curlCffi) return { ok: false, reason: "no-curl-cffi", notes: [INSANE_NOTES.noCurlCffi] };
	if (!deps.browser) return { ok: false, reason: "no-browser", notes: [INSANE_NOTES.noBrowser] };

	if (inFlight >= limit) {
		return { ok: false, reason: "concurrency", notes: [INSANE_NOTES.concurrency] };
	}

	inFlight++;
	try {
		const timeoutMs = clampTimeoutMs(options.timeoutMs);
		const raw = await runner({ url, timeoutMs, signal: options.signal });
		return mapEngineOutput(raw, timeoutMs);
	} catch {
		return {
			ok: false,
			reason: "runner-error",
			notes: ["insane fallback failed: engine runner error"],
		};
	} finally {
		// Always release the slot, whatever the outcome.
		inFlight--;
	}
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public HTTP(S) URL guard for the insane-search read fallback.
|
|
3
|
+
*
|
|
4
|
+
* The vendored insane-search engine performs its own network requests (curl_cffi,
|
|
5
|
+
* a real browser) entirely outside the TypeScript fetch path, so the normal
|
|
6
|
+
* `loadPage()` flow cannot protect against SSRF. This guard MUST run before any
|
|
7
|
+
* dependency probe or engine subprocess is spawned. It is fail-closed: anything
|
|
8
|
+
* it cannot prove is a public, non-credentialed http/https target is rejected.
|
|
9
|
+
*
|
|
10
|
+
* It does NOT follow or re-validate redirects — the engine may follow redirects
|
|
11
|
+
* internally that this guard never sees. That residual risk is documented in the
|
|
12
|
+
* plan and mitigated by validating the input target and keeping the feature
|
|
13
|
+
* opt-in (default off).
|
|
14
|
+
*/
|
|
15
|
+
import * as dns from "node:dns/promises";
|
|
16
|
+
import * as net from "node:net";
|
|
17
|
+
|
|
18
|
+
export interface PublicUrlAccepted {
|
|
19
|
+
ok: true;
|
|
20
|
+
url: URL;
|
|
21
|
+
addresses: string[];
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export interface PublicUrlRejected {
|
|
25
|
+
ok: false;
|
|
26
|
+
reason: string;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export type PublicUrlResult = PublicUrlAccepted | PublicUrlRejected;
|
|
30
|
+
|
|
31
|
+
/** Resolver seam so tests can inject DNS results without real lookups. */
|
|
32
|
+
export type AddressResolver = (hostname: string) => Promise<string[]>;
|
|
33
|
+
|
|
34
|
+
/**
 * Production resolver: asks `dns.lookup` for every address of `hostname`
 * (`all: true`, `verbatim: true`) and returns just the address strings.
 */
const defaultResolver: AddressResolver = async hostname => {
	const found: string[] = [];
	for (const { address } of await dns.lookup(hostname, { all: true, verbatim: true })) {
		found.push(address);
	}
	return found;
};
|
|
38
|
+
|
|
39
|
+
/** Exact hostnames that are always rejected (compared after normalization). */
const BLOCKED_HOSTNAMES = new Set(["localhost", "localhost.localdomain", "0.0.0.0", ""]);

/**
 * True when `hostname` names the local machine or an internal-only zone.
 * The name is lower-cased and a single trailing dot is removed before it is
 * compared with the exact block list and the internal suffixes.
 */
function isBlockedHostname(hostname: string): boolean {
	const host = hostname.toLowerCase().replace(/\.$/, "");
	if (BLOCKED_HOSTNAMES.has(host)) {
		return true;
	}
	for (const suffix of [".localhost", ".local", ".internal", ".home.arpa"]) {
		if (host.endsWith(suffix)) return true;
	}
	return false;
}
|
|
52
|
+
|
|
53
|
+
/**
 * True when a dotted-quad IPv4 address is NOT a routable public unicast
 * address. Fail-closed: anything that is not exactly four decimal octets in
 * 0..255 is reported as private.
 *
 * The documentation/reserved blocks are matched at their real /24 width
 * (192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24) so that
 * ordinary public addresses elsewhere in 192.0/16, 198.51/16 and 203.0/16 are
 * not rejected.
 */
function isPrivateIPv4(address: string): boolean {
	const fields = address.split(".");
	// Strict decimal octets only; parseInt would accept "1abc" as 1.
	if (fields.length !== 4 || !fields.every(field => /^\d{1,3}$/.test(field))) return true;
	const [a = 0, b = 0, c = 0, d = 0] = fields.map(Number);
	if (a > 255 || b > 255 || c > 255 || d > 255) return true;

	return (
		a === 0 || // unspecified / "this network"
		a === 10 || // RFC1918
		a === 127 || // loopback
		(a === 100 && b >= 64 && b <= 127) || // CGNAT 100.64/10
		(a === 169 && b === 254) || // link-local
		(a === 172 && b >= 16 && b <= 31) || // RFC1918
		(a === 192 && b === 0 && (c === 0 || c === 2)) || // 192.0.0/24 & 192.0.2/24 (reserved/documentation)
		(a === 192 && b === 168) || // RFC1918
		(a === 198 && (b === 18 || b === 19)) || // benchmarking 198.18/15
		(a === 198 && b === 51 && c === 100) || // 198.51.100/24 documentation
		(a === 203 && b === 0 && c === 113) || // 203.0.113/24 documentation
		a >= 224 // multicast (224/4) + reserved (240/4) + broadcast
	);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Strip a leading `::ffff:` (matched case-insensitively) from an address so an
 * IPv4-mapped IPv6 literal can be classified by its embedded part; any other
 * address is returned untouched.
 */
function normalizeIPv4MappedIPv6(address: string): string {
	const mappedPrefix = "::ffff:";
	if (address.toLowerCase().startsWith(mappedPrefix)) {
		return address.slice(mappedPrefix.length);
	}
	return address;
}
|
|
76
|
+
|
|
77
|
+
/**
 * True when an IPv6 address is NOT a routable public unicast address.
 *
 * Allow-list approach (fail-closed): only addresses inside the global unicast
 * block 2000::/3 can be public, and within it the documentation (2001:db8::/32),
 * Teredo (2001::/32) and 6to4 (2002::/16) prefixes are rejected. Everything
 * outside 2000::/3 — unspecified, loopback, IPv4-compatible `::a.b.c.d`, NAT64
 * 64:ff9b::/96, ULA, link-local, site-local, multicast — is therefore private.
 *
 * IPv4-mapped literals in dotted form (`::ffff:a.b.c.d`) are classified by
 * their embedded IPv4 address; any other `::ffff:` form is rejected.
 */
function isPrivateIPv6(address: string): boolean {
	const normalized = address.toLowerCase();
	const mapped = normalizeIPv4MappedIPv6(normalized);
	if (mapped !== normalized && net.isIP(mapped) === 4) return isPrivateIPv4(mapped);
	// Any remaining IPv4-mapped form we could not classify.
	if (normalized.startsWith("::ffff:")) return true;

	// Read the first two 16-bit groups. Groups hidden by a leading or
	// immediately following "::" are zero by definition.
	const head = normalized.split("::", 1)[0] ?? "";
	const groups = head === "" ? [] : head.split(":");
	const first = Number.parseInt(groups[0] ?? "0", 16);
	const second = Number.parseInt(groups[1] ?? "0", 16);
	if (Number.isNaN(first) || Number.isNaN(second)) return true;

	if ((first & 0xe000) !== 0x2000) return true; // outside global unicast 2000::/3
	if (first === 0x2002) return true; // 6to4, embeds an IPv4 address
	if (first === 0x2001 && second === 0x0000) return true; // Teredo 2001::/32
	if (first === 0x2001 && second === 0x0db8) return true; // documentation 2001:db8::/32
	// Preserve the original textual match, which also covers "2001:db8x" forms.
	if (normalized.startsWith("2001:db8")) return true;
	return false;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Classify an IP address string: returns true unless it is a routable public
 * unicast address. An IPv4-mapped prefix is stripped first; if what remains is
 * not an IP the original string is re-checked as IPv6, and anything that is
 * still not recognizable as an IP is treated as unsafe.
 */
export function isPrivateOrSpecialAddress(address: string): boolean {
	const candidate = normalizeIPv4MappedIPv6(address);
	switch (net.isIP(candidate)) {
		case 4:
			return isPrivateIPv4(candidate);
		case 6:
			return isPrivateIPv6(candidate);
		default:
			// The stripped value was not an IP: fall back to the raw literal in
			// case it was an IPv4-mapped IPv6 address in hex form.
			return net.isIP(address) === 6 ? isPrivateIPv6(address) : true;
	}
}
|
|
106
|
+
|
|
107
|
+
/**
 * Validate that `rawUrl` is a public http/https target safe to hand to the
 * insane-search engine. Resolves DNS names and rejects any that map to a
 * private/special address. Never throws; returns a discriminated result.
 *
 * Checks, in order: parseable URL, http/https scheme, no embedded credentials,
 * hostname not on the internal block list, then either a literal-IP check or a
 * DNS resolution in which every returned address must be public.
 *
 * IPv6 literals: `URL.hostname` keeps the surrounding brackets
 * (`[2606:4700::1111]`), which `net.isIP` does not accept, so the brackets are
 * removed before classification. On success `addresses` holds the bare
 * (unbracketed) literal.
 *
 * @param rawUrl The URL string to validate.
 * @param options.resolver Test seam replacing the DNS lookup.
 */
export async function validatePublicHttpUrlForInsane(
	rawUrl: string,
	options: { resolver?: AddressResolver } = {},
): Promise<PublicUrlResult> {
	const resolver = options.resolver ?? defaultResolver;

	let url: URL;
	try {
		url = new URL(rawUrl);
	} catch {
		return { ok: false, reason: "invalid URL" };
	}
	if (url.protocol !== "http:" && url.protocol !== "https:") {
		return { ok: false, reason: `unsupported scheme ${url.protocol}` };
	}
	if (url.username || url.password) {
		return { ok: false, reason: "URL credentials are not allowed" };
	}

	// Bare host: strip the brackets the URL parser keeps around IPv6 literals.
	const host =
		url.hostname.startsWith("[") && url.hostname.endsWith("]") ? url.hostname.slice(1, -1) : url.hostname;

	if (isBlockedHostname(host)) {
		return { ok: false, reason: "localhost or internal host" };
	}

	const literalFamily = net.isIP(host);
	if (literalFamily !== 0) {
		if (isPrivateOrSpecialAddress(host)) {
			return { ok: false, reason: "private, loopback, link-local, or reserved IP literal" };
		}
		return { ok: true, url, addresses: [host] };
	}

	let addresses: string[];
	try {
		addresses = await resolver(host);
	} catch {
		return { ok: false, reason: "host could not be resolved" };
	}
	if (addresses.length === 0) {
		return { ok: false, reason: "host resolved to no addresses" };
	}
	// One private record is enough to reject: the engine picks its own address.
	if (addresses.some(isPrivateOrSpecialAddress)) {
		return { ok: false, reason: "host resolves to a private or reserved address" };
	}
	return { ok: true, url, addresses };
}
|
|
@@ -72,6 +72,11 @@ const PROVIDER_META: Record<SearchProviderId, ProviderMeta> = {
|
|
|
72
72
|
label: "DuckDuckGo",
|
|
73
73
|
load: async () => new (await import("./providers/duckduckgo")).DuckDuckGoProvider(),
|
|
74
74
|
},
|
|
75
|
+
insane: {
|
|
76
|
+
id: "insane",
|
|
77
|
+
label: "Insane",
|
|
78
|
+
load: async () => new (await import("./providers/insane")).InsaneProvider(),
|
|
79
|
+
},
|
|
75
80
|
"openai-compatible": {
|
|
76
81
|
id: "openai-compatible",
|
|
77
82
|
label: "OpenAI-compatible",
|
|
@@ -97,6 +102,7 @@ export async function getSearchProvider(id: SearchProviderId): Promise<SearchPro
|
|
|
97
102
|
|
|
98
103
|
export const SEARCH_PROVIDER_ORDER: SearchProviderId[] = [
|
|
99
104
|
"duckduckgo",
|
|
105
|
+
"insane",
|
|
100
106
|
"tavily",
|
|
101
107
|
"perplexity",
|
|
102
108
|
"brave",
|
|
@@ -234,14 +240,41 @@ export function isLocalBaseUrl(baseUrl: string | undefined): boolean {
|
|
|
234
240
|
return false;
|
|
235
241
|
}
|
|
236
242
|
|
|
243
|
+
/**
 * Decide whether `baseUrl` points at an official OpenAI endpoint.
 *
 * A missing or blank base URL counts as official (the default hosted OpenAI).
 * Otherwise the URL must parse and its lower-cased hostname must be
 * `api.openai.com`, `chatgpt.com`, or a subdomain of `openai.com` /
 * `chatgpt.com`. Unparseable values are not official.
 *
 * The dedicated `codex` provider authenticates against the ChatGPT backend
 * with the user's *local* Codex OAuth, so it must only be selected for genuine
 * OpenAI/ChatGPT endpoints — never for a custom/proxy endpoint, which should
 * reuse its own credentials through the `openai-compatible` adapter instead.
 */
function isOpenAIOfficialBaseUrl(baseUrl: string | undefined): boolean {
	if (baseUrl === undefined || baseUrl.trim() === "") {
		return true;
	}

	let parsed: URL;
	try {
		parsed = new URL(baseUrl);
	} catch {
		return false;
	}

	const host = parsed.hostname.toLowerCase();
	if (host === "api.openai.com" || host === "chatgpt.com") {
		return true;
	}
	return [".openai.com", ".chatgpt.com"].some(suffix => host.endsWith(suffix));
}
|
|
266
|
+
|
|
237
267
|
export function inferNativeProviderFromModel(ctx: ActiveSearchModelContext | undefined): SearchProviderId | undefined {
|
|
238
268
|
if (!ctx || ctx.webSearch === "off") return undefined;
|
|
239
269
|
const modelId = (ctx.wireModelId ?? ctx.modelId).toLowerCase();
|
|
240
270
|
if (modelId.startsWith("claude-") && isAnthropicWire(ctx.api)) return "anthropic";
|
|
241
271
|
if (modelId.startsWith("gemini-") && isGoogleWire(ctx.api)) return "gemini";
|
|
242
272
|
if (looksXaiFamilyModelId(ctx) && isOpenAICompatWire(ctx.api)) return "xai";
|
|
243
|
-
|
|
244
|
-
|
|
273
|
+
// `codex` hits the ChatGPT backend with local Codex OAuth, so only infer it
|
|
274
|
+
// for genuine OpenAI endpoints. Custom/proxy OpenAI-compatible models fall
|
|
275
|
+
// through to `activeContextNativeId` → `openai-compatible` (their own creds).
|
|
276
|
+
if (looksOpenAIFamilyModelId(ctx) && isOpenAICompatWire(ctx.api) && isOpenAIOfficialBaseUrl(ctx.baseUrl)) {
|
|
277
|
+
return "codex";
|
|
245
278
|
}
|
|
246
279
|
return undefined;
|
|
247
280
|
}
|
|
@@ -249,8 +282,9 @@ export function inferNativeProviderFromModel(ctx: ActiveSearchModelContext | und
|
|
|
249
282
|
function canUseDirectProviderMapping(ctx: ActiveSearchModelContext, id: SearchProviderId): boolean {
	// Web search switched off disables every direct mapping.
	const searchEnabled = ctx.webSearch !== "off";
	if (!searchEnabled) return false;
	// Only `codex` carries an extra constraint: it is accepted solely when the
	// base URL is classified as an official OpenAI endpoint. Every other
	// provider id is accepted as-is.
	return id === "codex" ? isOpenAIOfficialBaseUrl(ctx.baseUrl) : true;
}
|
|
255
289
|
|
|
256
290
|
export async function canUseGenericCredentials(
|
|
@@ -268,17 +302,35 @@ export async function canUseGenericCredentials(
|
|
|
268
302
|
return Boolean(key);
|
|
269
303
|
}
|
|
270
304
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
305
|
+
/**
 * Picks the native web-search provider that can be attempted with the ACTIVE
 * model's own credentials and baseUrl, chosen from the model's wire protocol.
 *
 * This covers "native search over a proxy": a model served through a
 * proxy/custom endpoint usually lacks its canonical search credentials (for
 * example a dedicated `anthropic` key, or ChatGPT OAuth for `codex`), yet the
 * credential that authenticates the model itself — stored under the active
 * provider id and pointed at `ctx.baseUrl` — can drive native web search
 * equally well. Each provider's `search()` falls back to those active
 * credentials when its canonical ones are missing.
 *
 * The id is derived only from the wire `api` (plus the model-id family where a
 * native tool is meaningful for that family alone). The providers themselves
 * fail closed, and the chain falls through to DuckDuckGo, when the endpoint
 * does not actually support web search.
 *
 * @param ctx Active model context; `undefined` yields `undefined`.
 * @returns The provider id to try, or `undefined` when none applies.
 */
export function activeContextNativeId(ctx: ActiveSearchModelContext | undefined): SearchProviderId | undefined {
	if (!ctx) return undefined;
	if (ctx.webSearch === "off") return undefined;

	const normalizedId = (ctx.wireModelId ?? ctx.modelId).toLowerCase();

	// Only return ids the provider can really service with the active
	// credential: the OpenAI-compatible adapter speaks just the two plain OpenAI
	// wires (not azure) and the Gemini active path speaks just the public
	// Generative Language wire (not vertex/cloud-code). Any other id would be a
	// guaranteed-fail attempt ahead of DuckDuckGo.
	if (isAnthropicWire(ctx.api) && normalizedId.startsWith("claude-")) return "anthropic";

	switch (ctx.api) {
		case "openai-responses":
		case "openai-completions":
			return "openai-compatible";
		case "google-generative-ai":
			return normalizedId.startsWith("gemini-") ? "gemini" : undefined;
		default:
			return undefined;
	}
}
|
|
283
335
|
|
|
284
336
|
export async function resolveProviderChain(options: ResolveProviderChainOptions): Promise<SearchProvider[]> {
|
|
@@ -304,9 +356,16 @@ export async function resolveProviderChain(options: ResolveProviderChainOptions)
|
|
|
304
356
|
await appendAvailable(chain, directId, authStorage);
|
|
305
357
|
const inferred = inferNativeProviderFromModel(activeModelContext);
|
|
306
358
|
if (inferred) await appendAvailable(chain, inferred, authStorage);
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
359
|
+
// Native-over-proxy: when no canonical native provider was selected above,
|
|
360
|
+
// fall back to the model's own credentials (resolved under the active
|
|
361
|
+
// provider id against its baseUrl) to drive native web search. Gated on
|
|
362
|
+
// those credentials actually resolving; otherwise the chain ends at the
|
|
363
|
+
// keyless DuckDuckGo terminal fallback.
|
|
364
|
+
if (chain.length === 0) {
|
|
365
|
+
const activeNativeId = activeContextNativeId(activeModelContext);
|
|
366
|
+
if (activeNativeId && (await canUseGenericCredentials(authStorage, activeModelContext, sessionId, signal)))
|
|
367
|
+
chain.push(activeNativeId);
|
|
368
|
+
}
|
|
310
369
|
}
|
|
311
370
|
|
|
312
371
|
// Configured fallbacks are user-facing only: the internal `openai-compatible`
|