@gajae-code/coding-agent 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/dist/types/cli/mcp-cli.d.ts +25 -0
- package/dist/types/cli/notify-cli.d.ts +2 -0
- package/dist/types/cli.d.ts +6 -0
- package/dist/types/commands/mcp.d.ts +70 -0
- package/dist/types/config/keybindings.d.ts +2 -2
- package/dist/types/config/settings-schema.d.ts +39 -2
- package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
- package/dist/types/extensibility/shared-events.d.ts +1 -0
- package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
- package/dist/types/lsp/types.d.ts +2 -0
- package/dist/types/modes/components/custom-editor.d.ts +1 -1
- package/dist/types/modes/components/model-selector.d.ts +2 -0
- package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
- package/dist/types/modes/theme/defaults/index.d.ts +99 -0
- package/dist/types/notifications/attachment-registry.d.ts +17 -0
- package/dist/types/notifications/chat-adapters.d.ts +9 -0
- package/dist/types/notifications/config.d.ts +9 -1
- package/dist/types/notifications/engine.d.ts +59 -0
- package/dist/types/notifications/managed-daemon.d.ts +48 -0
- package/dist/types/notifications/operator-runtime.d.ts +52 -0
- package/dist/types/notifications/telegram-daemon.d.ts +73 -16
- package/dist/types/notifications/threaded-inbound.d.ts +19 -0
- package/dist/types/notifications/threaded-render.d.ts +6 -1
- package/dist/types/notifications/topic-registry.d.ts +2 -0
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/composer-bash-policy.d.ts +14 -0
- package/dist/types/tools/fetch.d.ts +23 -0
- package/dist/types/tools/index.d.ts +1 -0
- package/dist/types/tools/telegram-send.d.ts +32 -0
- package/dist/types/web/insane/bridge.d.ts +103 -0
- package/dist/types/web/insane/url-guard.d.ts +25 -0
- package/dist/types/web/scrapers/types.d.ts +5 -0
- package/dist/types/web/scrapers/utils.d.ts +7 -1
- package/dist/types/web/search/provider.d.ts +18 -1
- package/dist/types/web/search/providers/insane.d.ts +53 -0
- package/dist/types/web/search/providers/text-citations.d.ts +23 -0
- package/dist/types/web/search/types.d.ts +12 -4
- package/package.json +10 -8
- package/scripts/verify-insane-vendor.ts +132 -0
- package/src/cli/args.ts +1 -1
- package/src/cli/fast-help.ts +1 -1
- package/src/cli/mcp-cli.ts +272 -0
- package/src/cli/notify-cli.ts +152 -5
- package/src/cli.ts +6 -2
- package/src/commands/mcp.ts +117 -0
- package/src/commands/team.ts +1 -1
- package/src/config/keybindings.ts +2 -2
- package/src/config/settings-schema.ts +30 -1
- package/src/deep-interview/plaintext-gate-guard.ts +94 -0
- package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
- package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
- package/src/defaults/gjc/skills/team/SKILL.md +3 -2
- package/src/extensibility/extensions/runner.ts +1 -0
- package/src/extensibility/shared-events.ts +1 -0
- package/src/gjc-runtime/launch-tmux.ts +17 -3
- package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
- package/src/gjc-runtime/ralplan-runtime.ts +2 -2
- package/src/gjc-runtime/tmux-common.ts +3 -1
- package/src/gjc-runtime/ultragoal-guard.ts +25 -8
- package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
- package/src/gjc-runtime/workflow-manifest.ts +7 -2
- package/src/hooks/skill-state.ts +57 -0
- package/src/internal-urls/docs-index.generated.ts +14 -11
- package/src/lsp/config.ts +16 -3
- package/src/lsp/defaults.json +7 -0
- package/src/lsp/types.ts +2 -0
- package/src/modes/bridge/bridge-mode.ts +11 -0
- package/src/modes/components/custom-editor.ts +2 -0
- package/src/modes/components/footer.ts +2 -3
- package/src/modes/components/model-selector.ts +12 -0
- package/src/modes/components/status-line/git-utils.ts +25 -0
- package/src/modes/components/status-line.ts +10 -11
- package/src/modes/components/welcome.ts +2 -3
- package/src/modes/controllers/event-controller.ts +15 -0
- package/src/modes/controllers/selector-controller.ts +3 -0
- package/src/modes/interactive-mode.ts +48 -3
- package/src/modes/shared/agent-wire/scopes.ts +1 -1
- package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
- package/src/modes/theme/defaults/index.ts +2 -0
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/notifications/attachment-registry.ts +23 -0
- package/src/notifications/chat-adapters.ts +147 -0
- package/src/notifications/config.ts +23 -2
- package/src/notifications/engine.ts +100 -0
- package/src/notifications/index.ts +180 -38
- package/src/notifications/managed-daemon.ts +163 -0
- package/src/notifications/operator-runtime.ts +171 -0
- package/src/notifications/telegram-daemon.ts +553 -236
- package/src/notifications/threaded-inbound.ts +60 -4
- package/src/notifications/threaded-render.ts +20 -2
- package/src/notifications/topic-registry.ts +5 -0
- package/src/session/agent-session.ts +82 -51
- package/src/slash-commands/helpers/parse.ts +2 -1
- package/src/tools/bash.ts +9 -0
- package/src/tools/composer-bash-policy.ts +96 -0
- package/src/tools/fetch.ts +94 -1
- package/src/tools/index.ts +3 -0
- package/src/tools/telegram-send.ts +137 -0
- package/src/web/insane/bridge.ts +350 -0
- package/src/web/insane/url-guard.ts +159 -0
- package/src/web/scrapers/types.ts +143 -45
- package/src/web/scrapers/utils.ts +70 -19
- package/src/web/search/provider.ts +77 -18
- package/src/web/search/providers/anthropic.ts +70 -3
- package/src/web/search/providers/codex.ts +1 -119
- package/src/web/search/providers/gemini.ts +99 -0
- package/src/web/search/providers/insane.ts +551 -0
- package/src/web/search/providers/openai-compatible.ts +66 -32
- package/src/web/search/providers/text-citations.ts +111 -0
- package/src/web/search/types.ts +13 -2
- package/vendor/insane-search/LICENSE +21 -0
- package/vendor/insane-search/MANIFEST.json +24 -0
- package/vendor/insane-search/engine/__init__.py +23 -0
- package/vendor/insane-search/engine/__main__.py +128 -0
- package/vendor/insane-search/engine/bias_check.py +183 -0
- package/vendor/insane-search/engine/executor.py +254 -0
- package/vendor/insane-search/engine/fetch_chain.py +725 -0
- package/vendor/insane-search/engine/learning.py +175 -0
- package/vendor/insane-search/engine/phase0.py +214 -0
- package/vendor/insane-search/engine/safety.py +91 -0
- package/vendor/insane-search/engine/templates/package.json +11 -0
- package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
- package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
- package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
- package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
- package/vendor/insane-search/engine/tests/test_u1.py +200 -0
- package/vendor/insane-search/engine/tests/test_u4.py +131 -0
- package/vendor/insane-search/engine/tests/test_u5.py +163 -0
- package/vendor/insane-search/engine/tests/test_u7.py +124 -0
- package/vendor/insane-search/engine/transport.py +211 -0
- package/vendor/insane-search/engine/url_transforms.py +98 -0
- package/vendor/insane-search/engine/validators.py +331 -0
- package/vendor/insane-search/engine/waf_detector.py +214 -0
- package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public HTTP(S) URL guard for user-supplied web fetch targets.
|
|
3
|
+
*
|
|
4
|
+
* Network-capable URL readers MUST run this guard before the first request and
|
|
5
|
+
* before following any redirect target. It is fail-closed: anything it cannot
|
|
6
|
+
* prove is a public, non-credentialed http/https target is rejected.
|
|
7
|
+
*
|
|
8
|
+
* The vendored insane-search engine performs its own redirects outside the
|
|
9
|
+
* TypeScript fetch path, so its fallback remains opt-in and is guarded before
|
|
10
|
+
* any dependency probe or engine subprocess is spawned.
|
|
11
|
+
*/
|
|
12
|
+
import * as dns from "node:dns/promises";
|
|
13
|
+
import * as net from "node:net";
|
|
14
|
+
|
|
15
|
+
/** Successful guard outcome: the input parsed and passed every check in `validatePublicHttpUrl`. */
export interface PublicUrlAccepted {
	ok: true;
	/** Parsed form of the validated input. */
	url: URL;
	/** The IP literal taken from the URL, or every address the resolver returned for its hostname. */
	addresses: string[];
}

/** Failed guard outcome. */
export interface PublicUrlRejected {
	ok: false;
	/** Short human-readable explanation of why the target was refused. */
	reason: string;
}

/** Result of the guard, discriminated on `ok`. */
export type PublicUrlResult = PublicUrlAccepted | PublicUrlRejected;
|
|
27
|
+
|
|
28
|
+
/** Resolver seam so tests can inject DNS results without real lookups. */
|
|
29
|
+
export type AddressResolver = (hostname: string) => Promise<string[]>;
|
|
30
|
+
|
|
31
|
+
/**
 * Production resolver: asks `dns.lookup` for all addresses of `hostname`
 * (`all: true`) without reordering them (`verbatim: true`) and returns just
 * the address strings.
 */
const defaultResolver: AddressResolver = async hostname => {
	const found: string[] = [];
	for (const { address } of await dns.lookup(hostname, { all: true, verbatim: true })) {
		found.push(address);
	}
	return found;
};
|
|
35
|
+
|
|
36
|
+
/** Exact hostnames that always denote the local machine (or no host at all). */
const BLOCKED_HOSTNAMES = new Set(["localhost", "localhost.localdomain", "0.0.0.0", ""]);

/** Suffixes of special-use / private naming zones that never identify a public host. */
const BLOCKED_HOSTNAME_SUFFIXES = [".localhost", ".local", ".internal", ".home.arpa"];

/**
 * Name-based denylist applied before any DNS lookup.
 *
 * The comparison is case-insensitive and ignores trailing root dots, so
 * `LOCALHOST.`, `localhost..` and `db.internal.` are all caught.
 *
 * @param hostname Host component of the URL under test.
 * @returns `true` when the name is local/internal and must be rejected.
 */
function isBlockedHostname(hostname: string): boolean {
	const lowered = hostname.toLowerCase();
	// Strip *every* trailing dot (not just one) with a linear scan; a single
	// `replace(/\.$/, "")` lets "localhost.." slip past the suffix checks.
	let end = lowered.length;
	while (end > 0 && lowered[end - 1] === ".") end--;
	const normalized = lowered.slice(0, end);
	return BLOCKED_HOSTNAMES.has(normalized) || BLOCKED_HOSTNAME_SUFFIXES.some(suffix => normalized.endsWith(suffix));
}
|
|
49
|
+
|
|
50
|
+
/**
 * Parse a dotted-decimal IPv4 string into its four octets.
 *
 * @returns The octets, or `null` unless the input is exactly four all-digit
 * fields in the range 0..255 (trailing junk such as `"8x"` is rejected).
 */
function parseIPv4Octets(address: string): [number, number, number, number] | null {
	const fields = address.split(".");
	if (fields.length !== 4 || !fields.every(field => /^\d{1,3}$/.test(field))) return null;
	const octets = fields.map(Number);
	if (octets.some(octet => octet > 255)) return null;
	// Length was verified above, so narrowing to a 4-tuple is sound.
	return octets as [number, number, number, number];
}

/**
 * True when `address` is not a public unicast IPv4 address. Unparseable input
 * is treated as private (fail-closed).
 */
function isPrivateIPv4(address: string): boolean {
	const octets = parseIPv4Octets(address);
	if (octets === null) return true;
	const [a, b, c] = octets;
	return (
		a === 0 || // 0.0.0.0/8 unspecified / "this network"
		a === 10 || // 10.0.0.0/8 RFC 1918
		a === 127 || // 127.0.0.0/8 loopback
		(a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 CGNAT
		(a === 169 && b === 254) || // 169.254.0.0/16 link-local
		(a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 RFC 1918
		// Only the two /24s are special; the rest of 192.0.0.0/16 is ordinary
		// public address space and must stay reachable.
		(a === 192 && b === 0 && (c === 0 || c === 2)) || // 192.0.0.0/24 IETF, 192.0.2.0/24 TEST-NET-1
		(a === 192 && b === 168) || // 192.168.0.0/16 RFC 1918
		(a === 198 && (b === 18 || b === 19)) || // 198.18.0.0/15 benchmarking
		(a === 198 && b === 51 && c === 100) || // 198.51.100.0/24 TEST-NET-2
		(a === 203 && b === 0 && c === 113) || // 203.0.113.0/24 TEST-NET-3
		a >= 224 // 224.0.0.0/4 multicast + 240.0.0.0/4 reserved + broadcast
	);
}

/**
 * Expand an IPv6 literal into its eight 16-bit groups.
 *
 * Accepts `::` compression, an embedded dotted IPv4 tail and a `%zone` suffix.
 *
 * @returns Eight numbers, or `null` when the text cannot be parsed.
 */
function parseIPv6Groups(address: string): number[] | null {
	const zoneAt = address.indexOf("%");
	let text = (zoneAt === -1 ? address : address.slice(0, zoneAt)).toLowerCase();

	// Rewrite a dotted IPv4 tail ("…:1.2.3.4") as two hexadecimal groups.
	if (text.includes(".")) {
		const tailAt = text.lastIndexOf(":");
		if (tailAt === -1) return null;
		const octets = parseIPv4Octets(text.slice(tailAt + 1));
		if (octets === null) return null;
		const [a, b, c, d] = octets;
		text = `${text.slice(0, tailAt + 1)}${((a << 8) | b).toString(16)}:${((c << 8) | d).toString(16)}`;
	}

	const toGroups = (part: string): number[] | null => {
		if (part === "") return [];
		const groups: number[] = [];
		for (const field of part.split(":")) {
			if (!/^[0-9a-f]{1,4}$/.test(field)) return null;
			groups.push(Number.parseInt(field, 16));
		}
		return groups;
	};

	const parts = text.split("::");
	if (parts.length > 2) return null;
	const head = toGroups(parts[0] ?? "");
	const tail = toGroups(parts[1] ?? "");
	if (head === null || tail === null) return null;
	if (parts.length === 1) return head.length === 8 ? head : null;
	const gap = 8 - head.length - tail.length;
	if (gap < 1) return null;
	return [...head, ...new Array<number>(gap).fill(0), ...tail];
}

/**
 * True when `address` is not a public unicast IPv6 address.
 *
 * Classification works on the numeric groups rather than on the spelling, so
 * expanded forms such as `0:0:0:0:0:0:0:1` or `0:0:0:0:0:ffff:127.0.0.1` are
 * judged exactly like their canonical equivalents. IPv4-mapped addresses are
 * judged by the IPv4 address they embed. Everything else is accepted only
 * when it lies in the global-unicast block 2000::/3 and outside the
 * special-purpose carve-outs within it. Unparseable input is treated as
 * private (fail-closed).
 */
function isPrivateIPv6(address: string): boolean {
	const groups = parseIPv6Groups(address);
	if (groups === null) return true;
	const g = (index: number): number => groups[index] ?? 0;

	// ::ffff:a.b.c.d — IPv4-mapped: the real destination is the IPv4 address.
	const fiveLeadingZeroGroups = g(0) === 0 && g(1) === 0 && g(2) === 0 && g(3) === 0 && g(4) === 0;
	if (fiveLeadingZeroGroups && g(5) === 0xffff) {
		return isPrivateIPv4(`${g(6) >> 8}.${g(6) & 0xff}.${g(7) >> 8}.${g(7) & 0xff}`);
	}

	// Outside 2000::/3 nothing is global unicast: this rejects ::, ::1,
	// fc00::/7 (ULA), fe80::/10 (link-local), fec0::/10 (site-local),
	// ff00::/8 (multicast), 64:ff9b::/96 (NAT64), 100::/64 (discard), …
	if ((g(0) & 0xe000) !== 0x2000) return true;

	if (g(0) === 0x2001 && g(1) < 0x0200) return true; // 2001::/23 IETF protocol assignments (incl. Teredo)
	if (g(0) === 0x2001 && g(1) === 0x0db8) return true; // 2001:db8::/32 documentation
	if (g(0) === 0x2002) return true; // 2002::/16 6to4 (embeds an arbitrary IPv4 address)
	if (g(0) === 0x3fff && g(1) < 0x1000) return true; // 3fff::/20 documentation
	return false;
}

/**
 * True for any address that is not a routable public unicast address.
 *
 * @param address IPv4 or IPv6 literal without surrounding brackets.
 * @returns `true` for private, loopback, link-local, reserved or otherwise
 * special addresses, and for any string that is not an IP literal at all.
 */
export function isPrivateOrSpecialAddress(address: string): boolean {
	switch (net.isIP(address)) {
		case 4:
			return isPrivateIPv4(address);
		case 6:
			return isPrivateIPv6(address);
		default:
			return true; // not a recognizable IP -> treat as unsafe
	}
}
|
|
103
|
+
|
|
104
|
+
/**
 * Validate that `rawUrl` is a public http/https target.
 *
 * Checks, in order: the URL parses; the scheme is http or https; no
 * credentials are embedded; the host is not on the local-name denylist; an IP
 * literal is public; otherwise every address the resolver returns for the
 * hostname is public.
 *
 * @param rawUrl User-supplied URL text.
 * @param options.resolver DNS seam; defaults to the module's `defaultResolver`.
 * @returns A discriminated result. Resolver failures become a rejection
 * rather than an exception.
 */
export async function validatePublicHttpUrl(
	rawUrl: string,
	options: { resolver?: AddressResolver } = {},
): Promise<PublicUrlResult> {
	const resolver = options.resolver ?? defaultResolver;

	let url: URL;
	try {
		url = new URL(rawUrl);
	} catch {
		return { ok: false, reason: "invalid URL" };
	}
	if (url.protocol !== "http:" && url.protocol !== "https:") {
		return { ok: false, reason: `unsupported scheme ${url.protocol}` };
	}
	if (url.username || url.password) {
		return { ok: false, reason: "URL credentials are not allowed" };
	}

	// `URL.hostname` keeps the square brackets around IPv6 literals
	// ("[::1]"), which `net.isIP` does not accept. Strip them so IPv6
	// literals are classified here instead of being handed to DNS.
	const bracketed = url.hostname.startsWith("[") && url.hostname.endsWith("]");
	const host = bracketed ? url.hostname.slice(1, -1) : url.hostname;

	if (isBlockedHostname(host)) {
		return { ok: false, reason: "localhost or internal host" };
	}

	if (net.isIP(host) !== 0) {
		if (isPrivateOrSpecialAddress(host)) {
			return { ok: false, reason: "private, loopback, link-local, or reserved IP literal" };
		}
		return { ok: true, url, addresses: [host] };
	}

	let addresses: string[];
	try {
		addresses = await resolver(host);
	} catch {
		return { ok: false, reason: "host could not be resolved" };
	}
	if (addresses.length === 0) {
		return { ok: false, reason: "host resolved to no addresses" };
	}
	// A single private record is enough to reject: the connection could pick any of them.
	if (addresses.some(isPrivateOrSpecialAddress)) {
		return { ok: false, reason: "host resolves to a private or reserved address" };
	}
	return { ok: true, url, addresses };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Guard entry point for the insane-search fallback. It forwards its arguments
 * unchanged to `validatePublicHttpUrl` and returns that verdict as-is.
 */
export async function validatePublicHttpUrlForInsane(
	rawUrl: string,
	options: { resolver?: AddressResolver } = {},
): Promise<PublicUrlResult> {
	const verdict = await validatePublicHttpUrl(rawUrl, options);
	return verdict;
}
|
|
@@ -6,6 +6,8 @@ import type TurndownService from "turndown";
|
|
|
6
6
|
|
|
7
7
|
import type { AgentStorage } from "../../session/agent-storage";
|
|
8
8
|
import { ToolAbortError } from "../../tools/tool-errors";
|
|
9
|
+
import type { AddressResolver } from "../insane/url-guard";
|
|
10
|
+
import { validatePublicHttpUrl } from "../insane/url-guard";
|
|
9
11
|
|
|
10
12
|
export { formatNumber } from "@gajae-code/utils";
|
|
11
13
|
|
|
@@ -35,6 +37,7 @@ const USER_AGENTS = [
|
|
|
35
37
|
"Mozilla/5.0 (compatible; TextBot/1.0)",
|
|
36
38
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
|
37
39
|
];
|
|
40
|
+
/** HTTP status codes that `loadPage` treats as redirects and follows by hand. */
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
38
41
|
|
|
39
42
|
function isBotBlocked(status: number, content: string): boolean {
|
|
40
43
|
if (status === 403 || status === 503) {
|
|
@@ -70,6 +73,9 @@ export interface LoadPageOptions {
|
|
|
70
73
|
body?: string;
|
|
71
74
|
maxBytes?: number;
|
|
72
75
|
signal?: AbortSignal;
|
|
76
|
+
publicUrlGuard?: boolean;
|
|
77
|
+
resolver?: AddressResolver;
|
|
78
|
+
maxRedirects?: number;
|
|
73
79
|
}
|
|
74
80
|
|
|
75
81
|
export interface LoadPageResult {
|
|
@@ -78,87 +84,179 @@ export interface LoadPageResult {
|
|
|
78
84
|
finalUrl: string;
|
|
79
85
|
ok: boolean;
|
|
80
86
|
status?: number;
|
|
87
|
+
error?: string;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
 * Run the public-URL guard over `rawUrl`.
 *
 * @param rawUrl Candidate fetch or redirect target.
 * @param resolver Optional DNS seam forwarded to the guard.
 * @param context Prefix for the error message (which step was blocked).
 * @returns The normalized URL string on success; otherwise an error message
 * plus the rejected URL as `finalUrl`.
 */
async function guardPublicFetchUrl(
	rawUrl: string,
	resolver: AddressResolver | undefined,
	context: string,
): Promise<{ ok: true; url: string } | { ok: false; error: string; finalUrl: string }> {
	const verdict = await validatePublicHttpUrl(rawUrl, { resolver });
	if (!verdict.ok) {
		const error = `${context}: target URL is not public HTTP(S): ${verdict.reason}`;
		return { ok: false, error, finalUrl: rawUrl };
	}
	return { ok: true, url: verdict.url.toString() };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Decide whether following a redirect must switch the request to a body-less
 * GET, mirroring the Fetch Standard's HTTP-redirect rules:
 *
 * - 303: every method except GET and HEAD becomes GET (HEAD must stay HEAD).
 * - 301 / 302: only POST becomes GET.
 * - 307 / 308 and anything else: method and body are preserved.
 *
 * @param status Redirect status code of the response.
 * @param method Method of the request that was redirected (any casing).
 */
function shouldRewriteRedirectMethod(status: number, method: string): boolean {
	const normalized = method.toUpperCase();
	if (status === 303) return normalized !== "GET" && normalized !== "HEAD";
	return (status === 301 || status === 302) && normalized === "POST";
}
|
|
82
108
|
|
|
83
109
|
/** Credential-bearing headers that must not follow a redirect to another origin. */
const CROSS_ORIGIN_REDIRECT_HEADERS = new Set(["authorization", "cookie", "proxy-authorization"]);

/** Headers that describe a request body; meaningless once a redirect drops the body. */
const REQUEST_BODY_HEADERS = new Set([
	"content-encoding",
	"content-language",
	"content-length",
	"content-location",
	"content-type",
]);

/** Return a copy of `headers` without the entries that may not be replayed on the next redirect hop. */
function filterRedirectHeaders(
	headers: Record<string, string>,
	crossOrigin: boolean,
	bodyDropped: boolean,
): Record<string, string> {
	return Object.fromEntries(
		Object.entries(headers).filter(([name]) => {
			const key = name.toLowerCase();
			if (crossOrigin && CROSS_ORIGIN_REDIRECT_HEADERS.has(key)) return false;
			if (bodyDropped && REQUEST_BODY_HEADERS.has(key)) return false;
			return true;
		}),
	);
}

/**
 * Fetch a page with timeout and size limit.
 *
 * Redirects are followed manually (at most `maxRedirects` hops) so that, when
 * `publicUrlGuard` is enabled (the default), the initial URL and every
 * redirect target are validated before a request is sent to them. Each entry
 * of `USER_AGENTS` is tried in turn while the response looks bot-blocked or
 * the request throws.
 *
 * On redirects the caller's credential headers are dropped as soon as a hop
 * leaves the current origin, and body-describing headers are dropped when the
 * method is rewritten to GET.
 *
 * @returns The (possibly truncated) body decoded as UTF-8 together with the
 * content type, final URL and status; `ok: false` with `error` when the fetch
 * was blocked, redirected too often or failed.
 * @throws ToolAbortError when the caller's `signal` is aborted.
 */
export async function loadPage(url: string, options: LoadPageOptions = {}): Promise<LoadPageResult> {
	const {
		timeout = 20,
		headers = {},
		maxBytes = MAX_BYTES,
		signal,
		method = "GET",
		body,
		publicUrlGuard = true,
		resolver,
		maxRedirects = 10,
	} = options;

	let initialUrl = url;
	if (publicUrlGuard) {
		const guarded = await guardPublicFetchUrl(url, resolver, "Blocked URL fetch");
		if (!guarded.ok) {
			return {
				content: "",
				contentType: "",
				finalUrl: guarded.finalUrl,
				ok: false,
				error: guarded.error,
			};
		}
		initialUrl = guarded.url;
	}

	attempts: for (let attempt = 0; attempt < USER_AGENTS.length; attempt++) {
		if (signal?.aborted) {
			throw new ToolAbortError();
		}

		const userAgent = USER_AGENTS[attempt];
		// One deadline per attempt, shared by every redirect hop of that attempt.
		const requestSignal = ptree.combineSignals(signal, timeout * 1000);
		let currentUrl = initialUrl;
		let currentMethod = method;
		let currentBody = body;
		// Per-attempt copy: redirects may strip entries without affecting later attempts.
		let currentHeaders: Record<string, string> = { ...headers };

		try {
			for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
				const requestInit: RequestInit = {
					signal: requestSignal,
					method: currentMethod,
					headers: {
						"User-Agent": userAgent,
						Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
						"Accept-Language": "en-US,en;q=0.5",
						"Accept-Encoding": "identity", // Cloudflare Markdown-for-Agents returns corrupted bytes when compression is negotiated
						...currentHeaders,
					},
					redirect: "manual",
				};

				if (currentBody !== undefined) {
					requestInit.body = currentBody;
				}

				const response = await fetch(currentUrl, requestInit);
				if (REDIRECT_STATUSES.has(response.status)) {
					// The redirect body is never read; release it so the connection is not held open.
					await response.body?.cancel().catch(() => undefined);

					const location = response.headers.get("location");
					if (!location) {
						return {
							content: "",
							contentType: "",
							finalUrl: currentUrl,
							ok: false,
							status: response.status,
							error: "Redirect response missing Location header",
						};
					}
					let nextUrl = new URL(location, currentUrl).toString();
					if (publicUrlGuard) {
						const guarded = await guardPublicFetchUrl(nextUrl, resolver, "Blocked URL redirect");
						if (!guarded.ok) {
							return {
								content: "",
								contentType: "",
								finalUrl: guarded.finalUrl,
								ok: false,
								status: response.status,
								error: guarded.error,
							};
						}
						nextUrl = guarded.url;
					}

					const crossOrigin = new URL(nextUrl).origin !== new URL(currentUrl).origin;
					const rewriteToGet = shouldRewriteRedirectMethod(response.status, currentMethod);
					if (rewriteToGet) {
						currentMethod = "GET";
						currentBody = undefined;
					}
					if (crossOrigin || rewriteToGet) {
						// Never replay caller credentials to a different origin, nor
						// body metadata on a request that no longer has a body.
						currentHeaders = filterRedirectHeaders(currentHeaders, crossOrigin, rewriteToGet);
					}
					currentUrl = nextUrl;
					continue;
				}

				const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
				const finalUrl = response.url || currentUrl;

				const reader = response.body?.getReader();
				if (!reader) {
					return { content: "", contentType, finalUrl, ok: false, status: response.status };
				}

				const chunks: Uint8Array[] = [];
				let totalSize = 0;

				while (true) {
					const { done, value } = await reader.read();
					if (done) break;

					chunks.push(value);
					totalSize += value.length;

					if (totalSize > maxBytes) {
						// Stop downloading; a failed cancel must not discard what was already read.
						await reader.cancel().catch(() => undefined);
						break;
					}
				}

				const content = Buffer.concat(chunks).toString("utf-8");
				if (isBotBlocked(response.status, content) && attempt < USER_AGENTS.length - 1) {
					continue attempts;
				}

				if (!response.ok) {
					return { content, contentType, finalUrl, ok: false, status: response.status };
				}

				return { content, contentType, finalUrl, ok: true, status: response.status };
			}
			return {
				content: "",
				contentType: "",
				finalUrl: currentUrl,
				ok: false,
				error: `Too many redirects (${maxRedirects})`,
			};
		} catch (error) {
			if (signal?.aborted) {
				throw new ToolAbortError();
			}
			if (attempt === USER_AGENTS.length - 1) {
				// Last attempt: surface the failure reason instead of swallowing it.
				return {
					content: "",
					contentType: "",
					finalUrl: currentUrl,
					ok: false,
					error: error instanceof Error ? error.message : String(error),
				};
			}
		}
	}

	return { content: "", contentType: "", finalUrl: initialUrl, ok: false };
}
|
|
163
261
|
|
|
164
262
|
/** Module-level Turndown instance — built lazily on first use. */
|
|
@@ -4,6 +4,8 @@ export { isRecord };
|
|
|
4
4
|
|
|
5
5
|
import { ToolAbortError } from "../../tools/tool-errors";
|
|
6
6
|
import { convertBufferWithMarkit } from "../../utils/markit";
|
|
7
|
+
import type { AddressResolver } from "../insane/url-guard";
|
|
8
|
+
import { validatePublicHttpUrl } from "../insane/url-guard";
|
|
7
9
|
import { MAX_BYTES } from "./types";
|
|
8
10
|
|
|
9
11
|
export function asRecord(value: unknown): Record<string, unknown> | null {
|
|
@@ -28,6 +30,14 @@ export interface BinaryFetchSuccess {
|
|
|
28
30
|
|
|
29
31
|
export type BinaryFetchResult = BinaryFetchSuccess | { ok: false; error?: string };
|
|
30
32
|
|
|
33
|
+
/** Optional safety controls for `fetchBinary`. */
export interface FetchBinaryOptions {
	/** Validate the target and every redirect hop with the public-URL guard; `fetchBinary` treats an omitted value as `true`. */
	publicUrlGuard?: boolean;
	/** DNS resolver forwarded to the guard. */
	resolver?: AddressResolver;
	/** How many redirects `fetchBinary` follows before giving up; it defaults this to 10. */
	maxRedirects?: number;
}
|
|
38
|
+
|
|
39
|
+
/** HTTP status codes that `fetchBinary` treats as redirects and follows by hand. */
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
40
|
+
|
|
31
41
|
async function readResponseWithLimit(response: Response, maxBytes: number, signal?: AbortSignal): Promise<Uint8Array> {
|
|
32
42
|
const reader = response.body?.getReader();
|
|
33
43
|
if (!reader) return new Uint8Array(0);
|
|
@@ -60,34 +70,75 @@ async function readResponseWithLimit(response: Response, maxBytes: number, signa
|
|
|
60
70
|
return new Uint8Array(Buffer.concat(chunks, totalBytes));
|
|
61
71
|
}
|
|
62
72
|
|
|
73
|
+
/**
 * Run the public-URL guard over `rawUrl` for a binary download.
 *
 * @param rawUrl Candidate fetch or redirect target.
 * @param resolver Optional DNS seam forwarded to the guard.
 * @param context Prefix for the error message (which step was blocked).
 * @returns The normalized URL string on success, otherwise an error message.
 */
async function guardPublicBinaryUrl(
	rawUrl: string,
	resolver: AddressResolver | undefined,
	context: string,
): Promise<{ ok: true; url: string } | { ok: false; error: string }> {
	const verdict = await validatePublicHttpUrl(rawUrl, { resolver });
	if (!verdict.ok) {
		const error = `${context}: target URL is not public HTTP(S): ${verdict.reason}`;
		return { ok: false, error };
	}
	return { ok: true, url: verdict.url.toString() };
}
|
|
82
|
+
|
|
63
83
|
/**
|
|
64
84
|
* Fetch binary content from a URL
|
|
65
85
|
*/
|
|
66
|
-
export async function fetchBinary(
|
|
86
|
+
export async function fetchBinary(
|
|
87
|
+
url: string,
|
|
88
|
+
timeout: number = 20,
|
|
89
|
+
signal?: AbortSignal,
|
|
90
|
+
options: FetchBinaryOptions = {},
|
|
91
|
+
): Promise<BinaryFetchResult> {
|
|
67
92
|
const requestSignal = ptree.combineSignals(signal, timeout * 1000);
|
|
93
|
+
const { publicUrlGuard = true, resolver, maxRedirects = 10 } = options;
|
|
68
94
|
try {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
redirect: "follow",
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
if (!response.ok) {
|
|
78
|
-
return { ok: false, error: `HTTP ${response.status}` };
|
|
95
|
+
let currentUrl = url;
|
|
96
|
+
if (publicUrlGuard) {
|
|
97
|
+
const guarded = await guardPublicBinaryUrl(url, resolver, "Blocked binary fetch");
|
|
98
|
+
if (!guarded.ok) return { ok: false, error: guarded.error };
|
|
99
|
+
currentUrl = guarded.url;
|
|
79
100
|
}
|
|
80
101
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
102
|
+
for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
|
|
103
|
+
const response = await fetch(currentUrl, {
|
|
104
|
+
signal: requestSignal,
|
|
105
|
+
headers: {
|
|
106
|
+
"User-Agent": "Mozilla/5.0 (compatible; TextBot/1.0)",
|
|
107
|
+
},
|
|
108
|
+
redirect: "manual",
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
if (REDIRECT_STATUSES.has(response.status)) {
|
|
112
|
+
const location = response.headers.get("location");
|
|
113
|
+
if (!location) return { ok: false, error: "Redirect response missing Location header" };
|
|
114
|
+
const redirectUrl = new URL(location, currentUrl).toString();
|
|
115
|
+
if (publicUrlGuard) {
|
|
116
|
+
const guarded = await guardPublicBinaryUrl(redirectUrl, resolver, "Blocked binary redirect");
|
|
117
|
+
if (!guarded.ok) return { ok: false, error: guarded.error };
|
|
118
|
+
currentUrl = guarded.url;
|
|
119
|
+
} else {
|
|
120
|
+
currentUrl = redirectUrl;
|
|
121
|
+
}
|
|
122
|
+
continue;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
if (!response.ok) {
|
|
126
|
+
return { ok: false, error: `HTTP ${response.status}` };
|
|
87
127
|
}
|
|
128
|
+
|
|
129
|
+
const contentDisposition = response.headers.get("content-disposition") || undefined;
|
|
130
|
+
const contentLength = response.headers.get("content-length");
|
|
131
|
+
if (contentLength) {
|
|
132
|
+
const size = Number.parseInt(contentLength, 10);
|
|
133
|
+
if (Number.isFinite(size) && size > MAX_BYTES) {
|
|
134
|
+
return { ok: false, error: `content-length ${size} exceeds ${MAX_BYTES}` };
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
const buffer = await readResponseWithLimit(response, MAX_BYTES, requestSignal);
|
|
138
|
+
return { ok: true, buffer, contentDisposition };
|
|
88
139
|
}
|
|
89
|
-
|
|
90
|
-
return { ok:
|
|
140
|
+
|
|
141
|
+
return { ok: false, error: `Too many redirects (${maxRedirects})` };
|
|
91
142
|
} catch (err) {
|
|
92
143
|
if (signal?.aborted) throw new ToolAbortError();
|
|
93
144
|
if (requestSignal?.aborted) return { ok: false, error: "aborted" };
|
|
@@ -72,6 +72,11 @@ const PROVIDER_META: Record<SearchProviderId, ProviderMeta> = {
|
|
|
72
72
|
label: "DuckDuckGo",
|
|
73
73
|
load: async () => new (await import("./providers/duckduckgo")).DuckDuckGoProvider(),
|
|
74
74
|
},
|
|
75
|
+
insane: {
|
|
76
|
+
id: "insane",
|
|
77
|
+
label: "Insane",
|
|
78
|
+
load: async () => new (await import("./providers/insane")).InsaneProvider(),
|
|
79
|
+
},
|
|
75
80
|
"openai-compatible": {
|
|
76
81
|
id: "openai-compatible",
|
|
77
82
|
label: "OpenAI-compatible",
|
|
@@ -97,6 +102,7 @@ export async function getSearchProvider(id: SearchProviderId): Promise<SearchPro
|
|
|
97
102
|
|
|
98
103
|
export const SEARCH_PROVIDER_ORDER: SearchProviderId[] = [
|
|
99
104
|
"duckduckgo",
|
|
105
|
+
"insane",
|
|
100
106
|
"tavily",
|
|
101
107
|
"perplexity",
|
|
102
108
|
"brave",
|
|
@@ -234,14 +240,41 @@ export function isLocalBaseUrl(baseUrl: string | undefined): boolean {
|
|
|
234
240
|
return false;
|
|
235
241
|
}
|
|
236
242
|
|
|
243
|
+
/**
 * Report whether `baseUrl` points at an official OpenAI / ChatGPT host.
 *
 * A missing or blank value counts as official (the default hosted OpenAI). The
 * `codex` provider authenticates against the ChatGPT backend with the user's
 * local Codex OAuth, so it may only be chosen for genuine OpenAI endpoints;
 * custom or proxy endpoints should go through the `openai-compatible` adapter
 * with their own credentials.
 *
 * @returns `false` for unparseable URLs and for every other host.
 */
function isOpenAIOfficialBaseUrl(baseUrl: string | undefined): boolean {
	if (!baseUrl?.trim()) return true;

	let parsed: URL;
	try {
		parsed = new URL(baseUrl);
	} catch {
		return false;
	}

	const host = parsed.hostname.toLowerCase();
	const exactHosts = ["api.openai.com", "chatgpt.com"];
	if (exactHosts.includes(host)) return true;
	// Any subdomain of the two official apex domains also qualifies.
	const officialSuffixes = [".openai.com", ".chatgpt.com"];
	return officialSuffixes.some(suffix => host.endsWith(suffix));
}
|
|
266
|
+
|
|
237
267
|
/**
 * Infer which native web-search provider matches the active model, from its
 * model id and wire protocol. Returns `undefined` when there is no context,
 * web search is switched off, or nothing matches.
 */
export function inferNativeProviderFromModel(ctx: ActiveSearchModelContext | undefined): SearchProviderId | undefined {
	if (!ctx) return undefined;
	if (ctx.webSearch === "off") return undefined;

	const wireId = ctx.wireModelId ?? ctx.modelId;
	const modelId = wireId.toLowerCase();

	if (modelId.startsWith("claude-") && isAnthropicWire(ctx.api)) {
		return "anthropic";
	}
	if (modelId.startsWith("gemini-") && isGoogleWire(ctx.api)) {
		return "gemini";
	}
	if (looksXaiFamilyModelId(ctx) && isOpenAICompatWire(ctx.api)) {
		return "xai";
	}

	// `codex` talks to the ChatGPT backend using local Codex OAuth, so it is
	// inferred only for genuine OpenAI endpoints. Custom or proxy
	// OpenAI-compatible models fall through to `activeContextNativeId` →
	// `openai-compatible`, which uses their own credentials.
	const isOfficialOpenAI =
		looksOpenAIFamilyModelId(ctx) && isOpenAICompatWire(ctx.api) && isOpenAIOfficialBaseUrl(ctx.baseUrl);
	return isOfficialOpenAI ? "codex" : undefined;
}
|
|
@@ -249,8 +282,9 @@ export function inferNativeProviderFromModel(ctx: ActiveSearchModelContext | und
|
|
|
249
282
|
function canUseDirectProviderMapping(ctx: ActiveSearchModelContext, id: SearchProviderId): boolean {
	// Direct mapping is never allowed with web search off. Every provider except
	// `codex` is otherwise accepted; the ChatGPT-backed `codex` provider carries
	// the same constraint as inference — official OpenAI endpoints only, never a
	// custom/proxy base URL.
	return ctx.webSearch !== "off" && (id !== "codex" || isOpenAIOfficialBaseUrl(ctx.baseUrl));
}
|
|
255
289
|
|
|
256
290
|
export async function canUseGenericCredentials(
|
|
@@ -268,17 +302,35 @@ export async function canUseGenericCredentials(
|
|
|
268
302
|
return Boolean(key);
|
|
269
303
|
}
|
|
270
304
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
305
|
+
/**
 * Picks the native web-search provider that can be driven with the ACTIVE
 * model's own credentials and baseUrl, chosen from the model's wire protocol.
 *
 * This covers "native search over a proxy": a model served through a
 * proxy/custom endpoint usually lacks its canonical search credentials (for
 * example a dedicated `anthropic` key, or ChatGPT OAuth for `codex`), yet the
 * credential that authenticates the model itself — stored under the active
 * provider id and pointed at `ctx.baseUrl` — can power native web search too.
 * Each provider's `search()` falls back to those active credentials when its
 * canonical ones are missing.
 *
 * The id is derived only from the wire `api`, plus the model-id family where a
 * native tool is meaningful for that family alone. Providers fail closed (and
 * the chain falls through to DuckDuckGo) when the endpoint does not actually
 * support web search.
 *
 * @param ctx - Active model context; may be `undefined`.
 * @returns The provider id to attempt, or `undefined` when web search is off,
 *   no context exists, or no wire/family rule applies.
 */
export function activeContextNativeId(ctx: ActiveSearchModelContext | undefined): SearchProviderId | undefined {
	if (!ctx || ctx.webSearch === "off") return undefined;

	const wireId = (ctx.wireModelId ?? ctx.modelId).toLowerCase();

	// Dispatch mirrors exactly what each provider can serve with the active
	// credential. Handing back an id the provider would reject only burns a
	// guaranteed-fail attempt before DuckDuckGo.
	if (isAnthropicWire(ctx.api) && wireId.startsWith("claude-")) return "anthropic";

	switch (ctx.api) {
		// The OpenAI-compatible adapter speaks just the two plain OpenAI wires
		// (not azure).
		case "openai-responses":
		case "openai-completions":
			return "openai-compatible";
		// The Gemini active path speaks only the public Generative Language wire
		// (not vertex/cloud-code).
		case "google-generative-ai":
			return wireId.startsWith("gemini-") ? "gemini" : undefined;
		default:
			return undefined;
	}
}
|
|
283
335
|
|
|
284
336
|
export async function resolveProviderChain(options: ResolveProviderChainOptions): Promise<SearchProvider[]> {
|
|
@@ -304,9 +356,16 @@ export async function resolveProviderChain(options: ResolveProviderChainOptions)
|
|
|
304
356
|
await appendAvailable(chain, directId, authStorage);
|
|
305
357
|
const inferred = inferNativeProviderFromModel(activeModelContext);
|
|
306
358
|
if (inferred) await appendAvailable(chain, inferred, authStorage);
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
359
|
+
// Native-over-proxy: when no canonical native provider was selected above,
|
|
360
|
+
// fall back to the model's own credentials (resolved under the active
|
|
361
|
+
// provider id against its baseUrl) to drive native web search. Gated on
|
|
362
|
+
// those credentials actually resolving; otherwise the chain ends at the
|
|
363
|
+
// keyless DuckDuckGo terminal fallback.
|
|
364
|
+
if (chain.length === 0) {
|
|
365
|
+
const activeNativeId = activeContextNativeId(activeModelContext);
|
|
366
|
+
if (activeNativeId && (await canUseGenericCredentials(authStorage, activeModelContext, sessionId, signal)))
|
|
367
|
+
chain.push(activeNativeId);
|
|
368
|
+
}
|
|
310
369
|
}
|
|
311
370
|
|
|
312
371
|
// Configured fallbacks are user-facing only: the internal `openai-compatible`
|