@gajae-code/coding-agent 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/CHANGELOG.md +57 -0
  2. package/dist/types/cli/mcp-cli.d.ts +25 -0
  3. package/dist/types/cli/notify-cli.d.ts +2 -0
  4. package/dist/types/cli.d.ts +6 -0
  5. package/dist/types/commands/mcp.d.ts +70 -0
  6. package/dist/types/config/keybindings.d.ts +2 -2
  7. package/dist/types/config/settings-schema.d.ts +39 -2
  8. package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
  9. package/dist/types/extensibility/shared-events.d.ts +1 -0
  10. package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
  11. package/dist/types/lsp/types.d.ts +2 -0
  12. package/dist/types/modes/components/custom-editor.d.ts +1 -1
  13. package/dist/types/modes/components/model-selector.d.ts +2 -0
  14. package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
  15. package/dist/types/modes/theme/defaults/index.d.ts +99 -0
  16. package/dist/types/notifications/attachment-registry.d.ts +17 -0
  17. package/dist/types/notifications/chat-adapters.d.ts +9 -0
  18. package/dist/types/notifications/config.d.ts +9 -1
  19. package/dist/types/notifications/engine.d.ts +59 -0
  20. package/dist/types/notifications/managed-daemon.d.ts +48 -0
  21. package/dist/types/notifications/operator-runtime.d.ts +52 -0
  22. package/dist/types/notifications/telegram-daemon.d.ts +73 -16
  23. package/dist/types/notifications/threaded-inbound.d.ts +19 -0
  24. package/dist/types/notifications/threaded-render.d.ts +6 -1
  25. package/dist/types/notifications/topic-registry.d.ts +2 -0
  26. package/dist/types/session/agent-session.d.ts +2 -0
  27. package/dist/types/tools/composer-bash-policy.d.ts +14 -0
  28. package/dist/types/tools/fetch.d.ts +23 -0
  29. package/dist/types/tools/index.d.ts +1 -0
  30. package/dist/types/tools/telegram-send.d.ts +32 -0
  31. package/dist/types/web/insane/bridge.d.ts +103 -0
  32. package/dist/types/web/insane/url-guard.d.ts +25 -0
  33. package/dist/types/web/scrapers/types.d.ts +5 -0
  34. package/dist/types/web/scrapers/utils.d.ts +7 -1
  35. package/dist/types/web/search/provider.d.ts +18 -1
  36. package/dist/types/web/search/providers/insane.d.ts +53 -0
  37. package/dist/types/web/search/providers/text-citations.d.ts +23 -0
  38. package/dist/types/web/search/types.d.ts +12 -4
  39. package/package.json +10 -8
  40. package/scripts/verify-insane-vendor.ts +132 -0
  41. package/src/cli/args.ts +1 -1
  42. package/src/cli/fast-help.ts +1 -1
  43. package/src/cli/mcp-cli.ts +272 -0
  44. package/src/cli/notify-cli.ts +152 -5
  45. package/src/cli.ts +6 -2
  46. package/src/commands/mcp.ts +117 -0
  47. package/src/commands/team.ts +1 -1
  48. package/src/config/keybindings.ts +2 -2
  49. package/src/config/settings-schema.ts +30 -1
  50. package/src/deep-interview/plaintext-gate-guard.ts +94 -0
  51. package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
  52. package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
  53. package/src/defaults/gjc/skills/team/SKILL.md +3 -2
  54. package/src/extensibility/extensions/runner.ts +1 -0
  55. package/src/extensibility/shared-events.ts +1 -0
  56. package/src/gjc-runtime/launch-tmux.ts +17 -3
  57. package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
  58. package/src/gjc-runtime/ralplan-runtime.ts +2 -2
  59. package/src/gjc-runtime/tmux-common.ts +3 -1
  60. package/src/gjc-runtime/ultragoal-guard.ts +25 -8
  61. package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
  62. package/src/gjc-runtime/workflow-manifest.ts +7 -2
  63. package/src/hooks/skill-state.ts +57 -0
  64. package/src/internal-urls/docs-index.generated.ts +14 -11
  65. package/src/lsp/config.ts +16 -3
  66. package/src/lsp/defaults.json +7 -0
  67. package/src/lsp/types.ts +2 -0
  68. package/src/modes/bridge/bridge-mode.ts +11 -0
  69. package/src/modes/components/custom-editor.ts +2 -0
  70. package/src/modes/components/footer.ts +2 -3
  71. package/src/modes/components/model-selector.ts +12 -0
  72. package/src/modes/components/status-line/git-utils.ts +25 -0
  73. package/src/modes/components/status-line.ts +10 -11
  74. package/src/modes/components/welcome.ts +2 -3
  75. package/src/modes/controllers/event-controller.ts +15 -0
  76. package/src/modes/controllers/selector-controller.ts +3 -0
  77. package/src/modes/interactive-mode.ts +48 -3
  78. package/src/modes/shared/agent-wire/scopes.ts +1 -1
  79. package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
  80. package/src/modes/theme/defaults/index.ts +2 -0
  81. package/src/modes/utils/context-usage.ts +2 -2
  82. package/src/notifications/attachment-registry.ts +23 -0
  83. package/src/notifications/chat-adapters.ts +147 -0
  84. package/src/notifications/config.ts +23 -2
  85. package/src/notifications/engine.ts +100 -0
  86. package/src/notifications/index.ts +180 -38
  87. package/src/notifications/managed-daemon.ts +163 -0
  88. package/src/notifications/operator-runtime.ts +171 -0
  89. package/src/notifications/telegram-daemon.ts +553 -236
  90. package/src/notifications/threaded-inbound.ts +60 -4
  91. package/src/notifications/threaded-render.ts +20 -2
  92. package/src/notifications/topic-registry.ts +5 -0
  93. package/src/session/agent-session.ts +82 -51
  94. package/src/slash-commands/helpers/parse.ts +2 -1
  95. package/src/tools/bash.ts +9 -0
  96. package/src/tools/composer-bash-policy.ts +96 -0
  97. package/src/tools/fetch.ts +94 -1
  98. package/src/tools/index.ts +3 -0
  99. package/src/tools/telegram-send.ts +137 -0
  100. package/src/web/insane/bridge.ts +350 -0
  101. package/src/web/insane/url-guard.ts +159 -0
  102. package/src/web/scrapers/types.ts +143 -45
  103. package/src/web/scrapers/utils.ts +70 -19
  104. package/src/web/search/provider.ts +77 -18
  105. package/src/web/search/providers/anthropic.ts +70 -3
  106. package/src/web/search/providers/codex.ts +1 -119
  107. package/src/web/search/providers/gemini.ts +99 -0
  108. package/src/web/search/providers/insane.ts +551 -0
  109. package/src/web/search/providers/openai-compatible.ts +66 -32
  110. package/src/web/search/providers/text-citations.ts +111 -0
  111. package/src/web/search/types.ts +13 -2
  112. package/vendor/insane-search/LICENSE +21 -0
  113. package/vendor/insane-search/MANIFEST.json +24 -0
  114. package/vendor/insane-search/engine/__init__.py +23 -0
  115. package/vendor/insane-search/engine/__main__.py +128 -0
  116. package/vendor/insane-search/engine/bias_check.py +183 -0
  117. package/vendor/insane-search/engine/executor.py +254 -0
  118. package/vendor/insane-search/engine/fetch_chain.py +725 -0
  119. package/vendor/insane-search/engine/learning.py +175 -0
  120. package/vendor/insane-search/engine/phase0.py +214 -0
  121. package/vendor/insane-search/engine/safety.py +91 -0
  122. package/vendor/insane-search/engine/templates/package.json +11 -0
  123. package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
  124. package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
  125. package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
  126. package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
  127. package/vendor/insane-search/engine/tests/test_u1.py +200 -0
  128. package/vendor/insane-search/engine/tests/test_u4.py +131 -0
  129. package/vendor/insane-search/engine/tests/test_u5.py +163 -0
  130. package/vendor/insane-search/engine/tests/test_u7.py +124 -0
  131. package/vendor/insane-search/engine/transport.py +211 -0
  132. package/vendor/insane-search/engine/url_transforms.py +98 -0
  133. package/vendor/insane-search/engine/validators.py +331 -0
  134. package/vendor/insane-search/engine/waf_detector.py +214 -0
  135. package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Public HTTP(S) URL guard for user-supplied web fetch targets.
3
+ *
4
+ * Network-capable URL readers MUST run this guard before the first request and
5
+ * before following any redirect target. It is fail-closed: anything it cannot
6
+ * prove is a public, non-credentialed http/https target is rejected.
7
+ *
8
+ * The vendored insane-search engine performs its own redirects outside the
9
+ * TypeScript fetch path, so its fallback remains opt-in and is guarded before
10
+ * any dependency probe or engine subprocess is spawned.
11
+ */
12
+ import * as dns from "node:dns/promises";
13
+ import * as net from "node:net";
14
+
15
+ export interface PublicUrlAccepted {
16
+ ok: true;
17
+ url: URL;
18
+ addresses: string[];
19
+ }
20
+
21
+ export interface PublicUrlRejected {
22
+ ok: false;
23
+ reason: string;
24
+ }
25
+
26
+ export type PublicUrlResult = PublicUrlAccepted | PublicUrlRejected;
27
+
28
+ /** Resolver seam so tests can inject DNS results without real lookups. */
29
+ export type AddressResolver = (hostname: string) => Promise<string[]>;
30
+
31
+ const defaultResolver: AddressResolver = async hostname => {
32
+ const records = await dns.lookup(hostname, { all: true, verbatim: true });
33
+ return records.map(record => record.address);
34
+ };
35
+
36
+ const BLOCKED_HOSTNAMES = new Set(["localhost", "localhost.localdomain", "0.0.0.0", ""]);
37
+
38
+ function isBlockedHostname(hostname: string): boolean {
39
+ const normalized = hostname.toLowerCase().replace(/\.$/, "");
40
+ return (
41
+ BLOCKED_HOSTNAMES.has(normalized) ||
42
+ normalized === "localhost" ||
43
+ normalized.endsWith(".localhost") ||
44
+ normalized.endsWith(".local") ||
45
+ normalized.endsWith(".internal") ||
46
+ normalized.endsWith(".home.arpa")
47
+ );
48
+ }
49
+
50
+ function isPrivateIPv4(address: string): boolean {
51
+ const parts = address.split(".").map(part => Number.parseInt(part, 10));
52
+ if (parts.length !== 4 || parts.some(part => !Number.isInteger(part) || part < 0 || part > 255)) return true;
53
+ const [a, b] = parts;
54
+ return (
55
+ a === 0 || // unspecified / "this network"
56
+ a === 10 || // RFC1918
57
+ a === 127 || // loopback
58
+ (a === 100 && b >= 64 && b <= 127) || // CGNAT 100.64/10
59
+ (a === 169 && b === 254) || // link-local
60
+ (a === 172 && b >= 16 && b <= 31) || // RFC1918
61
+ (a === 192 && b === 0) || // all of 192.0.0.0/16: superset of 192.0.0.0/24 (IETF protocol assignments) & 192.0.2.0/24 (documentation)
62
+ (a === 192 && b === 168) || // RFC1918
63
+ (a === 198 && (b === 18 || b === 19)) || // benchmarking 198.18/15
64
+ (a === 198 && b === 51) || // all of 198.51.0.0/16: superset of 198.51.100.0/24 (documentation)
65
+ (a === 203 && b === 0) || // all of 203.0.0.0/16: superset of 203.0.113.0/24 (documentation)
66
+ a >= 224 // multicast (224/4) + reserved (240/4) + broadcast
67
+ );
68
+ }
69
+
70
+ function normalizeIPv4MappedIPv6(address: string): string {
71
+ return address.toLowerCase().startsWith("::ffff:") ? address.slice(7) : address;
72
+ }
73
+
74
+ function isPrivateIPv6(address: string): boolean {
75
+ const normalized = address.toLowerCase();
76
+ const mapped = normalizeIPv4MappedIPv6(normalized);
77
+ if (mapped !== normalized && net.isIP(mapped) === 4) return isPrivateIPv4(mapped);
78
+ return (
79
+ normalized === "::" || // unspecified
80
+ normalized === "::1" || // loopback
81
+ normalized.startsWith("fc") || // ULA fc00::/7
82
+ normalized.startsWith("fd") || // ULA
83
+ normalized.startsWith("fe8") || // link-local fe80::/10
84
+ normalized.startsWith("fe9") ||
85
+ normalized.startsWith("fea") ||
86
+ normalized.startsWith("feb") ||
87
+ normalized.startsWith("ff") || // multicast ff00::/8
88
+ normalized.startsWith("2001:db8") || // documentation
89
+ normalized.startsWith("::ffff:") // any remaining IPv4-mapped form we could not classify
90
+ );
91
+ }
92
+
93
+ /** True for any address that is not a routable public unicast address. */
94
+ export function isPrivateOrSpecialAddress(address: string): boolean {
95
+ const normalized = normalizeIPv4MappedIPv6(address);
96
+ const family = net.isIP(normalized);
97
+ if (family === 4) return isPrivateIPv4(normalized);
98
+ if (family === 6) return isPrivateIPv6(normalized);
99
+ // Re-check the raw value in case it was an IPv4-mapped IPv6 literal.
100
+ if (net.isIP(address) === 6) return isPrivateIPv6(address);
101
+ return true; // not a recognizable IP -> treat as unsafe
102
+ }
103
+
104
+ /**
105
+ * Validate that `rawUrl` is a public http/https target. Resolves DNS names and
106
+ * rejects any that map to a private/special address. Never throws; returns a
107
+ * discriminated result.
108
+ */
109
+ export async function validatePublicHttpUrl(
110
+ rawUrl: string,
111
+ options: { resolver?: AddressResolver } = {},
112
+ ): Promise<PublicUrlResult> {
113
+ const resolver = options.resolver ?? defaultResolver;
114
+
115
+ let url: URL;
116
+ try {
117
+ url = new URL(rawUrl);
118
+ } catch {
119
+ return { ok: false, reason: "invalid URL" };
120
+ }
121
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
122
+ return { ok: false, reason: `unsupported scheme ${url.protocol}` };
123
+ }
124
+ if (url.username || url.password) {
125
+ return { ok: false, reason: "URL credentials are not allowed" };
126
+ }
127
+ if (isBlockedHostname(url.hostname)) {
128
+ return { ok: false, reason: "localhost or internal host" };
129
+ }
130
+
131
+ const literalFamily = net.isIP(url.hostname);
132
+ if (literalFamily !== 0) {
133
+ if (isPrivateOrSpecialAddress(url.hostname)) {
134
+ return { ok: false, reason: "private, loopback, link-local, or reserved IP literal" };
135
+ }
136
+ return { ok: true, url, addresses: [url.hostname] };
137
+ }
138
+
139
+ let addresses: string[];
140
+ try {
141
+ addresses = await resolver(url.hostname);
142
+ } catch {
143
+ return { ok: false, reason: "host could not be resolved" };
144
+ }
145
+ if (addresses.length === 0) {
146
+ return { ok: false, reason: "host resolved to no addresses" };
147
+ }
148
+ if (addresses.some(isPrivateOrSpecialAddress)) {
149
+ return { ok: false, reason: "host resolves to a private or reserved address" };
150
+ }
151
+ return { ok: true, url, addresses };
152
+ }
153
+
154
+ export async function validatePublicHttpUrlForInsane(
155
+ rawUrl: string,
156
+ options: { resolver?: AddressResolver } = {},
157
+ ): Promise<PublicUrlResult> {
158
+ return validatePublicHttpUrl(rawUrl, options);
159
+ }
@@ -6,6 +6,8 @@ import type TurndownService from "turndown";
6
6
 
7
7
  import type { AgentStorage } from "../../session/agent-storage";
8
8
  import { ToolAbortError } from "../../tools/tool-errors";
9
+ import type { AddressResolver } from "../insane/url-guard";
10
+ import { validatePublicHttpUrl } from "../insane/url-guard";
9
11
 
10
12
  export { formatNumber } from "@gajae-code/utils";
11
13
 
@@ -35,6 +37,7 @@ const USER_AGENTS = [
35
37
  "Mozilla/5.0 (compatible; TextBot/1.0)",
36
38
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
37
39
  ];
40
+ const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
38
41
 
39
42
  function isBotBlocked(status: number, content: string): boolean {
40
43
  if (status === 403 || status === 503) {
@@ -70,6 +73,9 @@ export interface LoadPageOptions {
70
73
  body?: string;
71
74
  maxBytes?: number;
72
75
  signal?: AbortSignal;
76
+ publicUrlGuard?: boolean;
77
+ resolver?: AddressResolver;
78
+ maxRedirects?: number;
73
79
  }
74
80
 
75
81
  export interface LoadPageResult {
@@ -78,87 +84,179 @@ export interface LoadPageResult {
78
84
  finalUrl: string;
79
85
  ok: boolean;
80
86
  status?: number;
87
+ error?: string;
88
+ }
89
+
90
+ async function guardPublicFetchUrl(
91
+ rawUrl: string,
92
+ resolver: AddressResolver | undefined,
93
+ context: string,
94
+ ): Promise<{ ok: true; url: string } | { ok: false; error: string; finalUrl: string }> {
95
+ const guard = await validatePublicHttpUrl(rawUrl, { resolver });
96
+ if (guard.ok) return { ok: true, url: guard.url.toString() };
97
+ return {
98
+ ok: false,
99
+ error: `${context}: target URL is not public HTTP(S): ${guard.reason}`,
100
+ finalUrl: rawUrl,
101
+ };
102
+ }
103
+
104
+ function shouldRewriteRedirectMethod(status: number, method: string): boolean {
105
+ const normalized = method.toUpperCase();
106
+ return status === 303 || ((status === 301 || status === 302) && normalized === "POST");
81
107
  }
82
108
 
83
109
  /**
84
110
  * Fetch a page with timeout and size limit
85
111
  */
86
112
  export async function loadPage(url: string, options: LoadPageOptions = {}): Promise<LoadPageResult> {
87
- const { timeout = 20, headers = {}, maxBytes = MAX_BYTES, signal, method = "GET", body } = options;
113
+ const {
114
+ timeout = 20,
115
+ headers = {},
116
+ maxBytes = MAX_BYTES,
117
+ signal,
118
+ method = "GET",
119
+ body,
120
+ publicUrlGuard = true,
121
+ resolver,
122
+ maxRedirects = 10,
123
+ } = options;
124
+
125
+ let initialUrl = url;
126
+ if (publicUrlGuard) {
127
+ const guarded = await guardPublicFetchUrl(url, resolver, "Blocked URL fetch");
128
+ if (!guarded.ok) {
129
+ return {
130
+ content: "",
131
+ contentType: "",
132
+ finalUrl: guarded.finalUrl,
133
+ ok: false,
134
+ error: guarded.error,
135
+ };
136
+ }
137
+ initialUrl = guarded.url;
138
+ }
88
139
 
89
- for (let attempt = 0; attempt < USER_AGENTS.length; attempt++) {
140
+ attempts: for (let attempt = 0; attempt < USER_AGENTS.length; attempt++) {
90
141
  if (signal?.aborted) {
91
142
  throw new ToolAbortError();
92
143
  }
93
144
 
94
145
  const userAgent = USER_AGENTS[attempt];
95
146
  const requestSignal = ptree.combineSignals(signal, timeout * 1000);
147
+ let currentUrl = initialUrl;
148
+ let currentMethod = method;
149
+ let currentBody = body;
96
150
 
97
151
  try {
98
- const requestInit: RequestInit = {
99
- signal: requestSignal,
100
- method,
101
- headers: {
102
- "User-Agent": userAgent,
103
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
104
- "Accept-Language": "en-US,en;q=0.5",
105
- "Accept-Encoding": "identity", // Cloudflare Markdown-for-Agents returns corrupted bytes when compression is negotiated
106
- ...headers,
107
- },
108
- redirect: "follow",
109
- };
152
+ for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
153
+ const requestInit: RequestInit = {
154
+ signal: requestSignal,
155
+ method: currentMethod,
156
+ headers: {
157
+ "User-Agent": userAgent,
158
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
159
+ "Accept-Language": "en-US,en;q=0.5",
160
+ "Accept-Encoding": "identity", // Cloudflare Markdown-for-Agents returns corrupted bytes when compression is negotiated
161
+ ...headers,
162
+ },
163
+ redirect: "manual",
164
+ };
165
+
166
+ if (currentBody !== undefined) {
167
+ requestInit.body = currentBody;
168
+ }
110
169
 
111
- if (body !== undefined) {
112
- requestInit.body = body;
113
- }
170
+ const response = await fetch(currentUrl, requestInit);
171
+ if (REDIRECT_STATUSES.has(response.status)) {
172
+ const location = response.headers.get("location");
173
+ if (!location) {
174
+ return {
175
+ content: "",
176
+ contentType: "",
177
+ finalUrl: currentUrl,
178
+ ok: false,
179
+ status: response.status,
180
+ error: "Redirect response missing Location header",
181
+ };
182
+ }
183
+ const redirectUrl = new URL(location, currentUrl).toString();
184
+ if (publicUrlGuard) {
185
+ const guarded = await guardPublicFetchUrl(redirectUrl, resolver, "Blocked URL redirect");
186
+ if (!guarded.ok) {
187
+ return {
188
+ content: "",
189
+ contentType: "",
190
+ finalUrl: guarded.finalUrl,
191
+ ok: false,
192
+ status: response.status,
193
+ error: guarded.error,
194
+ };
195
+ }
196
+ currentUrl = guarded.url;
197
+ } else {
198
+ currentUrl = redirectUrl;
199
+ }
200
+ if (shouldRewriteRedirectMethod(response.status, currentMethod)) {
201
+ currentMethod = "GET";
202
+ currentBody = undefined;
203
+ }
204
+ continue;
205
+ }
114
206
 
115
- const response = await fetch(url, requestInit);
207
+ const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
208
+ const finalUrl = response.url || currentUrl;
116
209
 
117
- const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
118
- const finalUrl = response.url;
210
+ const reader = response.body?.getReader();
211
+ if (!reader) {
212
+ return { content: "", contentType, finalUrl, ok: false, status: response.status };
213
+ }
119
214
 
120
- const reader = response.body?.getReader();
121
- if (!reader) {
122
- return { content: "", contentType, finalUrl, ok: false, status: response.status };
123
- }
215
+ const chunks: Uint8Array[] = [];
216
+ let totalSize = 0;
124
217
 
125
- const chunks: Uint8Array[] = [];
126
- let totalSize = 0;
218
+ while (true) {
219
+ const { done, value } = await reader.read();
220
+ if (done) break;
127
221
 
128
- while (true) {
129
- const { done, value } = await reader.read();
130
- if (done) break;
222
+ chunks.push(value);
223
+ totalSize += value.length;
131
224
 
132
- chunks.push(value);
133
- totalSize += value.length;
225
+ if (totalSize > maxBytes) {
226
+ reader.cancel();
227
+ break;
228
+ }
229
+ }
134
230
 
135
- if (totalSize > maxBytes) {
136
- reader.cancel();
137
- break;
231
+ const content = Buffer.concat(chunks).toString("utf-8");
232
+ if (isBotBlocked(response.status, content) && attempt < USER_AGENTS.length - 1) {
233
+ continue attempts;
138
234
  }
139
- }
140
235
 
141
- const content = Buffer.concat(chunks).toString("utf-8");
142
- if (isBotBlocked(response.status, content) && attempt < USER_AGENTS.length - 1) {
143
- continue;
144
- }
236
+ if (!response.ok) {
237
+ return { content, contentType, finalUrl, ok: false, status: response.status };
238
+ }
145
239
 
146
- if (!response.ok) {
147
- return { content, contentType, finalUrl, ok: false, status: response.status };
240
+ return { content, contentType, finalUrl, ok: true, status: response.status };
148
241
  }
149
-
150
- return { content, contentType, finalUrl, ok: true, status: response.status };
242
+ return {
243
+ content: "",
244
+ contentType: "",
245
+ finalUrl: currentUrl,
246
+ ok: false,
247
+ error: `Too many redirects (${maxRedirects})`,
248
+ };
151
249
  } catch {
152
250
  if (signal?.aborted) {
153
251
  throw new ToolAbortError();
154
252
  }
155
253
  if (attempt === USER_AGENTS.length - 1) {
156
- return { content: "", contentType: "", finalUrl: url, ok: false };
254
+ return { content: "", contentType: "", finalUrl: currentUrl, ok: false };
157
255
  }
158
256
  }
159
257
  }
160
258
 
161
- return { content: "", contentType: "", finalUrl: url, ok: false };
259
+ return { content: "", contentType: "", finalUrl: initialUrl, ok: false };
162
260
  }
163
261
 
164
262
  /** Module-level Turndown instance — built lazily on first use. */
@@ -4,6 +4,8 @@ export { isRecord };
4
4
 
5
5
  import { ToolAbortError } from "../../tools/tool-errors";
6
6
  import { convertBufferWithMarkit } from "../../utils/markit";
7
+ import type { AddressResolver } from "../insane/url-guard";
8
+ import { validatePublicHttpUrl } from "../insane/url-guard";
7
9
  import { MAX_BYTES } from "./types";
8
10
 
9
11
  export function asRecord(value: unknown): Record<string, unknown> | null {
@@ -28,6 +30,14 @@ export interface BinaryFetchSuccess {
28
30
 
29
31
  export type BinaryFetchResult = BinaryFetchSuccess | { ok: false; error?: string };
30
32
 
33
+ export interface FetchBinaryOptions {
34
+ publicUrlGuard?: boolean;
35
+ resolver?: AddressResolver;
36
+ maxRedirects?: number;
37
+ }
38
+
39
+ const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
40
+
31
41
  async function readResponseWithLimit(response: Response, maxBytes: number, signal?: AbortSignal): Promise<Uint8Array> {
32
42
  const reader = response.body?.getReader();
33
43
  if (!reader) return new Uint8Array(0);
@@ -60,34 +70,75 @@ async function readResponseWithLimit(response: Response, maxBytes: number, signa
60
70
  return new Uint8Array(Buffer.concat(chunks, totalBytes));
61
71
  }
62
72
 
73
+ async function guardPublicBinaryUrl(
74
+ rawUrl: string,
75
+ resolver: AddressResolver | undefined,
76
+ context: string,
77
+ ): Promise<{ ok: true; url: string } | { ok: false; error: string }> {
78
+ const guard = await validatePublicHttpUrl(rawUrl, { resolver });
79
+ if (guard.ok) return { ok: true, url: guard.url.toString() };
80
+ return { ok: false, error: `${context}: target URL is not public HTTP(S): ${guard.reason}` };
81
+ }
82
+
63
83
  /**
64
84
  * Fetch binary content from a URL
65
85
  */
66
- export async function fetchBinary(url: string, timeout: number = 20, signal?: AbortSignal): Promise<BinaryFetchResult> {
86
+ export async function fetchBinary(
87
+ url: string,
88
+ timeout: number = 20,
89
+ signal?: AbortSignal,
90
+ options: FetchBinaryOptions = {},
91
+ ): Promise<BinaryFetchResult> {
67
92
  const requestSignal = ptree.combineSignals(signal, timeout * 1000);
93
+ const { publicUrlGuard = true, resolver, maxRedirects = 10 } = options;
68
94
  try {
69
- const response = await fetch(url, {
70
- signal: requestSignal,
71
- headers: {
72
- "User-Agent": "Mozilla/5.0 (compatible; TextBot/1.0)",
73
- },
74
- redirect: "follow",
75
- });
76
-
77
- if (!response.ok) {
78
- return { ok: false, error: `HTTP ${response.status}` };
95
+ let currentUrl = url;
96
+ if (publicUrlGuard) {
97
+ const guarded = await guardPublicBinaryUrl(url, resolver, "Blocked binary fetch");
98
+ if (!guarded.ok) return { ok: false, error: guarded.error };
99
+ currentUrl = guarded.url;
79
100
  }
80
101
 
81
- const contentDisposition = response.headers.get("content-disposition") || undefined;
82
- const contentLength = response.headers.get("content-length");
83
- if (contentLength) {
84
- const size = Number.parseInt(contentLength, 10);
85
- if (Number.isFinite(size) && size > MAX_BYTES) {
86
- return { ok: false, error: `content-length ${size} exceeds ${MAX_BYTES}` };
102
+ for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
103
+ const response = await fetch(currentUrl, {
104
+ signal: requestSignal,
105
+ headers: {
106
+ "User-Agent": "Mozilla/5.0 (compatible; TextBot/1.0)",
107
+ },
108
+ redirect: "manual",
109
+ });
110
+
111
+ if (REDIRECT_STATUSES.has(response.status)) {
112
+ const location = response.headers.get("location");
113
+ if (!location) return { ok: false, error: "Redirect response missing Location header" };
114
+ const redirectUrl = new URL(location, currentUrl).toString();
115
+ if (publicUrlGuard) {
116
+ const guarded = await guardPublicBinaryUrl(redirectUrl, resolver, "Blocked binary redirect");
117
+ if (!guarded.ok) return { ok: false, error: guarded.error };
118
+ currentUrl = guarded.url;
119
+ } else {
120
+ currentUrl = redirectUrl;
121
+ }
122
+ continue;
123
+ }
124
+
125
+ if (!response.ok) {
126
+ return { ok: false, error: `HTTP ${response.status}` };
87
127
  }
128
+
129
+ const contentDisposition = response.headers.get("content-disposition") || undefined;
130
+ const contentLength = response.headers.get("content-length");
131
+ if (contentLength) {
132
+ const size = Number.parseInt(contentLength, 10);
133
+ if (Number.isFinite(size) && size > MAX_BYTES) {
134
+ return { ok: false, error: `content-length ${size} exceeds ${MAX_BYTES}` };
135
+ }
136
+ }
137
+ const buffer = await readResponseWithLimit(response, MAX_BYTES, requestSignal);
138
+ return { ok: true, buffer, contentDisposition };
88
139
  }
89
- const buffer = await readResponseWithLimit(response, MAX_BYTES, requestSignal);
90
- return { ok: true, buffer, contentDisposition };
140
+
141
+ return { ok: false, error: `Too many redirects (${maxRedirects})` };
91
142
  } catch (err) {
92
143
  if (signal?.aborted) throw new ToolAbortError();
93
144
  if (requestSignal?.aborted) return { ok: false, error: "aborted" };
@@ -72,6 +72,11 @@ const PROVIDER_META: Record<SearchProviderId, ProviderMeta> = {
72
72
  label: "DuckDuckGo",
73
73
  load: async () => new (await import("./providers/duckduckgo")).DuckDuckGoProvider(),
74
74
  },
75
+ insane: {
76
+ id: "insane",
77
+ label: "Insane",
78
+ load: async () => new (await import("./providers/insane")).InsaneProvider(),
79
+ },
75
80
  "openai-compatible": {
76
81
  id: "openai-compatible",
77
82
  label: "OpenAI-compatible",
@@ -97,6 +102,7 @@ export async function getSearchProvider(id: SearchProviderId): Promise<SearchPro
97
102
 
98
103
  export const SEARCH_PROVIDER_ORDER: SearchProviderId[] = [
99
104
  "duckduckgo",
105
+ "insane",
100
106
  "tavily",
101
107
  "perplexity",
102
108
  "brave",
@@ -234,14 +240,41 @@ export function isLocalBaseUrl(baseUrl: string | undefined): boolean {
234
240
  return false;
235
241
  }
236
242
 
243
+ /**
244
+ * Whether `baseUrl` is an official OpenAI endpoint (or absent, i.e. the default
245
+ * hosted OpenAI). The dedicated `codex` provider authenticates against the
246
+ * ChatGPT backend with the user's *local* Codex OAuth, so it must only be
247
+ * selected when the active model is genuinely served by OpenAI/ChatGPT — never
248
+ * for a custom/proxy endpoint, which should reuse its own credentials through
249
+ * the `openai-compatible` adapter instead.
250
+ */
251
+ function isOpenAIOfficialBaseUrl(baseUrl: string | undefined): boolean {
252
+ if (!baseUrl?.trim()) return true;
253
+ let host: string;
254
+ try {
255
+ host = new URL(baseUrl).hostname.toLowerCase();
256
+ } catch {
257
+ return false;
258
+ }
259
+ return (
260
+ host === "api.openai.com" ||
261
+ host === "chatgpt.com" ||
262
+ host.endsWith(".openai.com") ||
263
+ host.endsWith(".chatgpt.com")
264
+ );
265
+ }
266
+
237
267
  export function inferNativeProviderFromModel(ctx: ActiveSearchModelContext | undefined): SearchProviderId | undefined {
238
268
  if (!ctx || ctx.webSearch === "off") return undefined;
239
269
  const modelId = (ctx.wireModelId ?? ctx.modelId).toLowerCase();
240
270
  if (modelId.startsWith("claude-") && isAnthropicWire(ctx.api)) return "anthropic";
241
271
  if (modelId.startsWith("gemini-") && isGoogleWire(ctx.api)) return "gemini";
242
272
  if (looksXaiFamilyModelId(ctx) && isOpenAICompatWire(ctx.api)) return "xai";
243
- if (looksOpenAIFamilyModelId(ctx) && isOpenAICompatWire(ctx.api)) {
244
- if (ctx.webSearch === "on" || !isLocalBaseUrl(ctx.baseUrl)) return "codex";
273
+ // `codex` hits the ChatGPT backend with local Codex OAuth, so only infer it
274
+ // for genuine OpenAI endpoints. Custom/proxy OpenAI-compatible models fall
275
+ // through to `activeContextNativeId` → `openai-compatible` (their own creds).
276
+ if (looksOpenAIFamilyModelId(ctx) && isOpenAICompatWire(ctx.api) && isOpenAIOfficialBaseUrl(ctx.baseUrl)) {
277
+ return "codex";
245
278
  }
246
279
  return undefined;
247
280
  }
@@ -249,8 +282,9 @@ export function inferNativeProviderFromModel(ctx: ActiveSearchModelContext | und
249
282
  function canUseDirectProviderMapping(ctx: ActiveSearchModelContext, id: SearchProviderId): boolean {
250
283
  if (ctx.webSearch === "off") return false;
251
284
  if (id !== "codex") return true;
252
- if (!isOpenAICompatWire(ctx.api)) return true;
253
- return ctx.webSearch === "on" || !isLocalBaseUrl(ctx.baseUrl);
285
+ // Same constraint as inference: the ChatGPT-backed codex provider is valid
286
+ // only for official OpenAI endpoints, not custom/proxy base URLs.
287
+ return isOpenAIOfficialBaseUrl(ctx.baseUrl);
254
288
  }
255
289
 
256
290
  export async function canUseGenericCredentials(
@@ -268,17 +302,35 @@ export async function canUseGenericCredentials(
268
302
  return Boolean(key);
269
303
  }
270
304
 
271
- export async function shouldTryGenericOpenAICompat(
272
- authStorage: AuthStorage,
273
- ctx: ActiveSearchModelContext | undefined,
274
- sessionId?: string,
275
- signal?: AbortSignal,
276
- ): Promise<boolean> {
277
- if (!ctx || ctx.webSearch === "off" || !isOpenAICompatWire(ctx.api)) return false;
278
- const autoAllowed =
279
- ctx.webSearch === "on" ||
280
- ((ctx.api === "openai-responses" || looksOpenAIFamilyModelId(ctx)) && !isLocalBaseUrl(ctx.baseUrl));
281
- return autoAllowed && (await canUseGenericCredentials(authStorage, ctx, sessionId, signal));
305
+ /**
306
+ * Native web-search provider to attempt by reusing the ACTIVE model's own
307
+ * credentials + baseUrl, dispatched by the model's wire protocol.
308
+ *
309
+ * This is the "native search over a proxy" path: when a model is served through
310
+ * a proxy/custom endpoint, its canonical search credentials (e.g. a dedicated
311
+ * `anthropic` key, or ChatGPT OAuth for `codex`) are usually absent, but the
312
+ * credential that authenticates the model itself — stored under the active
313
+ * provider id and aimed at `ctx.baseUrl` — can drive native web search just as
314
+ * well. Each provider's `search()` falls back to those active credentials when
315
+ * its canonical ones are missing.
316
+ *
317
+ * Returned ids are matched purely from the wire `api` (+ model-id family where a
318
+ * native tool only makes sense for that family); the providers themselves fail
319
+ * closed (and the chain falls through to DuckDuckGo) if the endpoint does not
320
+ * actually support web search.
321
+ */
322
+ export function activeContextNativeId(ctx: ActiveSearchModelContext | undefined): SearchProviderId | undefined {
323
+ if (!ctx || ctx.webSearch === "off") return undefined;
324
+ const modelId = (ctx.wireModelId ?? ctx.modelId).toLowerCase();
325
+ // Dispatch must match exactly what each provider can service by reusing the
326
+ // active credential: the OpenAI-compatible adapter only speaks the two plain
327
+ // OpenAI wires (not azure), and the Gemini active path only speaks the public
328
+ // Generative Language wire (not vertex/cloud-code). Returning an id the
329
+ // provider would reject just wastes a guaranteed-fail attempt before DuckDuckGo.
330
+ if (isAnthropicWire(ctx.api) && modelId.startsWith("claude-")) return "anthropic";
331
+ if (ctx.api === "openai-responses" || ctx.api === "openai-completions") return "openai-compatible";
332
+ if (ctx.api === "google-generative-ai" && modelId.startsWith("gemini-")) return "gemini";
333
+ return undefined;
282
334
  }
283
335
 
284
336
  export async function resolveProviderChain(options: ResolveProviderChainOptions): Promise<SearchProvider[]> {
@@ -304,9 +356,16 @@ export async function resolveProviderChain(options: ResolveProviderChainOptions)
304
356
  await appendAvailable(chain, directId, authStorage);
305
357
  const inferred = inferNativeProviderFromModel(activeModelContext);
306
358
  if (inferred) await appendAvailable(chain, inferred, authStorage);
307
- const hasNativeXai = chain.includes("xai");
308
- if (!hasNativeXai && (await shouldTryGenericOpenAICompat(authStorage, activeModelContext, sessionId, signal)))
309
- appendDeduped(chain, "openai-compatible");
359
+ // Native-over-proxy: when no canonical native provider was selected above,
360
+ // fall back to the model's own credentials (resolved under the active
361
+ // provider id against its baseUrl) to drive native web search. Gated on
362
+ // those credentials actually resolving; otherwise the chain ends at the
363
+ // keyless DuckDuckGo terminal fallback.
364
+ if (chain.length === 0) {
365
+ const activeNativeId = activeContextNativeId(activeModelContext);
366
+ if (activeNativeId && (await canUseGenericCredentials(authStorage, activeModelContext, sessionId, signal)))
367
+ chain.push(activeNativeId);
368
+ }
310
369
  }
311
370
 
312
371
  // Configured fallbacks are user-facing only: the internal `openai-compatible`