@gajae-code/coding-agent 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +38 -0
  2. package/dist/types/cli/mcp-cli.d.ts +25 -0
  3. package/dist/types/cli.d.ts +6 -0
  4. package/dist/types/commands/mcp.d.ts +70 -0
  5. package/dist/types/config/keybindings.d.ts +2 -2
  6. package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
  7. package/dist/types/modes/components/custom-editor.d.ts +1 -1
  8. package/dist/types/modes/components/model-selector.d.ts +2 -0
  9. package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
  10. package/dist/types/modes/theme/defaults/index.d.ts +99 -0
  11. package/dist/types/notifications/operator-runtime.d.ts +52 -0
  12. package/dist/types/notifications/telegram-daemon.d.ts +54 -16
  13. package/dist/types/notifications/topic-registry.d.ts +2 -0
  14. package/dist/types/tools/composer-bash-policy.d.ts +14 -0
  15. package/dist/types/web/insane/url-guard.d.ts +6 -3
  16. package/dist/types/web/scrapers/types.d.ts +5 -0
  17. package/dist/types/web/scrapers/utils.d.ts +7 -1
  18. package/package.json +7 -7
  19. package/src/cli/mcp-cli.ts +272 -0
  20. package/src/cli.ts +6 -2
  21. package/src/commands/mcp.ts +117 -0
  22. package/src/config/keybindings.ts +2 -2
  23. package/src/deep-interview/plaintext-gate-guard.ts +94 -0
  24. package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
  25. package/src/defaults/gjc/skills/team/SKILL.md +3 -2
  26. package/src/extensibility/extensions/runner.ts +1 -0
  27. package/src/gjc-runtime/tmux-common.ts +3 -1
  28. package/src/gjc-runtime/ultragoal-guard.ts +25 -8
  29. package/src/hooks/skill-state.ts +57 -0
  30. package/src/internal-urls/docs-index.generated.ts +10 -7
  31. package/src/modes/bridge/bridge-mode.ts +11 -0
  32. package/src/modes/components/custom-editor.ts +2 -0
  33. package/src/modes/components/footer.ts +2 -3
  34. package/src/modes/components/model-selector.ts +12 -0
  35. package/src/modes/components/status-line/git-utils.ts +25 -0
  36. package/src/modes/components/status-line.ts +10 -11
  37. package/src/modes/components/welcome.ts +2 -3
  38. package/src/modes/controllers/selector-controller.ts +3 -0
  39. package/src/modes/interactive-mode.ts +2 -1
  40. package/src/modes/shared/agent-wire/scopes.ts +1 -1
  41. package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
  42. package/src/modes/theme/defaults/index.ts +2 -0
  43. package/src/notifications/operator-runtime.ts +171 -0
  44. package/src/notifications/telegram-daemon.ts +347 -251
  45. package/src/notifications/topic-registry.ts +5 -0
  46. package/src/slash-commands/helpers/parse.ts +2 -1
  47. package/src/tools/bash.ts +9 -0
  48. package/src/tools/composer-bash-policy.ts +96 -0
  49. package/src/tools/fetch.ts +18 -2
  50. package/src/web/insane/url-guard.ts +18 -14
  51. package/src/web/scrapers/types.ts +143 -45
  52. package/src/web/scrapers/utils.ts +70 -19
@@ -65,6 +65,11 @@ export class TopicRegistry {
65
65
  return this.byTopic.get(topicId);
66
66
  }
67
67
 
68
/** Session ids of every topic record currently held by this registry. */
sessionIds(): string[] {
  return Array.from(this.topics.keys());
}
72
+
68
73
  /** The existing topic record for a session, if any. */
69
74
  get(sessionId: string): TopicRecord | undefined {
70
75
  return this.topics.get(sessionId);
@@ -1,3 +1,4 @@
1
+ import { parseCommandArgs } from "../../utils/command-args";
1
2
  import type { ParsedSlashCommand, SlashCommandResult, SlashCommandRuntime } from "../types";
2
3
 
3
4
  export interface ParsedSubcommand {
@@ -65,7 +66,7 @@ export function errorMessage(error: unknown): string {
65
66
  * "name required" diagnostics with their own messaging.
66
67
  */
67
68
  export function parseNamedScopeArgs(rest: string, invalidScopeMessage: string): NamedScopeArgs {
68
- const tokens = rest.split(/\s+/).filter(Boolean);
69
+ const tokens = parseCommandArgs(rest);
69
70
  let name: string | undefined;
70
71
  let scope: ConfigScope = "project";
71
72
  let i = 0;
package/src/tools/bash.ts CHANGED
@@ -25,6 +25,7 @@ import { type BashInteractiveResult, runInteractiveBashPty } from "./bash-intera
25
25
  import { checkBashInterception } from "./bash-interceptor";
26
26
  import { canUseInteractiveBashPty } from "./bash-pty-selection";
27
27
  import { expandInternalUrls, type InternalUrlExpansionOptions } from "./bash-skill-urls";
28
+ import { checkComposerBashPolicy } from "./composer-bash-policy";
28
29
  import { formatStyledTruncationWarning, type OutputMeta, stripOutputNotice } from "./output-meta";
29
30
  import { resolveToCwd } from "./path-utils";
30
31
  import { formatToolWorkingDirectory, replaceTabs } from "./render-utils";
@@ -570,6 +571,14 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
570
571
  }
571
572
  }
572
573
 
574
+ const composerPolicy = checkComposerBashPolicy({
575
+ modelId: this.session.getActiveModelString?.() ?? this.session.getModelString?.() ?? this.session.model?.id,
576
+ commands: rawCommand === command ? [command] : [rawCommand, command],
577
+ });
578
+ if (!composerPolicy.allowed) {
579
+ throw new ToolError(composerPolicy.message);
580
+ }
581
+
573
582
  const internalUrlOptions: InternalUrlExpansionOptions = {
574
583
  skills: this.session.skills ?? [],
575
584
  internalRouter: InternalUrlRouter.instance(),
@@ -0,0 +1,96 @@
1
+ import { isComposerHarnessModel } from "@gajae-code/ai/providers/composer-discipline";
2
+
3
+ export const COMPOSER_BASH_POLICY_ERROR =
4
+ "Composer bash policy blocked repository file I/O. Use find, search, read, and edit tools for file discovery, file inspection, and file mutation.";
5
+
6
+ type ComposerBashPolicyResult =
7
+ | { allowed: true }
8
+ | {
9
+ allowed: false;
10
+ reason: string;
11
+ message: string;
12
+ };
13
+
14
/**
 * Deny-list of shell constructs and commands for the composer bash policy.
 *
 * `checkComposerBashPolicy` tests every command against these entries in
 * array order; the `id` of the first entry whose `pattern` matches is
 * returned as the rejection `reason`. None of the patterns use the `g` or
 * `y` flag, so `RegExp.prototype.test` keeps no state between calls.
 */
+ const BLOCK_PATTERNS: Array<{ id: string; pattern: RegExp }> = [
15
+ { id: "pipe", pattern: /\|/ },
16
+ { id: "process-substitution", pattern: /<[>(]/ },
17
+ { id: "heredoc", pattern: /<<[-~]?/ },
18
+ { id: "command-substitution", pattern: /\$\(|`/ },
19
+ { id: "redirection", pattern: /(^|[^<>])(?:>>?|<)(?!=)/ },
20
+ { id: "tee", pattern: /(?:^|[;&|\s])tee(?:\s|$)/ },
21
+ {
22
+ id: "shell-file-read-discovery",
23
+ pattern: /(?:^|[;&|()\s])(?:\S*\/)?(?:cat|head|tail|less|more|grep|rg|find|fd|tree|ls)\b/,
24
+ },
25
+ {
26
+ id: "shell-file-mutation",
27
+ pattern: /(?:^|[;&|()\s])(?:\S*\/)?(?:cp|mv|rm|touch|mkdir|chmod|chown|ln)\b/,
28
+ },
29
+ { id: "sed-print", pattern: /(?:^|[;&|()\s])sed\s+(?:-[^\s]*n\b|.*\bp\b)/ },
30
+ { id: "awk-print", pattern: /(?:^|[;&|()\s])awk\b/ },
31
+ { id: "git-ls-files", pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+ls-files\b/ },
32
+ { id: "git-grep", pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+grep\b/ },
33
+ { id: "git-show-path", pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+show\s+\S+:\S+/ },
34
+ { id: "git-diff", pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+diff(?:\s|$)/ },
35
+ { id: "git-cat-file", pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+cat-file\b/ },
36
+ {
37
+ id: "git-show-discovery",
38
+ pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+show\b.*(?:--name-only|--name-status|--stat)/,
39
+ },
40
+ {
41
+ id: "git-log-path-discovery",
42
+ pattern: /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+log\b.*(?:--name-only|--name-status|--stat)/,
43
+ },
44
+ { id: "sed-in-place", pattern: /(?:^|[;&|()\s])sed\s+-[^\s]*i\b/ },
45
+ { id: "perl-in-place", pattern: /(?:^|[;&|()\s])perl\s+-[^\s]*p[^\s]*i\b/ },
46
+ {
47
+ id: "script-file-io",
48
+ pattern:
49
+ /(?:^|[;&|()\s])(?:python3?|node|bun)\s+(?:-\s*<<|-c\b|-e\b|--eval\b).*?(?:read_text|read_bytes|write_text|iterdir|listdir|glob\.glob|readFile|readFileSync|writeFile|writeFileSync|readdir|readdirSync|stat|statSync|cpSync|rmSync|mkdirSync|createReadStream|createWriteStream|Bun\.file|Bun\.write|fs\.readFile|fs\.writeFile|fs\.readdir|fs\.stat|fs\.cp|fs\.rm|fs\.mkdir|open\s*\()/s,
50
+ },
51
+ {
52
+ id: "contaminated-command",
53
+ pattern: /```|^\s*(?:I\s+(?:will|need|am going)|We\s+(?:need|will)|First[, ]|Now[, ]|Let's)\b/im,
54
+ },
55
+ ];
56
+
57
/**
 * Allow-list of complete terminal commands. Every pattern is anchored with
 * `^…$` and is matched against a single-line, space-normalised command.
 */
const ALLOWED_TERMINAL_PATTERNS: RegExp[] = [
  /^bun\s+test(?:\s+[\w./:@=-]+)*$/,
  /^bun\s+run\s+(?:check(?::[\w-]+)?|test(?::[\w-]+)?|build(?::[\w-]+)?)(?:\s+[\w./:@=-]+)*$/,
  /^bun\s+--version$/,
  /^mise\s+x\s+bun@\d+\.\d+\.\d+\s+--\s+bun\s+test(?:\s+[\w./:@=-]+)*$/,
  /^mise\s+x\s+bun@\d+\.\d+\.\d+\s+--\s+bun\s+run\s+(?:check(?::[\w-]+)?|test(?::[\w-]+)?|build(?::[\w-]+)?)(?:\s+[\w./:@=-]+)*$/,
  /^cargo\s+(?:test|check|build)(?:\s+[\w./:@=-]+)*$/,
  /^git\s+status(?:\s+--short)?(?:\s+--branch)?$/,
  /^git\s+rev-parse\s+HEAD$/,
  /^npm\s+--version$/,
  /^pnpm\s+--version$/,
  /^yarn\s+--version$/,
];

/** Matches any whitespace character other than a plain space or tab (newline, CR, VT, FF, NBSP, …). */
const NON_INLINE_WHITESPACE = /[^\S \t]/;

/**
 * Report whether `command` is exactly one allow-listed terminal command.
 *
 * Leading and trailing whitespace is ignored and interior runs of spaces/tabs
 * are collapsed to one space before matching.
 *
 * @param command Raw command text as it would be handed to the shell.
 * @returns `true` only when the whole command matches an allow-list pattern.
 */
function isAllowedComposerTerminalCommand(command: string): boolean {
  const trimmed = command.trim();
  // A newline separates commands in the shell. Collapsing it into a space would
  // let "bun test\n<anything>" pass as arguments of an allowed command, so any
  // interior whitespace other than space/tab is rejected (fail closed).
  if (NON_INLINE_WHITESPACE.test(trimmed)) return false;
  const normalized = trimmed.replace(/[ \t]+/g, " ");
  return ALLOWED_TERMINAL_PATTERNS.some(pattern => pattern.test(normalized));
}
75
+
76
/**
 * Tell whether the composer bash policy applies to `modelId`.
 *
 * @param modelId Active model identifier; may be missing.
 * @returns `false` for a missing or empty id, otherwise the truthiness of
 *   `isComposerHarnessModel(modelId)`.
 */
export function isComposerBashPolicyModel(modelId: string | undefined): boolean {
  if (!modelId) {
    return false;
  }
  return Boolean(isComposerHarnessModel(modelId));
}
79
+
80
/**
 * Evaluate the composer bash policy for a set of command strings.
 *
 * Models that are not composer-policy models are always allowed. Otherwise
 * each command is checked, in order, first against the deny-list and then
 * against the allow-list; the first failure ends the evaluation.
 *
 * @param input.modelId Identifier of the model issuing the command.
 * @param input.commands Command strings to vet.
 * @returns `{ allowed: true }`, or a rejection whose `reason` is the matching
 *   deny-list id or `"not-allowlisted"`.
 */
export function checkComposerBashPolicy(input: {
  modelId?: string;
  commands: readonly string[];
}): ComposerBashPolicyResult {
  if (!isComposerBashPolicyModel(input.modelId)) return { allowed: true };

  const reject = (reason: string): ComposerBashPolicyResult => ({
    allowed: false,
    reason,
    message: COMPOSER_BASH_POLICY_ERROR,
  });

  for (const candidate of input.commands) {
    const blockedBy = BLOCK_PATTERNS.find(({ pattern }) => pattern.test(candidate));
    if (blockedBy) return reject(blockedBy.id);
    if (!isAllowedComposerTerminalCommand(candidate)) return reject("not-allowlisted");
  }
  return { allowed: true };
}
@@ -17,7 +17,7 @@ import { CachedOutputBlock } from "../tui/output-block";
17
17
  import { formatDimensionNote, resizeImage } from "../utils/image-resize";
18
18
  import { ensureTool } from "../utils/tools-manager";
19
19
  import { INSANE_NOTES, tryInsaneFetch } from "../web/insane/bridge";
20
- import { validatePublicHttpUrlForInsane } from "../web/insane/url-guard";
20
+ import { validatePublicHttpUrl, validatePublicHttpUrlForInsane } from "../web/insane/url-guard";
21
21
  import { extractWithParallel, findParallelApiKey, getParallelExtractContent } from "../web/parallel";
22
22
  import { specialHandlers } from "../web/scrapers";
23
23
  import type { RenderResult } from "../web/scrapers/types";
@@ -789,6 +789,21 @@ async function renderUrl(
789
789
 
790
790
  // Step 0: Normalize URL (ensure scheme for special handlers)
791
791
  url = normalizeUrl(url);
792
+ const publicUrl = await validatePublicHttpUrl(url);
793
+ if (!publicUrl.ok) {
794
+ notes.push(`Blocked URL fetch: target URL is not public HTTP(S): ${publicUrl.reason}`);
795
+ return {
796
+ url,
797
+ finalUrl: url,
798
+ contentType: "unknown",
799
+ method: "failed",
800
+ content: "",
801
+ fetchedAt,
802
+ truncated: false,
803
+ notes,
804
+ };
805
+ }
806
+ url = publicUrl.url.toString();
792
807
 
793
808
  // Step 1: Try special handlers for known sites (unless raw mode)
794
809
  if (!raw) {
@@ -802,7 +817,8 @@ async function renderUrl(
802
817
  throw new ToolAbortError();
803
818
  }
804
819
  if (!response.ok) {
805
- const failureNote = response.status ? `Failed to fetch URL (HTTP ${response.status})` : "Failed to fetch URL";
820
+ const failureNote =
821
+ response.error ?? (response.status ? `Failed to fetch URL (HTTP ${response.status})` : "Failed to fetch URL");
806
822
  notes.push(failureNote);
807
823
  const insane = await tryInsaneFallback({
808
824
  url,
@@ -1,16 +1,13 @@
1
1
  /**
2
- * Public HTTP(S) URL guard for the insane-search read fallback.
2
+ * Public HTTP(S) URL guard for user-supplied web fetch targets.
3
3
  *
4
- * The vendored insane-search engine performs its own network requests (curl_cffi,
5
- * a real browser) entirely outside the TypeScript fetch path, so the normal
6
- * `loadPage()` flow cannot protect against SSRF. This guard MUST run before any
7
- * dependency probe or engine subprocess is spawned. It is fail-closed: anything
8
- * it cannot prove is a public, non-credentialed http/https target is rejected.
4
+ * Network-capable URL readers MUST run this guard before the first request and
5
+ * before following any redirect target. It is fail-closed: anything it cannot
6
+ * prove is a public, non-credentialed http/https target is rejected.
9
7
  *
10
- * It does NOT follow or re-validate redirects the engine may follow redirects
11
- * internally that this guard never sees. That residual risk is documented in the
12
- * plan and mitigated by validating the input target and keeping the feature
13
- * opt-in (default off).
8
+ * The vendored insane-search engine performs its own redirects outside the
9
+ * TypeScript fetch path, so its fallback remains opt-in and is guarded before
10
+ * any dependency probe or engine subprocess is spawned.
14
11
  */
15
12
  import * as dns from "node:dns/promises";
16
13
  import * as net from "node:net";
@@ -105,11 +102,11 @@ export function isPrivateOrSpecialAddress(address: string): boolean {
105
102
  }
106
103
 
107
104
  /**
108
- * Validate that `rawUrl` is a public http/https target safe to hand to the
109
- * insane-search engine. Resolves DNS names and rejects any that map to a
110
- * private/special address. Never throws; returns a discriminated result.
105
+ * Validate that `rawUrl` is a public http/https target. Resolves DNS names and
106
+ * rejects any that map to a private/special address. Never throws; returns a
107
+ * discriminated result.
111
108
  */
112
- export async function validatePublicHttpUrlForInsane(
109
+ export async function validatePublicHttpUrl(
113
110
  rawUrl: string,
114
111
  options: { resolver?: AddressResolver } = {},
115
112
  ): Promise<PublicUrlResult> {
@@ -153,3 +150,10 @@ export async function validatePublicHttpUrlForInsane(
153
150
  }
154
151
  return { ok: true, url, addresses };
155
152
  }
153
+
154
/**
 * Alias of `validatePublicHttpUrl` under its previous name; forwards both
 * arguments unchanged and returns the same promise result.
 */
export function validatePublicHttpUrlForInsane(
  rawUrl: string,
  options: { resolver?: AddressResolver } = {},
): Promise<PublicUrlResult> {
  const verdict = validatePublicHttpUrl(rawUrl, options);
  return verdict;
}
@@ -6,6 +6,8 @@ import type TurndownService from "turndown";
6
6
 
7
7
  import type { AgentStorage } from "../../session/agent-storage";
8
8
  import { ToolAbortError } from "../../tools/tool-errors";
9
+ import type { AddressResolver } from "../insane/url-guard";
10
+ import { validatePublicHttpUrl } from "../insane/url-guard";
9
11
 
10
12
  export { formatNumber } from "@gajae-code/utils";
11
13
 
@@ -35,6 +37,7 @@ const USER_AGENTS = [
35
37
  "Mozilla/5.0 (compatible; TextBot/1.0)",
36
38
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
37
39
  ];
40
+ const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
38
41
 
39
42
  function isBotBlocked(status: number, content: string): boolean {
40
43
  if (status === 403 || status === 503) {
@@ -70,6 +73,9 @@ export interface LoadPageOptions {
70
73
  body?: string;
71
74
  maxBytes?: number;
72
75
  signal?: AbortSignal;
76
+ publicUrlGuard?: boolean;
77
+ resolver?: AddressResolver;
78
+ maxRedirects?: number;
73
79
  }
74
80
 
75
81
  export interface LoadPageResult {
@@ -78,87 +84,179 @@ export interface LoadPageResult {
78
84
  finalUrl: string;
79
85
  ok: boolean;
80
86
  status?: number;
87
+ error?: string;
88
+ }
89
+
90
/**
 * Run the public-URL guard on `rawUrl` and reshape the verdict for `loadPage`.
 *
 * @param rawUrl URL about to be requested.
 * @param resolver Optional address resolver forwarded to the guard.
 * @param context Prefix for the error text (e.g. which step was blocked).
 * @returns The guard's URL as a string on success; otherwise an error
 *   message plus the rejected URL as `finalUrl`.
 */
async function guardPublicFetchUrl(
  rawUrl: string,
  resolver: AddressResolver | undefined,
  context: string,
): Promise<{ ok: true; url: string } | { ok: false; error: string; finalUrl: string }> {
  const verdict = await validatePublicHttpUrl(rawUrl, { resolver });
  if (!verdict.ok) {
    const error = `${context}: target URL is not public HTTP(S): ${verdict.reason}`;
    return { ok: false, error, finalUrl: rawUrl };
  }
  return { ok: true, url: verdict.url.toString() };
}
103
+
104
/**
 * Decide whether following a redirect must downgrade the request to a
 * bodyless GET, following the Fetch Standard's HTTP-redirect rules:
 *
 * - 303: rewrite unless the method is already GET or HEAD (a HEAD request
 *   must stay HEAD, otherwise the redirect target's body is downloaded);
 * - 301 / 302: rewrite only POST;
 * - any other status: never rewrite.
 *
 * @param status HTTP status code of the redirect response.
 * @param method Method of the request that was redirected (any casing).
 * @returns `true` when the next request should be sent as GET without a body.
 */
function shouldRewriteRedirectMethod(status: number, method: string): boolean {
  const normalized = method.toUpperCase();
  if (status === 303) {
    return normalized !== "GET" && normalized !== "HEAD";
  }
  return (status === 301 || status === 302) && normalized === "POST";
}
82
108
 
83
109
  /**
84
110
  * Fetch a page with timeout and size limit
85
111
  */
86
112
  export async function loadPage(url: string, options: LoadPageOptions = {}): Promise<LoadPageResult> {
87
- const { timeout = 20, headers = {}, maxBytes = MAX_BYTES, signal, method = "GET", body } = options;
113
+ const {
114
+ timeout = 20,
115
+ headers = {},
116
+ maxBytes = MAX_BYTES,
117
+ signal,
118
+ method = "GET",
119
+ body,
120
+ publicUrlGuard = true,
121
+ resolver,
122
+ maxRedirects = 10,
123
+ } = options;
124
+
125
+ let initialUrl = url;
126
+ if (publicUrlGuard) {
127
+ const guarded = await guardPublicFetchUrl(url, resolver, "Blocked URL fetch");
128
+ if (!guarded.ok) {
129
+ return {
130
+ content: "",
131
+ contentType: "",
132
+ finalUrl: guarded.finalUrl,
133
+ ok: false,
134
+ error: guarded.error,
135
+ };
136
+ }
137
+ initialUrl = guarded.url;
138
+ }
88
139
 
89
- for (let attempt = 0; attempt < USER_AGENTS.length; attempt++) {
140
+ attempts: for (let attempt = 0; attempt < USER_AGENTS.length; attempt++) {
90
141
  if (signal?.aborted) {
91
142
  throw new ToolAbortError();
92
143
  }
93
144
 
94
145
  const userAgent = USER_AGENTS[attempt];
95
146
  const requestSignal = ptree.combineSignals(signal, timeout * 1000);
147
+ let currentUrl = initialUrl;
148
+ let currentMethod = method;
149
+ let currentBody = body;
96
150
 
97
151
  try {
98
- const requestInit: RequestInit = {
99
- signal: requestSignal,
100
- method,
101
- headers: {
102
- "User-Agent": userAgent,
103
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
104
- "Accept-Language": "en-US,en;q=0.5",
105
- "Accept-Encoding": "identity", // Cloudflare Markdown-for-Agents returns corrupted bytes when compression is negotiated
106
- ...headers,
107
- },
108
- redirect: "follow",
109
- };
152
+ for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
153
+ const requestInit: RequestInit = {
154
+ signal: requestSignal,
155
+ method: currentMethod,
156
+ headers: {
157
+ "User-Agent": userAgent,
158
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
159
+ "Accept-Language": "en-US,en;q=0.5",
160
+ "Accept-Encoding": "identity", // Cloudflare Markdown-for-Agents returns corrupted bytes when compression is negotiated
161
+ ...headers,
162
+ },
163
+ redirect: "manual",
164
+ };
165
+
166
+ if (currentBody !== undefined) {
167
+ requestInit.body = currentBody;
168
+ }
110
169
 
111
- if (body !== undefined) {
112
- requestInit.body = body;
113
- }
170
+ const response = await fetch(currentUrl, requestInit);
171
+ if (REDIRECT_STATUSES.has(response.status)) {
172
+ const location = response.headers.get("location");
173
+ if (!location) {
174
+ return {
175
+ content: "",
176
+ contentType: "",
177
+ finalUrl: currentUrl,
178
+ ok: false,
179
+ status: response.status,
180
+ error: "Redirect response missing Location header",
181
+ };
182
+ }
183
+ const redirectUrl = new URL(location, currentUrl).toString();
184
+ if (publicUrlGuard) {
185
+ const guarded = await guardPublicFetchUrl(redirectUrl, resolver, "Blocked URL redirect");
186
+ if (!guarded.ok) {
187
+ return {
188
+ content: "",
189
+ contentType: "",
190
+ finalUrl: guarded.finalUrl,
191
+ ok: false,
192
+ status: response.status,
193
+ error: guarded.error,
194
+ };
195
+ }
196
+ currentUrl = guarded.url;
197
+ } else {
198
+ currentUrl = redirectUrl;
199
+ }
200
+ if (shouldRewriteRedirectMethod(response.status, currentMethod)) {
201
+ currentMethod = "GET";
202
+ currentBody = undefined;
203
+ }
204
+ continue;
205
+ }
114
206
 
115
- const response = await fetch(url, requestInit);
207
+ const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
208
+ const finalUrl = response.url || currentUrl;
116
209
 
117
- const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
118
- const finalUrl = response.url;
210
+ const reader = response.body?.getReader();
211
+ if (!reader) {
212
+ return { content: "", contentType, finalUrl, ok: false, status: response.status };
213
+ }
119
214
 
120
- const reader = response.body?.getReader();
121
- if (!reader) {
122
- return { content: "", contentType, finalUrl, ok: false, status: response.status };
123
- }
215
+ const chunks: Uint8Array[] = [];
216
+ let totalSize = 0;
124
217
 
125
- const chunks: Uint8Array[] = [];
126
- let totalSize = 0;
218
+ while (true) {
219
+ const { done, value } = await reader.read();
220
+ if (done) break;
127
221
 
128
- while (true) {
129
- const { done, value } = await reader.read();
130
- if (done) break;
222
+ chunks.push(value);
223
+ totalSize += value.length;
131
224
 
132
- chunks.push(value);
133
- totalSize += value.length;
225
+ if (totalSize > maxBytes) {
226
+ reader.cancel();
227
+ break;
228
+ }
229
+ }
134
230
 
135
- if (totalSize > maxBytes) {
136
- reader.cancel();
137
- break;
231
+ const content = Buffer.concat(chunks).toString("utf-8");
232
+ if (isBotBlocked(response.status, content) && attempt < USER_AGENTS.length - 1) {
233
+ continue attempts;
138
234
  }
139
- }
140
235
 
141
- const content = Buffer.concat(chunks).toString("utf-8");
142
- if (isBotBlocked(response.status, content) && attempt < USER_AGENTS.length - 1) {
143
- continue;
144
- }
236
+ if (!response.ok) {
237
+ return { content, contentType, finalUrl, ok: false, status: response.status };
238
+ }
145
239
 
146
- if (!response.ok) {
147
- return { content, contentType, finalUrl, ok: false, status: response.status };
240
+ return { content, contentType, finalUrl, ok: true, status: response.status };
148
241
  }
149
-
150
- return { content, contentType, finalUrl, ok: true, status: response.status };
242
+ return {
243
+ content: "",
244
+ contentType: "",
245
+ finalUrl: currentUrl,
246
+ ok: false,
247
+ error: `Too many redirects (${maxRedirects})`,
248
+ };
151
249
  } catch {
152
250
  if (signal?.aborted) {
153
251
  throw new ToolAbortError();
154
252
  }
155
253
  if (attempt === USER_AGENTS.length - 1) {
156
- return { content: "", contentType: "", finalUrl: url, ok: false };
254
+ return { content: "", contentType: "", finalUrl: currentUrl, ok: false };
157
255
  }
158
256
  }
159
257
  }
160
258
 
161
- return { content: "", contentType: "", finalUrl: url, ok: false };
259
+ return { content: "", contentType: "", finalUrl: initialUrl, ok: false };
162
260
  }
163
261
 
164
262
  /** Module-level Turndown instance — built lazily on first use. */
@@ -4,6 +4,8 @@ export { isRecord };
4
4
 
5
5
  import { ToolAbortError } from "../../tools/tool-errors";
6
6
  import { convertBufferWithMarkit } from "../../utils/markit";
7
+ import type { AddressResolver } from "../insane/url-guard";
8
+ import { validatePublicHttpUrl } from "../insane/url-guard";
7
9
  import { MAX_BYTES } from "./types";
8
10
 
9
11
  export function asRecord(value: unknown): Record<string, unknown> | null {
@@ -28,6 +30,14 @@ export interface BinaryFetchSuccess {
28
30
 
29
31
  export type BinaryFetchResult = BinaryFetchSuccess | { ok: false; error?: string };
30
32
 
33
+ export interface FetchBinaryOptions {
34
+ publicUrlGuard?: boolean;
35
+ resolver?: AddressResolver;
36
+ maxRedirects?: number;
37
+ }
38
+
39
+ const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
40
+
31
41
  async function readResponseWithLimit(response: Response, maxBytes: number, signal?: AbortSignal): Promise<Uint8Array> {
32
42
  const reader = response.body?.getReader();
33
43
  if (!reader) return new Uint8Array(0);
@@ -60,34 +70,75 @@ async function readResponseWithLimit(response: Response, maxBytes: number, signa
60
70
  return new Uint8Array(Buffer.concat(chunks, totalBytes));
61
71
  }
62
72
 
73
/**
 * Run the public-URL guard on `rawUrl` and reshape the verdict for `fetchBinary`.
 *
 * @param rawUrl URL about to be requested.
 * @param resolver Optional address resolver forwarded to the guard.
 * @param context Prefix for the error text.
 * @returns The guard's URL as a string on success, otherwise an error message.
 */
async function guardPublicBinaryUrl(
  rawUrl: string,
  resolver: AddressResolver | undefined,
  context: string,
): Promise<{ ok: true; url: string } | { ok: false; error: string }> {
  const verdict = await validatePublicHttpUrl(rawUrl, { resolver });
  if (!verdict.ok) {
    const error = `${context}: target URL is not public HTTP(S): ${verdict.reason}`;
    return { ok: false, error };
  }
  return { ok: true, url: verdict.url.toString() };
}
82
+
63
83
  /**
64
84
  * Fetch binary content from a URL
65
85
  */
66
- export async function fetchBinary(url: string, timeout: number = 20, signal?: AbortSignal): Promise<BinaryFetchResult> {
86
+ export async function fetchBinary(
87
+ url: string,
88
+ timeout: number = 20,
89
+ signal?: AbortSignal,
90
+ options: FetchBinaryOptions = {},
91
+ ): Promise<BinaryFetchResult> {
67
92
  const requestSignal = ptree.combineSignals(signal, timeout * 1000);
93
+ const { publicUrlGuard = true, resolver, maxRedirects = 10 } = options;
68
94
  try {
69
- const response = await fetch(url, {
70
- signal: requestSignal,
71
- headers: {
72
- "User-Agent": "Mozilla/5.0 (compatible; TextBot/1.0)",
73
- },
74
- redirect: "follow",
75
- });
76
-
77
- if (!response.ok) {
78
- return { ok: false, error: `HTTP ${response.status}` };
95
+ let currentUrl = url;
96
+ if (publicUrlGuard) {
97
+ const guarded = await guardPublicBinaryUrl(url, resolver, "Blocked binary fetch");
98
+ if (!guarded.ok) return { ok: false, error: guarded.error };
99
+ currentUrl = guarded.url;
79
100
  }
80
101
 
81
- const contentDisposition = response.headers.get("content-disposition") || undefined;
82
- const contentLength = response.headers.get("content-length");
83
- if (contentLength) {
84
- const size = Number.parseInt(contentLength, 10);
85
- if (Number.isFinite(size) && size > MAX_BYTES) {
86
- return { ok: false, error: `content-length ${size} exceeds ${MAX_BYTES}` };
102
+ for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
103
+ const response = await fetch(currentUrl, {
104
+ signal: requestSignal,
105
+ headers: {
106
+ "User-Agent": "Mozilla/5.0 (compatible; TextBot/1.0)",
107
+ },
108
+ redirect: "manual",
109
+ });
110
+
111
+ if (REDIRECT_STATUSES.has(response.status)) {
112
+ const location = response.headers.get("location");
113
+ if (!location) return { ok: false, error: "Redirect response missing Location header" };
114
+ const redirectUrl = new URL(location, currentUrl).toString();
115
+ if (publicUrlGuard) {
116
+ const guarded = await guardPublicBinaryUrl(redirectUrl, resolver, "Blocked binary redirect");
117
+ if (!guarded.ok) return { ok: false, error: guarded.error };
118
+ currentUrl = guarded.url;
119
+ } else {
120
+ currentUrl = redirectUrl;
121
+ }
122
+ continue;
123
+ }
124
+
125
+ if (!response.ok) {
126
+ return { ok: false, error: `HTTP ${response.status}` };
87
127
  }
128
+
129
+ const contentDisposition = response.headers.get("content-disposition") || undefined;
130
+ const contentLength = response.headers.get("content-length");
131
+ if (contentLength) {
132
+ const size = Number.parseInt(contentLength, 10);
133
+ if (Number.isFinite(size) && size > MAX_BYTES) {
134
+ return { ok: false, error: `content-length ${size} exceeds ${MAX_BYTES}` };
135
+ }
136
+ }
137
+ const buffer = await readResponseWithLimit(response, MAX_BYTES, requestSignal);
138
+ return { ok: true, buffer, contentDisposition };
88
139
  }
89
- const buffer = await readResponseWithLimit(response, MAX_BYTES, requestSignal);
90
- return { ok: true, buffer, contentDisposition };
140
+
141
+ return { ok: false, error: `Too many redirects (${maxRedirects})` };
91
142
  } catch (err) {
92
143
  if (signal?.aborted) throw new ToolAbortError();
93
144
  if (requestSignal?.aborted) return { ok: false, error: "aborted" };