@gajae-code/coding-agent 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/types/cli/mcp-cli.d.ts +25 -0
- package/dist/types/cli.d.ts +6 -0
- package/dist/types/commands/mcp.d.ts +70 -0
- package/dist/types/config/keybindings.d.ts +2 -2
- package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
- package/dist/types/modes/components/custom-editor.d.ts +1 -1
- package/dist/types/modes/components/model-selector.d.ts +2 -0
- package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
- package/dist/types/modes/theme/defaults/index.d.ts +99 -0
- package/dist/types/notifications/operator-runtime.d.ts +52 -0
- package/dist/types/notifications/telegram-daemon.d.ts +54 -16
- package/dist/types/notifications/topic-registry.d.ts +2 -0
- package/dist/types/tools/composer-bash-policy.d.ts +14 -0
- package/dist/types/web/insane/url-guard.d.ts +6 -3
- package/dist/types/web/scrapers/types.d.ts +5 -0
- package/dist/types/web/scrapers/utils.d.ts +7 -1
- package/package.json +7 -7
- package/src/cli/mcp-cli.ts +272 -0
- package/src/cli.ts +6 -2
- package/src/commands/mcp.ts +117 -0
- package/src/config/keybindings.ts +2 -2
- package/src/deep-interview/plaintext-gate-guard.ts +94 -0
- package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
- package/src/defaults/gjc/skills/team/SKILL.md +3 -2
- package/src/extensibility/extensions/runner.ts +1 -0
- package/src/gjc-runtime/tmux-common.ts +3 -1
- package/src/gjc-runtime/ultragoal-guard.ts +25 -8
- package/src/hooks/skill-state.ts +57 -0
- package/src/internal-urls/docs-index.generated.ts +10 -7
- package/src/modes/bridge/bridge-mode.ts +11 -0
- package/src/modes/components/custom-editor.ts +2 -0
- package/src/modes/components/footer.ts +2 -3
- package/src/modes/components/model-selector.ts +12 -0
- package/src/modes/components/status-line/git-utils.ts +25 -0
- package/src/modes/components/status-line.ts +10 -11
- package/src/modes/components/welcome.ts +2 -3
- package/src/modes/controllers/selector-controller.ts +3 -0
- package/src/modes/interactive-mode.ts +2 -1
- package/src/modes/shared/agent-wire/scopes.ts +1 -1
- package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
- package/src/modes/theme/defaults/index.ts +2 -0
- package/src/notifications/operator-runtime.ts +171 -0
- package/src/notifications/telegram-daemon.ts +347 -251
- package/src/notifications/topic-registry.ts +5 -0
- package/src/slash-commands/helpers/parse.ts +2 -1
- package/src/tools/bash.ts +9 -0
- package/src/tools/composer-bash-policy.ts +96 -0
- package/src/tools/fetch.ts +18 -2
- package/src/web/insane/url-guard.ts +18 -14
- package/src/web/scrapers/types.ts +143 -45
- package/src/web/scrapers/utils.ts +70 -19
|
@@ -65,6 +65,11 @@ export class TopicRegistry {
|
|
|
65
65
|
return this.byTopic.get(topicId);
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
+
/** Lists the session id of every persisted topic record. */
sessionIds(): string[] {
	return Array.from(this.topics.keys());
}
|
|
72
|
+
|
|
68
73
|
/** The existing topic record for a session, if any. */
|
|
69
74
|
get(sessionId: string): TopicRecord | undefined {
|
|
70
75
|
return this.topics.get(sessionId);
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { parseCommandArgs } from "../../utils/command-args";
|
|
1
2
|
import type { ParsedSlashCommand, SlashCommandResult, SlashCommandRuntime } from "../types";
|
|
2
3
|
|
|
3
4
|
export interface ParsedSubcommand {
|
|
@@ -65,7 +66,7 @@ export function errorMessage(error: unknown): string {
|
|
|
65
66
|
* "name required" diagnostics with their own messaging.
|
|
66
67
|
*/
|
|
67
68
|
export function parseNamedScopeArgs(rest: string, invalidScopeMessage: string): NamedScopeArgs {
|
|
68
|
-
const tokens = rest
|
|
69
|
+
const tokens = parseCommandArgs(rest);
|
|
69
70
|
let name: string | undefined;
|
|
70
71
|
let scope: ConfigScope = "project";
|
|
71
72
|
let i = 0;
|
package/src/tools/bash.ts
CHANGED
|
@@ -25,6 +25,7 @@ import { type BashInteractiveResult, runInteractiveBashPty } from "./bash-intera
|
|
|
25
25
|
import { checkBashInterception } from "./bash-interceptor";
|
|
26
26
|
import { canUseInteractiveBashPty } from "./bash-pty-selection";
|
|
27
27
|
import { expandInternalUrls, type InternalUrlExpansionOptions } from "./bash-skill-urls";
|
|
28
|
+
import { checkComposerBashPolicy } from "./composer-bash-policy";
|
|
28
29
|
import { formatStyledTruncationWarning, type OutputMeta, stripOutputNotice } from "./output-meta";
|
|
29
30
|
import { resolveToCwd } from "./path-utils";
|
|
30
31
|
import { formatToolWorkingDirectory, replaceTabs } from "./render-utils";
|
|
@@ -570,6 +571,14 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
570
571
|
}
|
|
571
572
|
}
|
|
572
573
|
|
|
574
|
+
const composerPolicy = checkComposerBashPolicy({
|
|
575
|
+
modelId: this.session.getActiveModelString?.() ?? this.session.getModelString?.() ?? this.session.model?.id,
|
|
576
|
+
commands: rawCommand === command ? [command] : [rawCommand, command],
|
|
577
|
+
});
|
|
578
|
+
if (!composerPolicy.allowed) {
|
|
579
|
+
throw new ToolError(composerPolicy.message);
|
|
580
|
+
}
|
|
581
|
+
|
|
573
582
|
const internalUrlOptions: InternalUrlExpansionOptions = {
|
|
574
583
|
skills: this.session.skills ?? [],
|
|
575
584
|
internalRouter: InternalUrlRouter.instance(),
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { isComposerHarnessModel } from "@gajae-code/ai/providers/composer-discipline";
|
|
2
|
+
|
|
3
|
+
/** Error text attached to every blocked Composer bash policy result. */
export const COMPOSER_BASH_POLICY_ERROR =
	"Composer bash policy blocked repository file I/O. Use find, search, read, and edit tools for file discovery, file inspection, and file mutation.";

/** Verdict for a command set that the policy lets through. */
interface ComposerBashPolicyAllowed {
	allowed: true;
}

/** Verdict for a rejected command: `reason` is the id of the rule that fired, `message` the error text. */
interface ComposerBashPolicyBlocked {
	allowed: false;
	reason: string;
	message: string;
}

/** Outcome of a policy check, discriminated on `allowed`. */
type ComposerBashPolicyResult = ComposerBashPolicyAllowed | ComposerBashPolicyBlocked;
|
|
13
|
+
|
|
14
|
+
/**
 * Ordered `[id, pattern]` table behind {@link BLOCK_PATTERNS}.
 *
 * Order matters: the id of the first pattern that matches a command is the one reported.
 */
const BLOCK_RULE_TABLE: ReadonlyArray<readonly [string, RegExp]> = [
	// Shell plumbing: pipes, substitutions, heredocs and redirections.
	["pipe", /\|/],
	["process-substitution", /<[>(]/],
	["heredoc", /<<[-~]?/],
	["command-substitution", /\$\(|`/],
	["redirection", /(^|[^<>])(?:>>?|<)(?!=)/],
	["tee", /(?:^|[;&|\s])tee(?:\s|$)/],
	// Shell utilities that read, list or mutate files (optionally path-qualified).
	[
		"shell-file-read-discovery",
		/(?:^|[;&|()\s])(?:\S*\/)?(?:cat|head|tail|less|more|grep|rg|find|fd|tree|ls)\b/,
	],
	["shell-file-mutation", /(?:^|[;&|()\s])(?:\S*\/)?(?:cp|mv|rm|touch|mkdir|chmod|chown|ln)\b/],
	["sed-print", /(?:^|[;&|()\s])sed\s+(?:-[^\s]*n\b|.*\bp\b)/],
	["awk-print", /(?:^|[;&|()\s])awk\b/],
	// git subcommands that expose file names or file contents.
	["git-ls-files", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+ls-files\b/],
	["git-grep", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+grep\b/],
	["git-show-path", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+show\s+\S+:\S+/],
	["git-diff", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+diff(?:\s|$)/],
	["git-cat-file", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+cat-file\b/],
	["git-show-discovery", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+show\b.*(?:--name-only|--name-status|--stat)/],
	["git-log-path-discovery", /(?:^|[;&|()\s])git(?:\s+-C\s+\S+)?\s+log\b.*(?:--name-only|--name-status|--stat)/],
	// In-place editors and inline scripts that touch the file system.
	["sed-in-place", /(?:^|[;&|()\s])sed\s+-[^\s]*i\b/],
	["perl-in-place", /(?:^|[;&|()\s])perl\s+-[^\s]*p[^\s]*i\b/],
	[
		"script-file-io",
		/(?:^|[;&|()\s])(?:python3?|node|bun)\s+(?:-\s*<<|-c\b|-e\b|--eval\b).*?(?:read_text|read_bytes|write_text|iterdir|listdir|glob\.glob|readFile|readFileSync|writeFile|writeFileSync|readdir|readdirSync|stat|statSync|cpSync|rmSync|mkdirSync|createReadStream|createWriteStream|Bun\.file|Bun\.write|fs\.readFile|fs\.writeFile|fs\.readdir|fs\.stat|fs\.cp|fs\.rm|fs\.mkdir|open\s*\()/s,
	],
	// Markdown fences or narration that leaked into the command text.
	["contaminated-command", /```|^\s*(?:I\s+(?:will|need|am going)|We\s+(?:need|will)|First[, ]|Now[, ]|Let's)\b/im],
];

/** Deny rules checked, in order, against each command; a match blocks the command under the rule's `id`. */
const BLOCK_PATTERNS: Array<{ id: string; pattern: RegExp }> = BLOCK_RULE_TABLE.map(([id, pattern]) => ({
	id,
	pattern,
}));
|
|
56
|
+
|
|
57
|
+
/**
 * Fully anchored shapes of the only commands the Composer policy lets through:
 * `bun test`, `bun run check|test|build[:suffix]` (directly or via a pinned `mise x bun@x.y.z --`),
 * `cargo test|check|build`, `git status` / `git rev-parse HEAD`, and package-manager version probes.
 * Extra arguments are limited to word characters and `. / : @ = -`.
 */
const ALLOWED_TERMINAL_PATTERNS: RegExp[] = [
	/^bun\s+test(?:\s+[\w./:@=-]+)*$/,
	/^bun\s+run\s+(?:check(?::[\w-]+)?|test(?::[\w-]+)?|build(?::[\w-]+)?)(?:\s+[\w./:@=-]+)*$/,
	/^bun\s+--version$/,
	/^mise\s+x\s+bun@\d+\.\d+\.\d+\s+--\s+bun\s+test(?:\s+[\w./:@=-]+)*$/,
	/^mise\s+x\s+bun@\d+\.\d+\.\d+\s+--\s+bun\s+run\s+(?:check(?::[\w-]+)?|test(?::[\w-]+)?|build(?::[\w-]+)?)(?:\s+[\w./:@=-]+)*$/,
	/^cargo\s+(?:test|check|build)(?:\s+[\w./:@=-]+)*$/,
	/^git\s+status(?:\s+--short)?(?:\s+--branch)?$/,
	/^git\s+rev-parse\s+HEAD$/,
	/^npm\s+--version$/,
	/^pnpm\s+--version$/,
	/^yarn\s+--version$/,
];

/**
 * Reports whether `command` is exactly one allowlisted terminal command.
 *
 * Leading/trailing whitespace is ignored and runs of whitespace between tokens are collapsed
 * before matching against {@link ALLOWED_TERMINAL_PATTERNS}.
 *
 * @param command Raw command text as it would be handed to the shell.
 * @returns `true` only when the whole command matches one allowlist pattern.
 */
function isAllowedComposerTerminalCommand(command: string): boolean {
	const trimmed = command.trim();
	// A line break separates commands in the shell. Without this check the whitespace
	// normalisation below (and `\s` in the patterns) would treat "bun test\n<anything>"
	// as `bun test` with extra arguments and let the second command run unchecked.
	if (/[\r\n]/.test(trimmed)) return false;
	const normalized = trimmed.replace(/\s+/g, " ");
	return ALLOWED_TERMINAL_PATTERNS.some(pattern => pattern.test(normalized));
}
|
|
75
|
+
|
|
76
|
+
/**
 * Reports whether the Composer bash policy applies to `modelId`.
 *
 * @param modelId Model identifier; a missing or empty id never matches.
 * @returns `true` when `isComposerHarnessModel` accepts the id.
 */
export function isComposerBashPolicyModel(modelId: string | undefined): boolean {
	if (!modelId) {
		return false;
	}
	return Boolean(isComposerHarnessModel(modelId));
}
|
|
79
|
+
|
|
80
|
+
/**
 * Applies the Composer bash policy to a set of command strings.
 *
 * Models the policy does not cover are always allowed. Otherwise each command is checked in
 * order: the first deny rule that matches blocks it under that rule's id, and a command that
 * matches no deny rule must still be on the terminal allowlist ("not-allowlisted" otherwise).
 *
 * @param input.modelId Identifier of the active model, if known.
 * @param input.commands Command texts to vet; the first rejection wins.
 * @returns `{ allowed: true }`, or a blocked result carrying the rule id and error message.
 */
export function checkComposerBashPolicy(input: {
	modelId?: string;
	commands: readonly string[];
}): ComposerBashPolicyResult {
	const { modelId, commands } = input;
	if (!isComposerBashPolicyModel(modelId)) {
		return { allowed: true };
	}

	const deny = (reason: string): ComposerBashPolicyResult => ({
		allowed: false,
		reason,
		message: COMPOSER_BASH_POLICY_ERROR,
	});

	for (const candidate of commands) {
		const firedRule = BLOCK_PATTERNS.find(rule => rule.pattern.test(candidate));
		if (firedRule !== undefined) {
			return deny(firedRule.id);
		}
		if (!isAllowedComposerTerminalCommand(candidate)) {
			return deny("not-allowlisted");
		}
	}

	return { allowed: true };
}
|
package/src/tools/fetch.ts
CHANGED
|
@@ -17,7 +17,7 @@ import { CachedOutputBlock } from "../tui/output-block";
|
|
|
17
17
|
import { formatDimensionNote, resizeImage } from "../utils/image-resize";
|
|
18
18
|
import { ensureTool } from "../utils/tools-manager";
|
|
19
19
|
import { INSANE_NOTES, tryInsaneFetch } from "../web/insane/bridge";
|
|
20
|
-
import { validatePublicHttpUrlForInsane } from "../web/insane/url-guard";
|
|
20
|
+
import { validatePublicHttpUrl, validatePublicHttpUrlForInsane } from "../web/insane/url-guard";
|
|
21
21
|
import { extractWithParallel, findParallelApiKey, getParallelExtractContent } from "../web/parallel";
|
|
22
22
|
import { specialHandlers } from "../web/scrapers";
|
|
23
23
|
import type { RenderResult } from "../web/scrapers/types";
|
|
@@ -789,6 +789,21 @@ async function renderUrl(
|
|
|
789
789
|
|
|
790
790
|
// Step 0: Normalize URL (ensure scheme for special handlers)
|
|
791
791
|
url = normalizeUrl(url);
|
|
792
|
+
const publicUrl = await validatePublicHttpUrl(url);
|
|
793
|
+
if (!publicUrl.ok) {
|
|
794
|
+
notes.push(`Blocked URL fetch: target URL is not public HTTP(S): ${publicUrl.reason}`);
|
|
795
|
+
return {
|
|
796
|
+
url,
|
|
797
|
+
finalUrl: url,
|
|
798
|
+
contentType: "unknown",
|
|
799
|
+
method: "failed",
|
|
800
|
+
content: "",
|
|
801
|
+
fetchedAt,
|
|
802
|
+
truncated: false,
|
|
803
|
+
notes,
|
|
804
|
+
};
|
|
805
|
+
}
|
|
806
|
+
url = publicUrl.url.toString();
|
|
792
807
|
|
|
793
808
|
// Step 1: Try special handlers for known sites (unless raw mode)
|
|
794
809
|
if (!raw) {
|
|
@@ -802,7 +817,8 @@ async function renderUrl(
|
|
|
802
817
|
throw new ToolAbortError();
|
|
803
818
|
}
|
|
804
819
|
if (!response.ok) {
|
|
805
|
-
const failureNote =
|
|
820
|
+
const failureNote =
|
|
821
|
+
response.error ?? (response.status ? `Failed to fetch URL (HTTP ${response.status})` : "Failed to fetch URL");
|
|
806
822
|
notes.push(failureNote);
|
|
807
823
|
const insane = await tryInsaneFallback({
|
|
808
824
|
url,
|
|
@@ -1,16 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Public HTTP(S) URL guard for
|
|
2
|
+
* Public HTTP(S) URL guard for user-supplied web fetch targets.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* dependency probe or engine subprocess is spawned. It is fail-closed: anything
|
|
8
|
-
* it cannot prove is a public, non-credentialed http/https target is rejected.
|
|
4
|
+
* Network-capable URL readers MUST run this guard before the first request and
|
|
5
|
+
* before following any redirect target. It is fail-closed: anything it cannot
|
|
6
|
+
* prove is a public, non-credentialed http/https target is rejected.
|
|
9
7
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* opt-in (default off).
|
|
8
|
+
* The vendored insane-search engine performs its own redirects outside the
|
|
9
|
+
* TypeScript fetch path, so its fallback remains opt-in and is guarded before
|
|
10
|
+
* any dependency probe or engine subprocess is spawned.
|
|
14
11
|
*/
|
|
15
12
|
import * as dns from "node:dns/promises";
|
|
16
13
|
import * as net from "node:net";
|
|
@@ -105,11 +102,11 @@ export function isPrivateOrSpecialAddress(address: string): boolean {
|
|
|
105
102
|
}
|
|
106
103
|
|
|
107
104
|
/**
|
|
108
|
-
* Validate that `rawUrl` is a public http/https target
|
|
109
|
-
*
|
|
110
|
-
*
|
|
105
|
+
* Validate that `rawUrl` is a public http/https target. Resolves DNS names and
|
|
106
|
+
* rejects any that map to a private/special address. Never throws; returns a
|
|
107
|
+
* discriminated result.
|
|
111
108
|
*/
|
|
112
|
-
export async function
|
|
109
|
+
export async function validatePublicHttpUrl(
|
|
113
110
|
rawUrl: string,
|
|
114
111
|
options: { resolver?: AddressResolver } = {},
|
|
115
112
|
): Promise<PublicUrlResult> {
|
|
@@ -153,3 +150,10 @@ export async function validatePublicHttpUrlForInsane(
|
|
|
153
150
|
}
|
|
154
151
|
return { ok: true, url, addresses };
|
|
155
152
|
}
|
|
153
|
+
|
|
154
|
+
/**
 * Public-URL guard under its insane-search name.
 *
 * Delegates to {@link validatePublicHttpUrl} with the same arguments and returns its verdict unchanged.
 *
 * @param rawUrl URL text to validate.
 * @param options Optional custom address resolver, forwarded as-is.
 */
export async function validatePublicHttpUrlForInsane(
	rawUrl: string,
	options: { resolver?: AddressResolver } = {},
): Promise<PublicUrlResult> {
	const verdict = await validatePublicHttpUrl(rawUrl, options);
	return verdict;
}
|
|
@@ -6,6 +6,8 @@ import type TurndownService from "turndown";
|
|
|
6
6
|
|
|
7
7
|
import type { AgentStorage } from "../../session/agent-storage";
|
|
8
8
|
import { ToolAbortError } from "../../tools/tool-errors";
|
|
9
|
+
import type { AddressResolver } from "../insane/url-guard";
|
|
10
|
+
import { validatePublicHttpUrl } from "../insane/url-guard";
|
|
9
11
|
|
|
10
12
|
export { formatNumber } from "@gajae-code/utils";
|
|
11
13
|
|
|
@@ -35,6 +37,7 @@ const USER_AGENTS = [
|
|
|
35
37
|
"Mozilla/5.0 (compatible; TextBot/1.0)",
|
|
36
38
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
|
37
39
|
];
|
|
40
|
+
/** HTTP status codes that the manual redirect loop treats as redirects. */
const REDIRECT_STATUSES = new Set<number>([301, 302, 303, 307, 308]);
|
|
38
41
|
|
|
39
42
|
function isBotBlocked(status: number, content: string): boolean {
|
|
40
43
|
if (status === 403 || status === 503) {
|
|
@@ -70,6 +73,9 @@ export interface LoadPageOptions {
|
|
|
70
73
|
body?: string;
|
|
71
74
|
maxBytes?: number;
|
|
72
75
|
signal?: AbortSignal;
|
|
76
|
+
publicUrlGuard?: boolean;
|
|
77
|
+
resolver?: AddressResolver;
|
|
78
|
+
maxRedirects?: number;
|
|
73
79
|
}
|
|
74
80
|
|
|
75
81
|
export interface LoadPageResult {
|
|
@@ -78,87 +84,179 @@ export interface LoadPageResult {
|
|
|
78
84
|
finalUrl: string;
|
|
79
85
|
ok: boolean;
|
|
80
86
|
status?: number;
|
|
87
|
+
error?: string;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
 * Runs the public-URL guard on `rawUrl` and shapes the verdict for page loads.
 *
 * @param rawUrl URL about to be requested.
 * @param resolver Optional address resolver forwarded to the guard.
 * @param context Prefix for the error message (for example which step was blocked).
 * @returns The guard's serialised URL on success; otherwise an error message plus `rawUrl` as `finalUrl`.
 */
async function guardPublicFetchUrl(
	rawUrl: string,
	resolver: AddressResolver | undefined,
	context: string,
): Promise<{ ok: true; url: string } | { ok: false; error: string; finalUrl: string }> {
	const verdict = await validatePublicHttpUrl(rawUrl, { resolver });
	if (!verdict.ok) {
		const error = `${context}: target URL is not public HTTP(S): ${verdict.reason}`;
		return { ok: false, error, finalUrl: rawUrl };
	}
	return { ok: true, url: verdict.url.toString() };
}
|
|
103
|
+
|
|
104
|
+
/**
 * Decides whether a followed redirect must be re-issued as a body-less GET.
 *
 * Mirrors the Fetch Standard's HTTP-redirect rule: 301 and 302 rewrite only POST, while 303
 * rewrites every method except GET and HEAD. Other statuses (307, 308) keep the method.
 *
 * @param status Status code of the redirect response.
 * @param method Method of the request that was redirected (case-insensitive).
 * @returns `true` when the next request should switch to GET and drop its body.
 */
function shouldRewriteRedirectMethod(status: number, method: string): boolean {
	const normalized = method.toUpperCase();
	if (status === 303) {
		// HEAD must stay HEAD so a header probe does not turn into a full download;
		// GET is already GET, so there is nothing to rewrite.
		return normalized !== "GET" && normalized !== "HEAD";
	}
	return (status === 301 || status === 302) && normalized === "POST";
}
|
|
82
108
|
|
|
83
109
|
/** Credential-bearing request headers that must not be replayed to a different origin. */
const REDIRECT_CREDENTIAL_HEADERS: ReadonlySet<string> = new Set(["authorization", "cookie", "proxy-authorization"]);

/** Headers describing a request body; dropped once a redirect rewrites the request to a body-less GET. */
const REDIRECT_BODY_HEADERS: ReadonlySet<string> = new Set([
	"content-encoding",
	"content-language",
	"content-length",
	"content-location",
	"content-type",
]);

/** Copies `headers`, leaving out every entry whose lower-cased name is in `blocked`. */
function omitRequestHeaders(headers: Record<string, string>, blocked: ReadonlySet<string>): Record<string, string> {
	return Object.fromEntries(Object.entries(headers).filter(([name]) => !blocked.has(name.toLowerCase())));
}

/**
 * Fetch a page with timeout and size limit.
 *
 * Redirects are followed manually (up to `maxRedirects`) so that, while `publicUrlGuard` is on,
 * the initial URL and every redirect target pass the public-URL guard before being requested.
 * When a response looks bot-blocked the request is retried with the next entry of `USER_AGENTS`.
 *
 * @param url Page URL to load.
 * @param options Request settings. `timeout` defaults to 20 and is multiplied by 1000 before
 *   being combined with `signal`; `maxBytes` defaults to `MAX_BYTES`; `method` defaults to
 *   "GET"; `publicUrlGuard` defaults to true; `maxRedirects` defaults to 10.
 * @returns The decoded body with its content type and final URL. `ok` is false for blocked
 *   URLs, non-2xx responses, redirect problems and request failures; `error` is set when a
 *   specific reason is known.
 * @throws ToolAbortError when `signal` is aborted.
 */
export async function loadPage(url: string, options: LoadPageOptions = {}): Promise<LoadPageResult> {
	const {
		timeout = 20,
		headers = {},
		maxBytes = MAX_BYTES,
		signal,
		method = "GET",
		body,
		publicUrlGuard = true,
		resolver,
		maxRedirects = 10,
	} = options;

	let initialUrl = url;
	if (publicUrlGuard) {
		const guarded = await guardPublicFetchUrl(url, resolver, "Blocked URL fetch");
		if (!guarded.ok) {
			return {
				content: "",
				contentType: "",
				finalUrl: guarded.finalUrl,
				ok: false,
				error: guarded.error,
			};
		}
		initialUrl = guarded.url;
	}

	attempts: for (let attempt = 0; attempt < USER_AGENTS.length; attempt++) {
		if (signal?.aborted) {
			throw new ToolAbortError();
		}

		const userAgent = USER_AGENTS[attempt];
		const requestSignal = ptree.combineSignals(signal, timeout * 1000);
		// Every attempt restarts from the caller's request; redirects only mutate these copies.
		let currentUrl = initialUrl;
		let currentMethod = method;
		let currentBody = body;
		let currentHeaders: Record<string, string> = { ...headers };

		try {
			for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
				const requestInit: RequestInit = {
					signal: requestSignal,
					method: currentMethod,
					headers: {
						"User-Agent": userAgent,
						Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
						"Accept-Language": "en-US,en;q=0.5",
						"Accept-Encoding": "identity", // Cloudflare Markdown-for-Agents returns corrupted bytes when compression is negotiated
						...currentHeaders,
					},
					redirect: "manual",
				};

				if (currentBody !== undefined) {
					requestInit.body = currentBody;
				}

				const response = await fetch(currentUrl, requestInit);
				if (REDIRECT_STATUSES.has(response.status)) {
					// The redirect body is never read; release it so the connection is not left pinned.
					void response.body?.cancel().catch(() => undefined);

					const location = response.headers.get("location");
					if (!location) {
						return {
							content: "",
							contentType: "",
							finalUrl: currentUrl,
							ok: false,
							status: response.status,
							error: "Redirect response missing Location header",
						};
					}
					const redirectUrl = new URL(location, currentUrl).toString();
					let nextUrl = redirectUrl;
					if (publicUrlGuard) {
						const guarded = await guardPublicFetchUrl(redirectUrl, resolver, "Blocked URL redirect");
						if (!guarded.ok) {
							return {
								content: "",
								contentType: "",
								finalUrl: guarded.finalUrl,
								ok: false,
								status: response.status,
								error: guarded.error,
							};
						}
						nextUrl = guarded.url;
					}
					// Manual redirects bypass fetch's own header scrubbing: never hand the caller's
					// credentials to a different origin.
					if (new URL(nextUrl).origin !== new URL(currentUrl).origin) {
						currentHeaders = omitRequestHeaders(currentHeaders, REDIRECT_CREDENTIAL_HEADERS);
					}
					if (shouldRewriteRedirectMethod(response.status, currentMethod)) {
						currentMethod = "GET";
						currentBody = undefined;
						// The body is gone, so headers describing it would be wrong.
						currentHeaders = omitRequestHeaders(currentHeaders, REDIRECT_BODY_HEADERS);
					}
					currentUrl = nextUrl;
					continue;
				}

				const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase() ?? "";
				const finalUrl = response.url || currentUrl;

				const reader = response.body?.getReader();
				if (!reader) {
					return { content: "", contentType, finalUrl, ok: false, status: response.status };
				}

				const chunks: Uint8Array[] = [];
				let totalSize = 0;

				while (true) {
					const { done, value } = await reader.read();
					if (done) break;

					chunks.push(value);
					totalSize += value.length;

					if (totalSize > maxBytes) {
						// Stop downloading; a failed cancel must not surface as an unhandled rejection.
						void reader.cancel().catch(() => undefined);
						break;
					}
				}

				const content = Buffer.concat(chunks).toString("utf-8");
				if (isBotBlocked(response.status, content) && attempt < USER_AGENTS.length - 1) {
					continue attempts;
				}

				if (!response.ok) {
					return { content, contentType, finalUrl, ok: false, status: response.status };
				}

				return { content, contentType, finalUrl, ok: true, status: response.status };
			}
			return {
				content: "",
				contentType: "",
				finalUrl: currentUrl,
				ok: false,
				error: `Too many redirects (${maxRedirects})`,
			};
		} catch {
			if (signal?.aborted) {
				throw new ToolAbortError();
			}
			// Any other failure moves on to the next user agent; the last one reports failure.
			if (attempt === USER_AGENTS.length - 1) {
				return { content: "", contentType: "", finalUrl: currentUrl, ok: false };
			}
		}
	}

	return { content: "", contentType: "", finalUrl: initialUrl, ok: false };
}
|
|
163
261
|
|
|
164
262
|
/** Module-level Turndown instance — built lazily on first use. */
|
|
@@ -4,6 +4,8 @@ export { isRecord };
|
|
|
4
4
|
|
|
5
5
|
import { ToolAbortError } from "../../tools/tool-errors";
|
|
6
6
|
import { convertBufferWithMarkit } from "../../utils/markit";
|
|
7
|
+
import type { AddressResolver } from "../insane/url-guard";
|
|
8
|
+
import { validatePublicHttpUrl } from "../insane/url-guard";
|
|
7
9
|
import { MAX_BYTES } from "./types";
|
|
8
10
|
|
|
9
11
|
export function asRecord(value: unknown): Record<string, unknown> | null {
|
|
@@ -28,6 +30,14 @@ export interface BinaryFetchSuccess {
|
|
|
28
30
|
|
|
29
31
|
export type BinaryFetchResult = BinaryFetchSuccess | { ok: false; error?: string };
|
|
30
32
|
|
|
33
|
+
/** Optional settings accepted by `fetchBinary`. */
export interface FetchBinaryOptions {
	/** Validate the URL and each redirect target as a public HTTP(S) address; `fetchBinary` defaults this to true. */
	publicUrlGuard?: boolean;
	/** Address resolver forwarded to the public-URL guard. */
	resolver?: AddressResolver;
	/** Redirect limit before giving up with "Too many redirects"; `fetchBinary` defaults this to 10. */
	maxRedirects?: number;
}
|
|
38
|
+
|
|
39
|
+
/** HTTP status codes that the binary fetcher follows as redirects. */
const REDIRECT_STATUSES = new Set<number>([301, 302, 303, 307, 308]);
|
|
40
|
+
|
|
31
41
|
async function readResponseWithLimit(response: Response, maxBytes: number, signal?: AbortSignal): Promise<Uint8Array> {
|
|
32
42
|
const reader = response.body?.getReader();
|
|
33
43
|
if (!reader) return new Uint8Array(0);
|
|
@@ -60,34 +70,75 @@ async function readResponseWithLimit(response: Response, maxBytes: number, signa
|
|
|
60
70
|
return new Uint8Array(Buffer.concat(chunks, totalBytes));
|
|
61
71
|
}
|
|
62
72
|
|
|
73
|
+
/**
 * Runs the public-URL guard on `rawUrl` for a binary download.
 *
 * @param rawUrl URL about to be requested.
 * @param resolver Optional address resolver forwarded to the guard.
 * @param context Prefix for the error message.
 * @returns The guard's serialised URL on success, otherwise an error message.
 */
async function guardPublicBinaryUrl(
	rawUrl: string,
	resolver: AddressResolver | undefined,
	context: string,
): Promise<{ ok: true; url: string } | { ok: false; error: string }> {
	const verdict = await validatePublicHttpUrl(rawUrl, { resolver });
	return verdict.ok
		? { ok: true, url: verdict.url.toString() }
		: { ok: false, error: `${context}: target URL is not public HTTP(S): ${verdict.reason}` };
}
|
|
82
|
+
|
|
63
83
|
/**
|
|
64
84
|
* Fetch binary content from a URL
|
|
65
85
|
*/
|
|
66
|
-
export async function fetchBinary(
|
|
86
|
+
export async function fetchBinary(
|
|
87
|
+
url: string,
|
|
88
|
+
timeout: number = 20,
|
|
89
|
+
signal?: AbortSignal,
|
|
90
|
+
options: FetchBinaryOptions = {},
|
|
91
|
+
): Promise<BinaryFetchResult> {
|
|
67
92
|
const requestSignal = ptree.combineSignals(signal, timeout * 1000);
|
|
93
|
+
const { publicUrlGuard = true, resolver, maxRedirects = 10 } = options;
|
|
68
94
|
try {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
redirect: "follow",
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
if (!response.ok) {
|
|
78
|
-
return { ok: false, error: `HTTP ${response.status}` };
|
|
95
|
+
let currentUrl = url;
|
|
96
|
+
if (publicUrlGuard) {
|
|
97
|
+
const guarded = await guardPublicBinaryUrl(url, resolver, "Blocked binary fetch");
|
|
98
|
+
if (!guarded.ok) return { ok: false, error: guarded.error };
|
|
99
|
+
currentUrl = guarded.url;
|
|
79
100
|
}
|
|
80
101
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
102
|
+
for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
|
|
103
|
+
const response = await fetch(currentUrl, {
|
|
104
|
+
signal: requestSignal,
|
|
105
|
+
headers: {
|
|
106
|
+
"User-Agent": "Mozilla/5.0 (compatible; TextBot/1.0)",
|
|
107
|
+
},
|
|
108
|
+
redirect: "manual",
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
if (REDIRECT_STATUSES.has(response.status)) {
|
|
112
|
+
const location = response.headers.get("location");
|
|
113
|
+
if (!location) return { ok: false, error: "Redirect response missing Location header" };
|
|
114
|
+
const redirectUrl = new URL(location, currentUrl).toString();
|
|
115
|
+
if (publicUrlGuard) {
|
|
116
|
+
const guarded = await guardPublicBinaryUrl(redirectUrl, resolver, "Blocked binary redirect");
|
|
117
|
+
if (!guarded.ok) return { ok: false, error: guarded.error };
|
|
118
|
+
currentUrl = guarded.url;
|
|
119
|
+
} else {
|
|
120
|
+
currentUrl = redirectUrl;
|
|
121
|
+
}
|
|
122
|
+
continue;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
if (!response.ok) {
|
|
126
|
+
return { ok: false, error: `HTTP ${response.status}` };
|
|
87
127
|
}
|
|
128
|
+
|
|
129
|
+
const contentDisposition = response.headers.get("content-disposition") || undefined;
|
|
130
|
+
const contentLength = response.headers.get("content-length");
|
|
131
|
+
if (contentLength) {
|
|
132
|
+
const size = Number.parseInt(contentLength, 10);
|
|
133
|
+
if (Number.isFinite(size) && size > MAX_BYTES) {
|
|
134
|
+
return { ok: false, error: `content-length ${size} exceeds ${MAX_BYTES}` };
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
const buffer = await readResponseWithLimit(response, MAX_BYTES, requestSignal);
|
|
138
|
+
return { ok: true, buffer, contentDisposition };
|
|
88
139
|
}
|
|
89
|
-
|
|
90
|
-
return { ok:
|
|
140
|
+
|
|
141
|
+
return { ok: false, error: `Too many redirects (${maxRedirects})` };
|
|
91
142
|
} catch (err) {
|
|
92
143
|
if (signal?.aborted) throw new ToolAbortError();
|
|
93
144
|
if (requestSignal?.aborted) return { ok: false, error: "aborted" };
|