@jupyterlite/ai 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/agent.d.ts +24 -2
- package/lib/agent.js +161 -24
- package/lib/{chat-model-registry.d.ts → chat-model-handler.d.ts} +12 -11
- package/lib/{chat-model-registry.js → chat-model-handler.js} +6 -40
- package/lib/chat-model.d.ts +8 -0
- package/lib/chat-model.js +156 -8
- package/lib/completion/completion-provider.d.ts +1 -1
- package/lib/completion/completion-provider.js +14 -2
- package/lib/components/model-select.js +4 -4
- package/lib/components/tool-select.d.ts +11 -2
- package/lib/components/tool-select.js +77 -18
- package/lib/index.d.ts +3 -3
- package/lib/index.js +128 -66
- package/lib/models/settings-model.d.ts +2 -0
- package/lib/models/settings-model.js +2 -0
- package/lib/providers/built-in-providers.js +7 -0
- package/lib/providers/provider-tools.d.ts +36 -0
- package/lib/providers/provider-tools.js +93 -0
- package/lib/rendered-message-outputarea.d.ts +24 -0
- package/lib/rendered-message-outputarea.js +48 -0
- package/lib/tokens.d.ts +44 -7
- package/lib/tokens.js +1 -1
- package/lib/tools/commands.js +4 -2
- package/lib/tools/web.d.ts +8 -0
- package/lib/tools/web.js +196 -0
- package/lib/widgets/ai-settings.d.ts +1 -1
- package/lib/widgets/ai-settings.js +125 -38
- package/lib/widgets/main-area-chat.d.ts +6 -0
- package/lib/widgets/main-area-chat.js +28 -0
- package/lib/widgets/provider-config-dialog.js +207 -4
- package/package.json +10 -4
- package/schema/settings-model.json +89 -1
- package/src/agent.ts +220 -42
- package/src/{chat-model-registry.ts → chat-model-handler.ts} +16 -51
- package/src/chat-model.ts +223 -14
- package/src/completion/completion-provider.ts +26 -12
- package/src/components/model-select.tsx +4 -5
- package/src/components/tool-select.tsx +110 -7
- package/src/index.ts +153 -82
- package/src/models/settings-model.ts +6 -0
- package/src/providers/built-in-providers.ts +7 -0
- package/src/providers/provider-tools.ts +179 -0
- package/src/rendered-message-outputarea.ts +62 -0
- package/src/tokens.ts +53 -9
- package/src/tools/commands.ts +4 -2
- package/src/tools/web.ts +238 -0
- package/src/widgets/ai-settings.tsx +282 -77
- package/src/widgets/main-area-chat.ts +34 -1
- package/src/widgets/provider-config-dialog.tsx +496 -3
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
2
|
+
import { openai } from '@ai-sdk/openai';
|
|
3
|
+
import type { Tool } from 'ai';
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
IProviderInfo,
|
|
7
|
+
IProviderWebFetchImplementation,
|
|
8
|
+
IProviderWebSearchImplementation
|
|
9
|
+
} from '../tokens';
|
|
10
|
+
|
|
11
|
+
/**
 * Dictionary of AI SDK tools indexed by tool name.
 */
type ToolMap = { [toolName: string]: Tool };
|
|
12
|
+
|
|
13
|
+
/**
 * User-configurable options for provider-hosted web search.
 */
interface IWebSearchSettings {
  /**
   * The web search tool is only created when this is strictly `true`.
   */
  enabled?: boolean;

  /**
   * Forwarded as-is to the OpenAI web search tool.
   */
  externalWebAccess?: boolean;

  /**
   * Forwarded as-is to the OpenAI web search tool.
   */
  searchContextSize?: 'low' | 'medium' | 'high';

  /**
   * Domains to allow; entries are normalized and de-duplicated before use.
   */
  allowedDomains?: string[];

  /**
   * Domains to block; only read by the Anthropic web search tool.
   */
  blockedDomains?: string[];

  /**
   * Usage cap; only read by the Anthropic web search tool.
   */
  maxUses?: number;
}
|
|
21
|
+
|
|
22
|
+
/**
 * User-configurable options for provider-hosted web fetch.
 */
interface IWebFetchSettings {
  /**
   * The web fetch tool is only created when this is strictly `true`.
   */
  enabled?: boolean;

  /**
   * Usage cap; a module-level default is applied when omitted.
   */
  maxUses?: number;

  /**
   * Content token cap; a module-level default is applied when omitted.
   */
  maxContentTokens?: number;

  /**
   * Domains to allow; entries are normalized and de-duplicated before use.
   */
  allowedDomains?: string[];

  /**
   * Domains to block; entries are normalized and de-duplicated before use.
   */
  blockedDomains?: string[];

  /**
   * When defined, forwarded to the tool as `citations: { enabled }`.
   */
  citationsEnabled?: boolean;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Per-provider custom settings governing the built-in web tools.
 */
export interface IProviderCustomSettings {
  /**
   * Settings for provider-hosted web search.
   */
  webSearch?: IWebSearchSettings;

  /**
   * Settings for provider-hosted web fetch.
   */
  webFetch?: IWebFetchSettings;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Inputs needed to decide which provider-hosted tools to create.
 */
interface IProviderToolContext {
  /**
   * Provider description; its `providerToolCapabilities` drive tool creation.
   */
  providerInfo?: IProviderInfo | null;

  /**
   * Custom settings for the provider; no tools are created without them.
   */
  customSettings?: IProviderCustomSettings;

  /**
   * Whether function tools are also in use for the request.
   */
  hasFunctionTools: boolean;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Fallback `maxUses` for the Anthropic web fetch tool when settings omit it.
 */
const DEFAULT_ANTHROPIC_WEB_FETCH_MAX_USES = 2;

/**
 * Fallback `maxContentTokens` for the Anthropic web fetch tool when settings
 * omit it.
 */
const DEFAULT_ANTHROPIC_WEB_FETCH_MAX_CONTENT_TOKENS = 12_000;
|
|
47
|
+
|
|
48
|
+
/**
 * Reduce a user-entered domain filter to a bare, lower-case hostname.
 *
 * The value is trimmed and lower-cased, then stripped of a leading
 * `http://` / `https://`, of everything from the first `/`, `?` or `#`
 * onwards, and of a trailing `:port`. A leading `*.` wildcard is removed.
 *
 * @param value - Raw domain entry; falsy values are treated as an empty string.
 * @returns The normalized hostname, or an empty string when nothing is left.
 */
function normalizeDomain(value: string): string {
  const normalized = (value || '').trim().toLowerCase();
  const withoutProtocol = normalized.replace(/^https?:\/\//, '');
  // Keep only the authority part: a query string or fragment can follow the
  // host directly without any path separator (e.g. "example.com?x=1").
  const authority = withoutProtocol.split(/[/?#]/)[0];
  // An explicit port is not part of a domain filter.
  const hostname = authority.replace(/:\d+$/, '').trim();
  // Treat "*.example.com" as "example.com" for provider domain filters.
  return hostname.startsWith('*.') ? hostname.slice(2) : hostname;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Normalize a list of domain entries, dropping empty results and duplicates.
 *
 * @param value - Raw domain entries; `undefined` is treated as an empty list.
 * @returns Unique normalized domains in first-seen order.
 */
function collectDomains(value?: string[]): string[] {
  const unique = new Set<string>();
  for (const entry of value || []) {
    const domain = normalizeDomain(entry);
    if (domain.length > 0) {
      unique.add(domain);
    }
  }
  return [...unique];
}
|
|
64
|
+
|
|
65
|
+
/**
 * Build the OpenAI provider-hosted web search tool from user settings.
 *
 * Only the allow-list is forwarded as a filter, and only when it is
 * non-empty after normalization.
 */
function createOpenAIWebSearchTool(
  webSearchSettings: IWebSearchSettings
): Tool {
  const { externalWebAccess, searchContextSize } = webSearchSettings;
  const allowedDomains = collectDomains(webSearchSettings.allowedDomains);
  const filters = allowedDomains.length > 0 ? { allowedDomains } : undefined;

  return openai.tools.webSearch({
    externalWebAccess,
    searchContextSize,
    filters
  });
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build the Anthropic provider-hosted web search tool from user settings.
 *
 * Anthropic documents `allowed_domains` and `blocked_domains` as mutually
 * exclusive within one request. When both lists are configured, the
 * allow-list (the stricter filter) is sent and the block-list is dropped
 * with a console warning, rather than letting the request be rejected.
 *
 * @param webSearchSettings - User settings for web search.
 * @returns The configured Anthropic web search tool.
 */
function createAnthropicWebSearchTool(
  webSearchSettings: IWebSearchSettings
): Tool {
  const allowedDomains = collectDomains(webSearchSettings.allowedDomains);
  const blockedDomains = collectDomains(webSearchSettings.blockedDomains);
  const hasAllowList = allowedDomains.length > 0;
  const hasBlockList = blockedDomains.length > 0;

  if (hasAllowList && hasBlockList) {
    console.warn(
      'Anthropic web search accepts either allowed or blocked domains, not both; ignoring blocked domains.'
    );
  }

  return anthropic.tools.webSearch_20250305({
    maxUses: webSearchSettings.maxUses,
    allowedDomains: hasAllowList ? allowedDomains : undefined,
    // Only send the block-list when no allow-list is in effect.
    blockedDomains: hasBlockList && !hasAllowList ? blockedDomains : undefined
  });
}
|
|
87
|
+
|
|
88
|
+
/**
 * Build the Anthropic provider-hosted web fetch tool from user settings.
 *
 * Missing `maxUses` / `maxContentTokens` fall back to the module defaults.
 * Anthropic documents `allowed_domains` and `blocked_domains` as mutually
 * exclusive within one request. When both lists are configured, the
 * allow-list (the stricter filter) is sent and the block-list is dropped
 * with a console warning, rather than letting the request be rejected.
 *
 * @param webFetchSettings - User settings for web fetch.
 * @returns The configured Anthropic web fetch tool.
 */
function createAnthropicWebFetchTool(
  webFetchSettings: IWebFetchSettings
): Tool {
  const maxUses =
    webFetchSettings.maxUses ?? DEFAULT_ANTHROPIC_WEB_FETCH_MAX_USES;
  const maxContentTokens =
    webFetchSettings.maxContentTokens ??
    DEFAULT_ANTHROPIC_WEB_FETCH_MAX_CONTENT_TOKENS;
  const allowedDomains = collectDomains(webFetchSettings.allowedDomains);
  const blockedDomains = collectDomains(webFetchSettings.blockedDomains);
  const hasAllowList = allowedDomains.length > 0;
  const hasBlockList = blockedDomains.length > 0;
  const citationsEnabled = webFetchSettings.citationsEnabled;

  if (hasAllowList && hasBlockList) {
    console.warn(
      'Anthropic web fetch accepts either allowed or blocked domains, not both; ignoring blocked domains.'
    );
  }

  return anthropic.tools.webFetch_20250910({
    maxUses,
    maxContentTokens,
    allowedDomains: hasAllowList ? allowedDomains : undefined,
    // Only send the block-list when no allow-list is in effect.
    blockedDomains: hasBlockList && !hasAllowList ? blockedDomains : undefined,
    // Leave citations unset unless the user made an explicit choice.
    citations:
      citationsEnabled !== undefined ? { enabled: citationsEnabled } : undefined
  });
}
|
|
108
|
+
|
|
109
|
+
/**
 * Dispatch to the web search tool factory for the given implementation.
 *
 * @throws Error when the implementation is not one of the supported families.
 */
function createWebSearchTool(
  implementation: IProviderWebSearchImplementation,
  webSearchSettings: IWebSearchSettings
): Tool {
  if (implementation === 'openai') {
    return createOpenAIWebSearchTool(webSearchSettings);
  }
  if (implementation === 'anthropic') {
    return createAnthropicWebSearchTool(webSearchSettings);
  }
  throw new Error(`Unsupported web search implementation: ${implementation}`);
}
|
|
124
|
+
|
|
125
|
+
/**
 * Dispatch to the web fetch tool factory for the given implementation.
 *
 * @throws Error when the implementation is not one of the supported families.
 */
function createWebFetchTool(
  implementation: IProviderWebFetchImplementation,
  webFetchSettings: IWebFetchSettings
): Tool {
  if (implementation === 'anthropic') {
    return createAnthropicWebFetchTool(webFetchSettings);
  }
  throw new Error(`Unsupported web fetch implementation: ${implementation}`);
}
|
|
138
|
+
|
|
139
|
+
/**
 * Build the provider-hosted tools enabled by the custom settings and
 * supported by the provider's declared capabilities.
 *
 * Returns an empty map when there are no custom settings or the provider
 * declares no tool capabilities. Web search is additionally skipped when its
 * capability sets `requiresNoFunctionTools` and function tools are in use.
 *
 * @param options - Provider info, custom settings and function-tool flag.
 * @returns A map that may contain `web_search` and/or `web_fetch`.
 */
export function createProviderTools(options: IProviderToolContext): ToolMap {
  const tools: ToolMap = {};
  const { customSettings, providerInfo, hasFunctionTools } = options;
  const capabilities = providerInfo?.providerToolCapabilities;
  if (!customSettings || !capabilities) {
    return tools;
  }

  const searchSettings = customSettings.webSearch;
  const searchCapability = capabilities.webSearch;
  if (searchSettings?.enabled === true && searchCapability) {
    const conflictsWithFunctionTools =
      searchCapability.requiresNoFunctionTools === true && hasFunctionTools;
    if (!conflictsWithFunctionTools) {
      tools.web_search = createWebSearchTool(
        searchCapability.implementation,
        searchSettings
      );
    }
  }

  const fetchSettings = customSettings.webFetch;
  const fetchCapability = capabilities.webFetch;
  if (fetchSettings?.enabled === true && fetchCapability) {
    tools.web_fetch = createWebFetchTool(
      fetchCapability.implementation,
      fetchSettings
    );
  }

  return tools;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { ChatWidget } from '@jupyter/chat';
|
|
2
|
+
import { IDisposable } from '@lumino/disposable';
|
|
3
|
+
|
|
4
|
+
/**
 * CSS class added to chat-rendered messages.
 */
const OUTPUT_AREA_CLASS = 'jp-OutputArea';

/**
 * Matches chat-rendered messages that do not carry the output-area class yet.
 */
const CHAT_RENDERED_MESSAGE_SELECTOR = `.jp-chat-rendered-message:not(.${OUTPUT_AREA_CLASS})`;
|
|
6
|
+
|
|
7
|
+
/**
 * Adds the OutputArea CSS class to chat-rendered MIME outputs so renderer
 * extensions can reuse their notebook/output-area CSS rules.
 *
 * A sync is queued on construction and on every `messagesUpdated` signal of
 * the chat model; queued syncs are coalesced into one animation frame.
 *
 * TODO: Remove this compatibility layer once jupyter-chat applies
 * `jp-OutputArea` (or equivalent output-area context) to rendered MIME
 * messages by default.
 */
export class RenderedMessageOutputAreaCompat implements IDisposable {
  /**
   * Start watching the given chat panel and queue an initial sync.
   */
  constructor(options: RenderedMessageOutputAreaCompat.IOptions) {
    this._chatPanel = options.chatPanel;
    this._chatPanel.model.messagesUpdated.connect(this._scheduleSync, this);
    this._scheduleSync();
  }

  /**
   * Whether `dispose()` has been called.
   */
  get isDisposed(): boolean {
    return this._isDisposed;
  }

  /**
   * Stop listening to the chat model and cancel any queued sync.
   * Calling it more than once is a no-op.
   */
  dispose(): void {
    if (this._isDisposed) {
      return;
    }
    this._isDisposed = true;
    this._chatPanel.model.messagesUpdated.disconnect(this._scheduleSync, this);
    if (this._raf === 0) {
      return;
    }
    cancelAnimationFrame(this._raf);
    this._raf = 0;
  }

  /**
   * Queue a sync for the next animation frame unless one is already queued
   * or this object is disposed.
   */
  private _scheduleSync(): void {
    const alreadyQueued = this._raf !== 0;
    if (this._isDisposed || alreadyQueued) {
      return;
    }
    this._raf = requestAnimationFrame(() => this._sync());
  }

  /**
   * Tag every rendered message that is still missing the output-area class.
   */
  private _sync(): void {
    this._raf = 0;
    if (this._isDisposed) {
      return;
    }
    const untagged = this._chatPanel.node.querySelectorAll<HTMLElement>(
      CHAT_RENDERED_MESSAGE_SELECTOR
    );
    for (const element of Array.from(untagged)) {
      element.classList.add(OUTPUT_AREA_CLASS);
    }
  }

  private readonly _chatPanel: ChatWidget;
  private _isDisposed = false;
  // Handle of the queued animation frame; 0 means nothing is queued.
  private _raf = 0;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Types associated with `RenderedMessageOutputAreaCompat`.
 */
export namespace RenderedMessageOutputAreaCompat {
  /**
   * Constructor options.
   */
  export interface IOptions {
    /**
     * Chat widget whose rendered messages receive the output-area class.
     */
    chatPanel: ChatWidget;
  }
}
|
package/src/tokens.ts
CHANGED
|
@@ -168,6 +168,49 @@ export interface IProviderFactory {
|
|
|
168
168
|
(options: IModelOptions): LanguageModel;
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
+
/**
 * Names of the built-in web search integration families that provider
 * tools can be created for.
 */
export type IProviderWebSearchImplementation =
  | 'openai'
  | 'anthropic';
|
|
175
|
+
|
|
176
|
+
/**
 * Names of the built-in web fetch integration families that provider
 * tools can be created for.
 */
export type IProviderWebFetchImplementation =
  | 'anthropic';
|
|
180
|
+
|
|
181
|
+
/**
 * Describes a provider's support for provider-hosted web search.
 */
export interface IProviderWebSearchCapability {
  /**
   * The built-in integration family used to create the tool.
   */
  implementation: IProviderWebSearchImplementation;

  /**
   * When true, provider-hosted web search is skipped whenever function tools
   * are enabled.
   */
  requiresNoFunctionTools?: boolean;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Describes a provider's support for provider-hosted web fetch.
 */
export interface IProviderWebFetchCapability {
  /**
   * The built-in integration family used to create the tool.
   */
  implementation: IProviderWebFetchImplementation;
}
|
|
205
|
+
|
|
206
|
+
/**
 * The set of provider-hosted tool capabilities a provider exposes.
 */
export interface IProviderToolCapabilities {
  /**
   * Present when the provider supports hosted web search.
   */
  webSearch?: IProviderWebSearchCapability;

  /**
   * Present when the provider supports hosted web fetch.
   */
  webFetch?: IProviderWebFetchCapability;
}
|
|
213
|
+
|
|
171
214
|
/**
|
|
172
215
|
* Provider information
|
|
173
216
|
*/
|
|
@@ -220,6 +263,11 @@ export interface IProviderInfo {
|
|
|
220
263
|
*/
|
|
221
264
|
baseUrls?: { url: string; description?: string }[];
|
|
222
265
|
|
|
266
|
+
/**
|
|
267
|
+
* Optional provider-hosted tool capabilities for web retrieval.
|
|
268
|
+
*/
|
|
269
|
+
providerToolCapabilities?: IProviderToolCapabilities;
|
|
270
|
+
|
|
223
271
|
/**
|
|
224
272
|
* Factory function for creating language models
|
|
225
273
|
*/
|
|
@@ -296,21 +344,17 @@ export const IAgentManagerFactory = new Token<AgentManagerFactory>(
|
|
|
296
344
|
'@jupyterlite/ai:agent-manager-factory'
|
|
297
345
|
);
|
|
298
346
|
|
|
299
|
-
export interface
|
|
300
|
-
add(model: AIChatModel): void;
|
|
301
|
-
get(name: string): AIChatModel | undefined;
|
|
302
|
-
getAll(): AIChatModel[];
|
|
303
|
-
remove(name: string): void;
|
|
347
|
+
export interface IChatModelHandler {
|
|
304
348
|
createModel(
|
|
305
|
-
name
|
|
306
|
-
activeProvider
|
|
349
|
+
name: string,
|
|
350
|
+
activeProvider: string,
|
|
307
351
|
tokenUsage?: ITokenUsage
|
|
308
352
|
): AIChatModel;
|
|
309
353
|
activeCellManager: ActiveCellManager | undefined;
|
|
310
354
|
}
|
|
311
355
|
|
|
312
|
-
export const
|
|
313
|
-
'@jupyterlite/ai:chat-model-
|
|
356
|
+
export const IChatModelHandler = new Token<IChatModelHandler>(
|
|
357
|
+
'@jupyterlite/ai:chat-model-handler'
|
|
314
358
|
);
|
|
315
359
|
|
|
316
360
|
/**
|
package/src/tools/commands.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { CommandRegistry } from '@lumino/commands';
|
|
2
|
+
import { Widget } from '@lumino/widgets';
|
|
2
3
|
import { tool } from 'ai';
|
|
3
4
|
import { z } from 'zod';
|
|
4
5
|
import { ITool } from '../tokens';
|
|
@@ -115,9 +116,10 @@ export function createExecuteCommandTool(
|
|
|
115
116
|
// Execute the command
|
|
116
117
|
const result = await commands.execute(commandId, args);
|
|
117
118
|
|
|
118
|
-
// Handle
|
|
119
|
+
// Handle actual Lumino widgets specially by extracting id and title.
|
|
120
|
+
// Avoid collapsing plain command results that happen to contain an `id` field.
|
|
119
121
|
let serializedResult;
|
|
120
|
-
if (result
|
|
122
|
+
if (result instanceof Widget) {
|
|
121
123
|
serializedResult = {
|
|
122
124
|
id: result.id,
|
|
123
125
|
title: result.title?.label || result.title
|
package/src/tools/web.ts
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { tool } from 'ai';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { ITool } from '../tokens';
|
|
4
|
+
|
|
5
|
+
/** Number of response characters returned when the caller sets no limit. */
const DEFAULT_MAX_CONTENT_CHARS = 20_000;

/** Upper bound on the number of response characters a caller may request. */
const MAX_ALLOWED_CONTENT_CHARS = 100_000;

/** Request timeout, in milliseconds, when the caller sets none. */
const DEFAULT_TIMEOUT_MS = 20_000;

/** Upper bound, in milliseconds, on the timeout a caller may request. */
const MAX_TIMEOUT_MS = 120_000;
|
|
9
|
+
|
|
10
|
+
/**
 * Outcome of reading a response body with a character cap.
 */
interface IReadBodyResult {
  /** The (possibly truncated) body text. */
  content: string;
  /** Whether the body was longer than the cap. */
  isTruncated: boolean;
  /** Number of characters decoded; a lower bound when reading stopped early. */
  totalChars: number;
  /** False when reading stopped early and `totalChars` is only a lower bound. */
  totalCharsExact: boolean;
}

/**
 * Cut `text` to at most `maxChars` UTF-16 code units without leaving a
 * dangling high surrogate at the end of the result.
 */
function clipToCharLimit(text: string, maxChars: number): string {
  if (text.length <= maxChars) {
    return text;
  }
  let end = Math.max(0, maxChars);
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair =
      last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) {
      // Drop the whole code point rather than emit half of it.
      end -= 1;
    }
  }
  return text.slice(0, end);
}

/**
 * Read response body text with a character cap.
 *
 * Stops early once the cap is reached to avoid buffering arbitrarily large
 * payloads in memory. When the response exposes no body stream, the whole
 * body is read with `response.text()` and then cut to the cap.
 *
 * @param response - The fetch response whose body should be read.
 * @param maxContentChars - Maximum number of characters to return.
 * @returns The text plus truncation and length metadata. The returned text
 *   may be one character shorter than the cap, so that a surrogate pair is
 *   never split.
 */
async function readResponseText(
  response: Response,
  maxContentChars: number
): Promise<IReadBodyResult> {
  if (!response.body) {
    const body = await response.text();
    return {
      content: clipToCharLimit(body, maxContentChars),
      isTruncated: body.length > maxContentChars,
      totalChars: body.length,
      totalCharsExact: true
    };
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let content = '';
  let totalChars = 0;

  for (;;) {
    const readResult = await reader.read();
    if (readResult.done) {
      break;
    }

    const chunk = decoder.decode(readResult.value, { stream: true });
    if (!chunk) {
      continue;
    }

    totalChars += chunk.length;

    const remaining = maxContentChars - content.length;
    if (chunk.length > remaining) {
      content += clipToCharLimit(chunk, remaining);
      // Cancelling is best-effort cleanup: a failure here must not discard
      // the content that was already collected.
      await reader.cancel().catch(() => undefined);
      return {
        content,
        isTruncated: true,
        totalChars,
        totalCharsExact: false
      };
    }
    content += chunk;
  }

  // Flush whatever the streaming decoder is still holding back.
  let isTruncated = false;
  const tail = decoder.decode();
  if (tail) {
    totalChars += tail.length;
    const remaining = maxContentChars - content.length;
    isTruncated = tail.length > remaining;
    content += clipToCharLimit(tail, remaining);
  }

  return {
    content,
    isTruncated,
    totalChars,
    totalCharsExact: true
  };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Build a tool that fetches a URL straight from the browser with HTTP GET.
 *
 * The fetch is best-effort: it is subject to the usual browser constraints
 * (CORS, CSP, mixed content, bot protections). Failures are reported in the
 * returned object (`success: false` plus `errorType`/`error`) rather than
 * thrown. One timeout covers both the request and the body read.
 */
export function createBrowserFetchTool(): ITool {
  return tool({
    title: 'Browser Fetch',
    description:
      'Fetch a URL directly from the browser using HTTP GET for exact URL inspection when CORS/access permits.',
    inputSchema: z.object({
      url: z.string().describe('HTTP(S) URL to fetch'),
      maxContentChars: z
        .number()
        .int()
        .min(1)
        .max(MAX_ALLOWED_CONTENT_CHARS)
        .optional()
        .describe(
          `Maximum number of response characters to return (default: ${DEFAULT_MAX_CONTENT_CHARS})`
        ),
      timeoutMs: z
        .number()
        .int()
        .min(1000)
        .max(MAX_TIMEOUT_MS)
        .optional()
        .describe(
          `Timeout in milliseconds (default: ${DEFAULT_TIMEOUT_MS}, max: ${MAX_TIMEOUT_MS})`
        )
    }),
    execute: async (input: {
      url: string;
      maxContentChars?: number;
      timeoutMs?: number;
    }) => {
      // Apply defaults, then clamp to the hard upper bounds.
      const charLimit = Math.min(
        input.maxContentChars ?? DEFAULT_MAX_CONTENT_CHARS,
        MAX_ALLOWED_CONTENT_CHARS
      );
      const timeoutMs = Math.min(
        input.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        MAX_TIMEOUT_MS
      );

      let target: URL;
      try {
        target = new URL(input.url);
      } catch {
        return {
          success: false,
          errorType: 'invalid_url',
          error: 'Invalid URL format',
          url: input.url
        };
      }

      const isHttp =
        target.protocol === 'http:' || target.protocol === 'https:';
      if (!isHttp) {
        return {
          success: false,
          errorType: 'unsupported_protocol',
          error: 'Only http:// and https:// URLs are supported',
          url: input.url
        };
      }

      const href = target.toString();
      const abortController = new AbortController();
      const timer = setTimeout(() => abortController.abort(), timeoutMs);

      try {
        const response = await fetch(href, {
          method: 'GET',
          credentials: 'omit',
          redirect: 'follow',
          signal: abortController.signal,
          headers: {
            Accept:
              'text/html,text/plain,application/json,text/markdown,*/*;q=0.8'
          }
        });

        const contentType = response.headers.get('content-type') || '';
        const contentLength = response.headers.get('content-length');
        const body = await readResponseText(response, charLimit);
        const success = response.ok;
        // Error details are only attached for non-2xx responses.
        const httpError = success
          ? {}
          : {
              errorType: 'http_error',
              error: `HTTP ${response.status} ${response.statusText}`
            };

        return {
          success,
          url: response.url,
          requestedUrl: href,
          status: response.status,
          statusText: response.statusText,
          contentType,
          contentLength,
          ...httpError,
          isTruncated: body.isTruncated,
          returnedChars: body.content.length,
          totalChars: body.totalChars,
          totalCharsExact: body.totalCharsExact,
          content: body.content,
          limitations:
            'Browser fetch is subject to CORS, site bot protections, and browser network policy.'
        };
      } catch (error) {
        const timedOut = (error as Error).name === 'AbortError';
        if (timedOut) {
          return {
            success: false,
            errorType: 'timeout',
            error: `Request timed out after ${timeoutMs} ms`,
            url: href
          };
        }

        const message =
          error instanceof Error && error.message
            ? error.message
            : 'Fetch failed';
        return {
          success: false,
          errorType: 'network_or_cors',
          error: message,
          url: href,
          likelyCauses: [
            'CORS blocked by the target website',
            'DNS/network resolution failure',
            'TLS/certificate issue',
            'Target server rejected browser access'
          ]
        };
      } finally {
        clearTimeout(timer);
      }
    }
  });
}
|