@browserless.io/mcp 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/build/src/lib/http-auth.d.ts +2 -0
- package/build/src/lib/http-auth.js +23 -9
- package/build/src/lib/redis-oauth-proxy.d.ts +1 -2
- package/build/src/lib/redis-oauth-proxy.js +45 -67
- package/build/src/resources/download-route.js +4 -12
- package/build/src/resources/upload-route.js +6 -14
- package/build/src/tools/map.js +1 -1
- package/build/src/tools/smartscraper.js +4 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -24,9 +24,9 @@ No local install — see [Configuration](#configuration) for per-client snippets
|
|
|
24
24
|
|
|
25
25
|
| Tool | Description |
|
|
26
26
|
| -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
27
|
-
| `browserless_smartscraper` | Scrape
|
|
27
|
+
| `browserless_smartscraper` | Scrape a single webpage and return its content as markdown or HTML. Handles JavaScript-heavy pages and anti-bot measures automatically. For content across multiple pages, use `browserless_crawl`; to list a site's URLs, use `browserless_map`. |
|
|
28
28
|
| `browserless_search` | Search the web using Browserless and optionally scrape each result. Supports web, news, and image search with geo-targeting and time filters. |
|
|
29
|
-
| `browserless_map` | Discover and map all URLs on a website.
|
|
29
|
+
| `browserless_map` | Discover and map all URLs on a website. Scans via sitemaps and link extraction. Returns URLs with optional titles and descriptions. Useful for site audits and content discovery. |
|
|
30
30
|
| `browserless_crawl` | Crawl a website and scrape every discovered page. Supports depth control, path filtering, sitemap strategies, and configurable scrape options. Returns scraped content and metadata for each page. |
|
|
31
31
|
| `browserless_performance` | Run Lighthouse audits on any URL. Returns scores and metrics for accessibility, best practices, performance, PWA, and SEO. Optionally filter by category or supply performance budgets. |
|
|
32
32
|
| `browserless_function` | Execute custom Puppeteer JavaScript on the Browserless cloud. The function receives a `page` object and optional `context`; return `{ data, type }` to control the payload and Content-Type. |
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { Context } from 'hono';
|
|
1
2
|
import type { McpConfig } from '../@types/types.js';
|
|
2
3
|
export interface ResolvedBrowserlessAuth {
|
|
3
4
|
token: string;
|
|
@@ -20,3 +21,4 @@ export interface AuthInput {
|
|
|
20
21
|
* callback and the custom `/upload` route so both gate on the same rules.
|
|
21
22
|
*/
|
|
22
23
|
export declare const resolveBrowserlessAuth: (input: AuthInput, config: Pick<McpConfig, "browserlessApiUrl" | "supabaseUrl" | "supabaseServiceRoleKey">) => Promise<ResolvedBrowserlessAuth>;
|
|
24
|
+
export declare const guardRouteAuth: (c: Context, config: Parameters<typeof resolveBrowserlessAuth>[1]) => Promise<Response | null>;
|
|
@@ -7,22 +7,22 @@ import { resolveApiKey } from './account-resolver.js';
|
|
|
7
7
|
* callback and the custom `/upload` route so both gate on the same rules.
|
|
8
8
|
*/
|
|
9
9
|
export const resolveBrowserlessAuth = async (input, config) => {
|
|
10
|
-
const headerToken = input.authHeader?.startsWith('Bearer ')
|
|
11
|
-
? input.authHeader.slice(7)
|
|
12
|
-
: input.authHeader;
|
|
13
10
|
const apiUrl = input.apiUrlHeader ?? input.browserlessUrlQuery ?? config.browserlessApiUrl;
|
|
14
11
|
// A pre-created session id to attach to, threaded by the autologin runner.
|
|
15
12
|
// The agent tool opens /chromium/agent?sessionId=<this> instead of doing its
|
|
16
13
|
// own POST /profile.
|
|
17
|
-
const attachSessionId = input.sessionIdHeader ?? input.sessionIdQuery
|
|
14
|
+
const attachSessionId = input.sessionIdHeader ?? input.sessionIdQuery;
|
|
15
|
+
const headerToken = input.authHeader?.startsWith('Bearer ')
|
|
16
|
+
? input.authHeader.slice(7)
|
|
17
|
+
: input.authHeader;
|
|
18
18
|
// JWTs have 3 dot-separated base64url segments; plain API keys do not.
|
|
19
19
|
const isJwt = headerToken ? headerToken.split('.').length === 3 : false;
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
return { token: input.tokenQuery, apiUrl, attachSessionId };
|
|
20
|
+
// A plain key (header or ?token=) is used directly and wins over JWT exchange.
|
|
21
|
+
const plainKey = (isJwt ? undefined : headerToken) ?? input.tokenQuery;
|
|
22
|
+
if (plainKey) {
|
|
23
|
+
return { token: plainKey, apiUrl, attachSessionId };
|
|
25
24
|
}
|
|
25
|
+
// A JWT is exchanged for the account's Browserless API key via PostgREST.
|
|
26
26
|
if (isJwt && headerToken) {
|
|
27
27
|
const { apiKey } = await resolveApiKey(config.supabaseUrl, config.supabaseServiceRoleKey, headerToken);
|
|
28
28
|
return { token: apiKey, apiUrl, attachSessionId };
|
|
@@ -31,3 +31,17 @@ export const resolveBrowserlessAuth = async (input, config) => {
|
|
|
31
31
|
'Pass it as Authorization: Bearer <token> header, ' +
|
|
32
32
|
'?token= query parameter, or authenticate via OAuth.');
|
|
33
33
|
};
|
|
34
|
+
export const guardRouteAuth = async (c, config) => {
|
|
35
|
+
try {
|
|
36
|
+
await resolveBrowserlessAuth({
|
|
37
|
+
authHeader: c.req.header('authorization'),
|
|
38
|
+
tokenQuery: c.req.query('token'),
|
|
39
|
+
apiUrlHeader: c.req.header('x-browserless-api-url'),
|
|
40
|
+
browserlessUrlQuery: c.req.query('browserlessUrl'),
|
|
41
|
+
}, config);
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
catch {
|
|
45
|
+
return c.json({ ok: false, error: 'Unauthorized' }, 401);
|
|
46
|
+
}
|
|
47
|
+
};
|
|
@@ -5,9 +5,8 @@ export declare class RedisOAuthProxy extends OAuthProxy {
|
|
|
5
5
|
constructor(config: OAuthProxyConfig, redis: Redis);
|
|
6
6
|
private get _internal();
|
|
7
7
|
registerClient(request: DCRRequest): Promise<DCRResponse>;
|
|
8
|
-
private isClientRegistered;
|
|
8
|
+
private getClientRedirectUris;
|
|
9
9
|
authorize(params: AuthorizationParams): Promise<Response>;
|
|
10
10
|
handleCallback(request: Request): Promise<Response>;
|
|
11
11
|
exchangeAuthorizationCode(request: TokenRequest): Promise<TokenResponse>;
|
|
12
|
-
destroy(): void;
|
|
13
12
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { OAuthProxy, OAuthProxyError, } from 'fastmcp/auth';
|
|
1
|
+
import { OAuthProxy, OAuthProxyError, PKCEUtils, } from 'fastmcp/auth';
|
|
2
2
|
/**
|
|
3
3
|
* Redis-backed OAuthProxy using Redis as the single source of truth for OAuth
|
|
4
4
|
* flow state (transactions, authorization codes, DCRs), so the steps of one
|
|
@@ -17,10 +17,11 @@ import { OAuthProxy, OAuthProxyError, } from 'fastmcp/auth';
|
|
|
17
17
|
const KEY_PREFIX = 'mcp:oauth:';
|
|
18
18
|
const TX_PREFIX = `${KEY_PREFIX}tx:`;
|
|
19
19
|
const CODE_PREFIX = `${KEY_PREFIX}code:`;
|
|
20
|
-
const
|
|
20
|
+
const CLIENT_ID_PREFIX = `${KEY_PREFIX}client-id:`;
|
|
21
21
|
const DEFAULT_TRANSACTION_TTL = 600;
|
|
22
22
|
const DEFAULT_CODE_TTL = 300;
|
|
23
|
-
|
|
23
|
+
// DCR clients are reused for weeks; a short TTL would expire one mid-life.
|
|
24
|
+
const DEFAULT_CLIENT_TTL = 90 * 24 * 60 * 60;
|
|
24
25
|
const DATE_FIELDS = new Set(['createdAt', 'expiresAt', 'issuedAt']);
|
|
25
26
|
function serialize(obj) {
|
|
26
27
|
return JSON.stringify(obj, (_key, value) => {
|
|
@@ -51,44 +52,25 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
51
52
|
if (this.config.consentRequired) {
|
|
52
53
|
throw new Error('RedisOAuthProxy requires consentRequired: false — consent flow is not supported in multi-instance mode');
|
|
53
54
|
}
|
|
55
|
+
// We return upstream tokens directly (no token swap); fail fast otherwise.
|
|
56
|
+
if (this.config.enableTokenSwap) {
|
|
57
|
+
throw new Error('RedisOAuthProxy requires enableTokenSwap: false — token-swap mode is not supported');
|
|
58
|
+
}
|
|
54
59
|
}
|
|
55
60
|
get _internal() {
|
|
56
61
|
return this;
|
|
57
62
|
}
|
|
58
63
|
async registerClient(request) {
|
|
59
|
-
//
|
|
60
|
-
//
|
|
61
|
-
// (The parent's in-memory Map is also populated but we never read it.)
|
|
64
|
+
// Store the client's redirect_uris under the issued client_id so any
|
|
65
|
+
// instance can validate per-client (the parent's Map is process-local).
|
|
62
66
|
const response = await super.registerClient(request);
|
|
63
67
|
const ttl = this._internal.config.clientRegistrationTtl ?? DEFAULT_CLIENT_TTL;
|
|
64
|
-
|
|
65
|
-
// registration of the same URI (two DCR calls sharing a redirect_uri).
|
|
66
|
-
// allSettled → a probe failure is fail-fast with no writes attempted.
|
|
67
|
-
const probes = await Promise.allSettled(response.redirect_uris.map(async (uri) => ({
|
|
68
|
-
uri,
|
|
69
|
-
existed: (await this.redis.exists(`${CLIENT_PREFIX}${uri}`)) > 0,
|
|
70
|
-
})));
|
|
71
|
-
const probeFailed = probes.find((p) => p.status === 'rejected');
|
|
72
|
-
if (probeFailed) {
|
|
73
|
-
throw probeFailed.reason;
|
|
74
|
-
}
|
|
75
|
-
const redisPreExisting = new Set(probes
|
|
76
|
-
.filter((p) => p.status === 'fulfilled' && p.value.existed)
|
|
77
|
-
.map((p) => p.value.uri));
|
|
78
|
-
const writes = await Promise.allSettled(response.redirect_uris.map((uri) => this.redis.set(`${CLIENT_PREFIX}${uri}`, '1', 'EX', ttl)));
|
|
79
|
-
const writeFailed = writes.find((w) => w.status === 'rejected');
|
|
80
|
-
if (writeFailed) {
|
|
81
|
-
// Best-effort cleanup of Redis keys this call introduced; if these
|
|
82
|
-
// deletes also fail the originating error still wins.
|
|
83
|
-
await Promise.allSettled(response.redirect_uris
|
|
84
|
-
.filter((uri) => !redisPreExisting.has(uri))
|
|
85
|
-
.map((uri) => this.redis.del(`${CLIENT_PREFIX}${uri}`)));
|
|
86
|
-
throw writeFailed.reason;
|
|
87
|
-
}
|
|
68
|
+
await this.redis.set(`${CLIENT_ID_PREFIX}${response.client_id}`, JSON.stringify(response.redirect_uris), 'EX', ttl);
|
|
88
69
|
return response;
|
|
89
70
|
}
|
|
90
|
-
async
|
|
91
|
-
|
|
71
|
+
async getClientRedirectUris(clientId) {
|
|
72
|
+
const json = await this.redis.get(`${CLIENT_ID_PREFIX}${clientId}`);
|
|
73
|
+
return json ? JSON.parse(json) : null;
|
|
92
74
|
}
|
|
93
75
|
async authorize(params) {
|
|
94
76
|
if (!params.client_id || !params.redirect_uri || !params.response_type) {
|
|
@@ -97,15 +79,13 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
97
79
|
if (params.response_type !== 'code') {
|
|
98
80
|
throw new OAuthProxyError('unsupported_response_type', "Only 'code' response type is supported");
|
|
99
81
|
}
|
|
100
|
-
//
|
|
101
|
-
//
|
|
102
|
-
|
|
82
|
+
// redirect_uri must be one registered for THIS client_id — a global check
|
|
83
|
+
// would let any client pair with any other's URI (CWE-601).
|
|
84
|
+
const registeredUris = await this.getClientRedirectUris(params.client_id);
|
|
85
|
+
if (!registeredUris) {
|
|
103
86
|
throw new OAuthProxyError('invalid_client', 'Unknown client_id');
|
|
104
87
|
}
|
|
105
|
-
|
|
106
|
-
// previously registered via DCR; skipping this is CWE-601 (auth-code
|
|
107
|
-
// theft). We read the shared Redis registry so cross-instance DCR counts.
|
|
108
|
-
if (!(await this.isClientRegistered(params.redirect_uri))) {
|
|
88
|
+
if (!registeredUris.includes(params.redirect_uri)) {
|
|
109
89
|
throw new OAuthProxyError('invalid_request', 'redirect_uri is not registered for this client');
|
|
110
90
|
}
|
|
111
91
|
if (params.code_challenge && !params.code_challenge_method) {
|
|
@@ -132,10 +112,10 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
132
112
|
throw new OAuthProxyError('invalid_request', 'Invalid or expired state');
|
|
133
113
|
}
|
|
134
114
|
const transaction = deserialize(txJson);
|
|
135
|
-
// Defense-in-depth:
|
|
136
|
-
//
|
|
137
|
-
|
|
138
|
-
if (!
|
|
115
|
+
// Defense-in-depth: the callback URL must still be bound to this client
|
|
116
|
+
// (guards against mid-flow DCR revocation/expiry).
|
|
117
|
+
const txUris = await this.getClientRedirectUris(transaction.clientId);
|
|
118
|
+
if (!txUris || !txUris.includes(transaction.clientCallbackUrl)) {
|
|
139
119
|
await this.redis.del(`${TX_PREFIX}${state}`);
|
|
140
120
|
throw new OAuthProxyError('invalid_request', 'Transaction callback URL is not registered');
|
|
141
121
|
}
|
|
@@ -144,11 +124,14 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
144
124
|
// We read from it, persist to Redis, then clean up the Map entry.
|
|
145
125
|
const clientCode = this._internal.generateAuthorizationCode(transaction, upstreamTokens);
|
|
146
126
|
const codeData = this._internal.clientCodes.get(clientCode);
|
|
147
|
-
if
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
}
|
|
127
|
+
// generateAuthorizationCode just populated the Map; if a cleanup race
|
|
128
|
+
// emptied it, fail loud rather than redirect with an unpersisted code.
|
|
129
|
+
if (!codeData) {
|
|
130
|
+
throw new OAuthProxyError('server_error', 'Failed to persist authorization code');
|
|
131
|
+
}
|
|
132
|
+
const codeTtl = this._internal.config.authorizationCodeTtl || DEFAULT_CODE_TTL;
|
|
133
|
+
await this.redis.set(`${CODE_PREFIX}${clientCode}`, serialize(codeData), 'EX', codeTtl);
|
|
134
|
+
this._internal.clientCodes.delete(clientCode);
|
|
152
135
|
// Remove consumed transaction
|
|
153
136
|
await this.redis.del(`${TX_PREFIX}${state}`);
|
|
154
137
|
const redirectUrl = new URL(transaction.clientCallbackUrl);
|
|
@@ -163,10 +146,9 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
163
146
|
if (request.grant_type !== 'authorization_code') {
|
|
164
147
|
throw new OAuthProxyError('unsupported_grant_type', 'Only authorization_code grant type is supported');
|
|
165
148
|
}
|
|
166
|
-
//
|
|
167
|
-
//
|
|
168
|
-
|
|
169
|
-
if (request.client_id !== this._internal.config.upstreamClientId) {
|
|
149
|
+
// Reject unknown clients here too; the code↔client binding below enforces
|
|
150
|
+
// that only the owning client can redeem the code.
|
|
151
|
+
if (!(await this.getClientRedirectUris(request.client_id))) {
|
|
170
152
|
throw new OAuthProxyError('invalid_client', 'Unknown client_id');
|
|
171
153
|
}
|
|
172
154
|
// Atomically read-and-delete the code. The parent's in-memory `used` flag
|
|
@@ -180,19 +162,15 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
180
162
|
if (clientCode.clientId !== request.client_id) {
|
|
181
163
|
throw new OAuthProxyError('invalid_client', 'Client ID mismatch');
|
|
182
164
|
}
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
if (clientCode.codeChallenge
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
// is needed in finally — only the local Map cleanup.
|
|
190
|
-
this._internal.clientCodes.set(request.code, clientCode);
|
|
191
|
-
try {
|
|
192
|
-
return await super.exchangeAuthorizationCode(request);
|
|
165
|
+
// PKCE inline, not via super (the parent re-checks client_id against a
|
|
166
|
+
// process-local Map). One-time-use is enforced by the GETDEL above, not the
|
|
167
|
+
// parent's `used` flag.
|
|
168
|
+
if (clientCode.codeChallenge) {
|
|
169
|
+
if (!request.code_verifier) {
|
|
170
|
+
throw new OAuthProxyError('invalid_request', 'code_verifier required for PKCE');
|
|
193
171
|
}
|
|
194
|
-
|
|
195
|
-
|
|
172
|
+
if (!PKCEUtils.validateChallenge(request.code_verifier, clientCode.codeChallenge, clientCode.codeChallengeMethod)) {
|
|
173
|
+
throw new OAuthProxyError('invalid_grant', 'Invalid PKCE verifier');
|
|
196
174
|
}
|
|
197
175
|
}
|
|
198
176
|
const response = {
|
|
@@ -203,12 +181,12 @@ export class RedisOAuthProxy extends OAuthProxy {
|
|
|
203
181
|
if (clientCode.upstreamTokens.refreshToken) {
|
|
204
182
|
response.refresh_token = clientCode.upstreamTokens.refreshToken;
|
|
205
183
|
}
|
|
184
|
+
if (clientCode.upstreamTokens.idToken) {
|
|
185
|
+
response.id_token = clientCode.upstreamTokens.idToken;
|
|
186
|
+
}
|
|
206
187
|
if (clientCode.upstreamTokens.scope?.length > 0) {
|
|
207
188
|
response.scope = clientCode.upstreamTokens.scope.join(' ');
|
|
208
189
|
}
|
|
209
190
|
return response;
|
|
210
191
|
}
|
|
211
|
-
destroy() {
|
|
212
|
-
super.destroy();
|
|
213
|
-
}
|
|
214
192
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { readFile, rm } from 'node:fs/promises';
|
|
2
2
|
import { consumeDownload } from '../lib/download-store.js';
|
|
3
|
-
import { resolveBrowserlessAuth } from '../lib/http-auth.js';
|
|
3
|
+
import { guardRouteAuth } from '../lib/http-auth.js';
|
|
4
4
|
/**
|
|
5
5
|
* Registers `GET /download/:id` on the HTTP-stream server. getDownloads surfaces
|
|
6
6
|
* a download as a notification (metadata only) plus this URL; the client fetches
|
|
@@ -17,17 +17,9 @@ import { resolveBrowserlessAuth } from '../lib/http-auth.js';
|
|
|
17
17
|
export function registerDownloadRoute(server, config) {
|
|
18
18
|
const app = server.getApp();
|
|
19
19
|
app.get('/download/:id', async (c) => {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
tokenQuery: c.req.query('token'),
|
|
24
|
-
apiUrlHeader: c.req.header('x-browserless-api-url'),
|
|
25
|
-
browserlessUrlQuery: c.req.query('browserlessUrl'),
|
|
26
|
-
}, config);
|
|
27
|
-
}
|
|
28
|
-
catch {
|
|
29
|
-
return c.json({ ok: false, error: 'Unauthorized' }, 401);
|
|
30
|
-
}
|
|
20
|
+
const denied = await guardRouteAuth(c, config);
|
|
21
|
+
if (denied)
|
|
22
|
+
return denied;
|
|
31
23
|
// Single-use: consume removes it from the registry so a second GET 404s.
|
|
32
24
|
const record = consumeDownload(c.req.param('id'));
|
|
33
25
|
if (!record) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { downloadUri, storeDownload, FILE_TRANSFER_MAX_BYTES, } from '../lib/download-store.js';
|
|
2
|
-
import { resolveBrowserlessAuth } from '../lib/http-auth.js';
|
|
2
|
+
import { guardRouteAuth } from '../lib/http-auth.js';
|
|
3
3
|
// Registers `POST /upload` (httpStream only): clients push a file's bytes over
|
|
4
4
|
// plain HTTP and get back a handle to pass to the agent's `uploadFile`.
|
|
5
5
|
// curl -s -F file=@/path/to/file "<mcpBaseUrl>/upload?token=<token>"
|
|
@@ -7,19 +7,11 @@ import { resolveBrowserlessAuth } from '../lib/http-auth.js';
|
|
|
7
7
|
export function registerUploadRoute(server, config) {
|
|
8
8
|
const app = server.getApp();
|
|
9
9
|
app.post('/upload', async (c) => {
|
|
10
|
-
// Raw Hono routes bypass FastMCP's authenticate, so gate the
|
|
11
|
-
//
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
tokenQuery: c.req.query('token'),
|
|
16
|
-
apiUrlHeader: c.req.header('x-browserless-api-url'),
|
|
17
|
-
browserlessUrlQuery: c.req.query('browserlessUrl'),
|
|
18
|
-
}, config);
|
|
19
|
-
}
|
|
20
|
-
catch {
|
|
21
|
-
return c.json({ ok: false, error: 'Unauthorized' }, 401);
|
|
22
|
-
}
|
|
10
|
+
// Raw Hono routes bypass FastMCP's authenticate, so gate on the same token
|
|
11
|
+
// rules as the MCP surface — no anonymous drops.
|
|
12
|
+
const denied = await guardRouteAuth(c, config);
|
|
13
|
+
if (denied)
|
|
14
|
+
return denied;
|
|
23
15
|
let file;
|
|
24
16
|
try {
|
|
25
17
|
const body = await c.req.parseBody();
|
package/build/src/tools/map.js
CHANGED
|
@@ -42,7 +42,7 @@ export function registerMapTool(server, config, analytics) {
|
|
|
42
42
|
defineTool(server, config, analytics, {
|
|
43
43
|
name: 'browserless_map',
|
|
44
44
|
description: 'Discover and map all URLs on a website using Browserless. ' +
|
|
45
|
-
'
|
|
45
|
+
'Scans a site via sitemaps and link extraction to find all pages. ' +
|
|
46
46
|
'Returns a list of URLs with optional titles and descriptions. ' +
|
|
47
47
|
'Use the search parameter to order results by relevance to a query. ' +
|
|
48
48
|
'Useful for site audits, content discovery, and building site maps.',
|
|
@@ -47,9 +47,10 @@ export function registerSmartScraperTool(server, config, analytics) {
|
|
|
47
47
|
const cache = new ResponseCache(config.cacheTtlMs);
|
|
48
48
|
defineTool(server, config, analytics, {
|
|
49
49
|
name: 'browserless_smartscraper',
|
|
50
|
-
description: 'Scrape
|
|
51
|
-
'
|
|
52
|
-
'
|
|
50
|
+
description: 'Scrape a SINGLE webpage and return its content as markdown or HTML. ' +
|
|
51
|
+
'Handles JavaScript-heavy pages and anti-bot measures automatically. ' +
|
|
52
|
+
'For content across MULTIPLE pages of a site, use browserless_crawl; ' +
|
|
53
|
+
"to list a site's URLs, use browserless_map.",
|
|
53
54
|
parameters: SmartScraperParamsSchema,
|
|
54
55
|
annotations: {
|
|
55
56
|
title: 'Browserless Smart Scraper',
|
package/package.json
CHANGED