alterlab-mcp-server 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +9 -1
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +42 -0
- package/dist/client.js.map +1 -1
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +5 -1
- package/dist/errors.js.map +1 -1
- package/dist/format.d.ts.map +1 -1
- package/dist/format.js +7 -0
- package/dist/format.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +16 -1
- package/dist/index.js.map +1 -1
- package/dist/tools/batch.d.ts +75 -0
- package/dist/tools/batch.d.ts.map +1 -0
- package/dist/tools/batch.js +175 -0
- package/dist/tools/batch.js.map +1 -0
- package/dist/tools/crawl.d.ts +70 -0
- package/dist/tools/crawl.d.ts.map +1 -0
- package/dist/tools/crawl.js +219 -0
- package/dist/tools/crawl.js.map +1 -0
- package/dist/tools/extract.d.ts +17 -11
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +88 -26
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/map.d.ts +40 -0
- package/dist/tools/map.d.ts.map +1 -0
- package/dist/tools/map.js +139 -0
- package/dist/tools/map.js.map +1 -0
- package/dist/tools/scrape.d.ts +42 -6
- package/dist/tools/scrape.d.ts.map +1 -1
- package/dist/tools/scrape.js +89 -7
- package/dist/tools/scrape.js.map +1 -1
- package/dist/tools/search.d.ts +40 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +147 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/types.d.ts +165 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/tools/scrape.d.ts
CHANGED
|
@@ -3,40 +3,76 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
|
|
|
3
3
|
import type { AlterLabClient } from "../client.js";
|
|
4
4
|
export declare const scrapeSchema: z.ZodObject<{
|
|
5
5
|
url: z.ZodString;
|
|
6
|
+
method: z.ZodDefault<z.ZodEnum<["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD"]>>;
|
|
7
|
+
body: z.ZodOptional<z.ZodString>;
|
|
6
8
|
mode: z.ZodDefault<z.ZodEnum<["auto", "html", "js", "pdf", "ocr"]>>;
|
|
7
|
-
formats: z.ZodDefault<z.ZodArray<z.ZodEnum<["text", "json", "html", "markdown"]>, "many">>;
|
|
8
|
-
render_js: z.ZodDefault<z.ZodBoolean>;
|
|
9
|
+
formats: z.ZodDefault<z.ZodArray<z.ZodEnum<["text", "json", "json_v2", "html", "markdown", "rag"]>, "many">>;
|
|
10
|
+
render_js: z.ZodDefault<z.ZodUnion<[z.ZodBoolean, z.ZodLiteral<"auto">]>>;
|
|
9
11
|
use_proxy: z.ZodDefault<z.ZodBoolean>;
|
|
10
12
|
proxy_country: z.ZodOptional<z.ZodString>;
|
|
11
13
|
wait_for: z.ZodOptional<z.ZodString>;
|
|
12
14
|
timeout: z.ZodDefault<z.ZodNumber>;
|
|
15
|
+
max_response_bytes: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
13
16
|
include_raw_html: z.ZodDefault<z.ZodBoolean>;
|
|
14
17
|
session_id: z.ZodOptional<z.ZodString>;
|
|
15
18
|
cookies: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
19
|
+
scroll_to_load: z.ZodDefault<z.ZodBoolean>;
|
|
20
|
+
scroll_count: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
21
|
+
remove_cookie_banners: z.ZodDefault<z.ZodBoolean>;
|
|
22
|
+
location: z.ZodOptional<z.ZodObject<{
|
|
23
|
+
country: z.ZodOptional<z.ZodString>;
|
|
24
|
+
language: z.ZodOptional<z.ZodString>;
|
|
25
|
+
}, "strip", z.ZodTypeAny, {
|
|
26
|
+
country?: string | undefined;
|
|
27
|
+
language?: string | undefined;
|
|
28
|
+
}, {
|
|
29
|
+
country?: string | undefined;
|
|
30
|
+
language?: string | undefined;
|
|
31
|
+
}>>;
|
|
16
32
|
}, "strip", z.ZodTypeAny, {
|
|
17
33
|
url: string;
|
|
18
34
|
timeout: number;
|
|
35
|
+
method: "GET" | "POST" | "PUT" | "PATCH" | "DELETE" | "HEAD";
|
|
19
36
|
mode: "auto" | "html" | "js" | "pdf" | "ocr";
|
|
20
|
-
formats: ("text" | "html" | "json" | "markdown")[];
|
|
21
|
-
render_js: boolean;
|
|
37
|
+
formats: ("text" | "html" | "json" | "json_v2" | "markdown" | "rag")[];
|
|
38
|
+
render_js: boolean | "auto";
|
|
22
39
|
use_proxy: boolean;
|
|
23
40
|
include_raw_html: boolean;
|
|
41
|
+
scroll_to_load: boolean;
|
|
42
|
+
remove_cookie_banners: boolean;
|
|
43
|
+
body?: string | undefined;
|
|
24
44
|
proxy_country?: string | undefined;
|
|
25
45
|
wait_for?: string | undefined;
|
|
46
|
+
max_response_bytes?: number | undefined;
|
|
26
47
|
session_id?: string | undefined;
|
|
27
48
|
cookies?: Record<string, string> | undefined;
|
|
49
|
+
scroll_count?: number | undefined;
|
|
50
|
+
location?: {
|
|
51
|
+
country?: string | undefined;
|
|
52
|
+
language?: string | undefined;
|
|
53
|
+
} | undefined;
|
|
28
54
|
}, {
|
|
29
55
|
url: string;
|
|
30
56
|
timeout?: number | undefined;
|
|
57
|
+
method?: "GET" | "POST" | "PUT" | "PATCH" | "DELETE" | "HEAD" | undefined;
|
|
58
|
+
body?: string | undefined;
|
|
31
59
|
mode?: "auto" | "html" | "js" | "pdf" | "ocr" | undefined;
|
|
32
|
-
formats?: ("text" | "html" | "json" | "markdown")[] | undefined;
|
|
33
|
-
render_js?: boolean | undefined;
|
|
60
|
+
formats?: ("text" | "html" | "json" | "json_v2" | "markdown" | "rag")[] | undefined;
|
|
61
|
+
render_js?: boolean | "auto" | undefined;
|
|
34
62
|
use_proxy?: boolean | undefined;
|
|
35
63
|
proxy_country?: string | undefined;
|
|
36
64
|
wait_for?: string | undefined;
|
|
65
|
+
max_response_bytes?: number | undefined;
|
|
37
66
|
include_raw_html?: boolean | undefined;
|
|
38
67
|
session_id?: string | undefined;
|
|
39
68
|
cookies?: Record<string, string> | undefined;
|
|
69
|
+
scroll_to_load?: boolean | undefined;
|
|
70
|
+
scroll_count?: number | undefined;
|
|
71
|
+
remove_cookie_banners?: boolean | undefined;
|
|
72
|
+
location?: {
|
|
73
|
+
country?: string | undefined;
|
|
74
|
+
language?: string | undefined;
|
|
75
|
+
} | undefined;
|
|
40
76
|
}>;
|
|
41
77
|
export declare const scrapeDescription: string;
|
|
42
78
|
export declare function handleScrape(client: AlterLabClient, params: z.infer<typeof scrapeSchema>): Promise<CallToolResult>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../src/tools/scrape.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACzE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAInD,eAAO,MAAM,YAAY
|
|
1
|
+
{"version":3,"file":"scrape.d.ts","sourceRoot":"","sources":["../../src/tools/scrape.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACzE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAInD,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAsJvB,CAAC;AAEH,eAAO,MAAM,iBAAiB,QAc0B,CAAC;AAEzD,wBAAsB,YAAY,CAChC,MAAM,EAAE,cAAc,EACtB,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,GACnC,OAAO,CAAC,cAAc,CAAC,CAoCzB"}
|
package/dist/tools/scrape.js
CHANGED
|
@@ -7,22 +7,42 @@ const errors_js_1 = require("../errors.js");
|
|
|
7
7
|
const format_js_1 = require("../format.js");
|
|
8
8
|
exports.scrapeSchema = zod_1.z.object({
|
|
9
9
|
url: zod_1.z.string().url().describe("URL to scrape"),
|
|
10
|
+
method: zod_1.z
|
|
11
|
+
.enum(["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD"])
|
|
12
|
+
.default("GET")
|
|
13
|
+
.describe("HTTP method for the request. Default GET (standard page scraping). " +
|
|
14
|
+
"Use POST for GraphQL endpoints, form submissions, REST API calls. " +
|
|
15
|
+
"Use PUT/PATCH for REST API updates. " +
|
|
16
|
+
"When using POST/PUT/PATCH, provide body with the request payload."),
|
|
17
|
+
body: zod_1.z
|
|
18
|
+
.string()
|
|
19
|
+
.optional()
|
|
20
|
+
.describe("Request body for POST/PUT/PATCH requests. " +
|
|
21
|
+
"For GraphQL: JSON string with 'query' and optional 'variables' fields " +
|
|
22
|
+
'(e.g., \'{"query": "{ user { id name } }"}\').' +
|
|
23
|
+
"For REST APIs: JSON-encoded payload string. " +
|
|
24
|
+
"For form submissions: URL-encoded key=value pairs (e.g., 'name=Alice&email=alice@example.com'). " +
|
|
25
|
+
"Omit for GET/HEAD/DELETE requests."),
|
|
10
26
|
mode: zod_1.z
|
|
11
27
|
.enum(["auto", "html", "js", "pdf", "ocr"])
|
|
12
28
|
.default("auto")
|
|
13
29
|
.describe("Scraping mode: auto (recommended), html, js (headless browser), pdf, or ocr"),
|
|
14
30
|
formats: zod_1.z
|
|
15
|
-
.array(zod_1.z.enum(["text", "json", "html", "markdown"]))
|
|
31
|
+
.array(zod_1.z.enum(["text", "json", "json_v2", "html", "markdown", "rag"]))
|
|
16
32
|
.default(["markdown"])
|
|
17
|
-
.describe("Output formats. 'markdown' is best for LLM consumption"),
|
|
33
|
+
.describe("Output formats. 'markdown' is best for LLM consumption. " +
|
|
34
|
+
"'json_v2' returns a structured section tree (headings + content blocks). " +
|
|
35
|
+
"'rag' returns chunked text optimized for retrieval-augmented generation."),
|
|
18
36
|
render_js: zod_1.z
|
|
19
|
-
.boolean()
|
|
37
|
+
.union([zod_1.z.boolean(), zod_1.z.literal("auto")])
|
|
20
38
|
.default(false)
|
|
21
|
-
.describe("Render JavaScript using headless browser (forces Tier 4 minimum — no separate add-on charge).
|
|
39
|
+
.describe("Render JavaScript using headless browser (forces Tier 4 minimum — no separate add-on charge). " +
|
|
40
|
+
"Required for JS-heavy sites. Set to 'auto' for smart detection (probes each page, " +
|
|
41
|
+
"only renders JS-heavy pages with browser — saves 30-60% on mixed sites)."),
|
|
22
42
|
use_proxy: zod_1.z
|
|
23
43
|
.boolean()
|
|
24
44
|
.default(false)
|
|
25
|
-
.describe("Route through premium proxy (
|
|
45
|
+
.describe("Route through premium proxy (+$0.0002). Helps bypass geo-restrictions and anti-bot"),
|
|
26
46
|
proxy_country: zod_1.z
|
|
27
47
|
.string()
|
|
28
48
|
.optional()
|
|
@@ -37,6 +57,17 @@ exports.scrapeSchema = zod_1.z.object({
|
|
|
37
57
|
.max(300)
|
|
38
58
|
.default(90)
|
|
39
59
|
.describe("Request timeout in seconds (1-300)"),
|
|
60
|
+
max_response_bytes: zod_1.z
|
|
61
|
+
.number()
|
|
62
|
+
.int()
|
|
63
|
+
.min(0)
|
|
64
|
+
.max(52_428_800)
|
|
65
|
+
.default(5_242_880)
|
|
66
|
+
.optional()
|
|
67
|
+
.describe("Soft cap on raw response body size in bytes. " +
|
|
68
|
+
"When the downloaded HTML exceeds this value it is truncated before extraction. " +
|
|
69
|
+
"Default: 5 MB (5242880). Set to 0 for no limit. Maximum: 50 MB (52428800). " +
|
|
70
|
+
"Useful for very large pages where you only need the beginning of the content."),
|
|
40
71
|
include_raw_html: zod_1.z
|
|
41
72
|
.boolean()
|
|
42
73
|
.default(false)
|
|
@@ -54,30 +85,81 @@ exports.scrapeSchema = zod_1.z.object({
|
|
|
54
85
|
.describe("Inline cookies as key-value pairs for authenticated scraping " +
|
|
55
86
|
'(e.g., {"session_token": "abc123"}). ' +
|
|
56
87
|
"Use this for one-off requests; use session_id for reusable sessions."),
|
|
88
|
+
scroll_to_load: zod_1.z
|
|
89
|
+
.boolean()
|
|
90
|
+
.default(false)
|
|
91
|
+
.describe("Scroll page to trigger lazy-loaded content (requires render_js). " +
|
|
92
|
+
"Performs explicit viewport-height scrolls to load dynamic content. Adds ~2-3s latency."),
|
|
93
|
+
scroll_count: zod_1.z
|
|
94
|
+
.number()
|
|
95
|
+
.int()
|
|
96
|
+
.min(1)
|
|
97
|
+
.max(10)
|
|
98
|
+
.default(3)
|
|
99
|
+
.optional()
|
|
100
|
+
.describe("Number of scroll iterations when scroll_to_load is enabled (1-10, default 3)"),
|
|
101
|
+
remove_cookie_banners: zod_1.z
|
|
102
|
+
.boolean()
|
|
103
|
+
.default(true)
|
|
104
|
+
.describe("Remove cookie consent banners from HTML before content extraction (free, enabled by default)"),
|
|
105
|
+
location: zod_1.z
|
|
106
|
+
.object({
|
|
107
|
+
country: zod_1.z
|
|
108
|
+
.string()
|
|
109
|
+
.length(2)
|
|
110
|
+
.optional()
|
|
111
|
+
.describe("ISO 3166-1 alpha-2 country code for geo-targeting (e.g., 'US', 'DE', 'JP'). " +
|
|
112
|
+
"Routes request through a proxy in the specified country."),
|
|
113
|
+
language: zod_1.z
|
|
114
|
+
.string()
|
|
115
|
+
.min(2)
|
|
116
|
+
.max(5)
|
|
117
|
+
.optional()
|
|
118
|
+
.describe("ISO 639-1 language code (e.g., 'en', 'de', 'ja'). " +
|
|
119
|
+
"Sets the Accept-Language header and browser locale."),
|
|
120
|
+
})
|
|
121
|
+
.optional()
|
|
122
|
+
.describe("Geo-targeting parameters for localized content scraping. " +
|
|
123
|
+
"Controls proxy country routing, Accept-Language header, and browser locale."),
|
|
57
124
|
});
|
|
58
|
-
exports.scrapeDescription = "Scrape a URL and return its content as markdown, text, HTML, or
|
|
125
|
+
exports.scrapeDescription = "Scrape a URL and return its content as markdown, text, HTML, JSON, or structured sections. " +
|
|
59
126
|
"Automatically handles anti-bot protection with tier escalation. " +
|
|
60
127
|
"Returns markdown by default — optimized for LLM context. " +
|
|
128
|
+
"Supports GET (default) and POST/PUT/PATCH/DELETE/HEAD via the method parameter. " +
|
|
129
|
+
"Use method='POST' with body for GraphQL APIs, REST endpoints, and form submissions. " +
|
|
130
|
+
"For GraphQL: set body='{\"query\": \"{ ... }\"}' and method='POST'. " +
|
|
61
131
|
"Use render_js=true for JavaScript-heavy sites (React, Angular, SPAs). " +
|
|
132
|
+
"Use render_js='auto' for mixed sites to detect JS needs per-page (saves 30-60%). " +
|
|
62
133
|
"Use use_proxy=true for geo-restricted or heavily protected sites. " +
|
|
63
|
-
"
|
|
134
|
+
"Use formats=['json_v2'] for a structured section tree (headings + content blocks). " +
|
|
135
|
+
"Use formats=['rag'] for chunked text optimized for RAG pipelines. " +
|
|
136
|
+
"Supports authenticated scraping via session_id (stored session) or inline cookies. " +
|
|
137
|
+
"Use scroll_to_load=true for infinite-scroll pages that lazy-load content. " +
|
|
138
|
+
"Use location.country to scrape geo-targeted content.";
|
|
64
139
|
async function handleScrape(client, params) {
|
|
65
140
|
try {
|
|
66
141
|
const response = await client.scrape({
|
|
67
142
|
url: params.url,
|
|
143
|
+
method: params.method,
|
|
144
|
+
body: params.body,
|
|
68
145
|
mode: params.mode,
|
|
69
146
|
formats: params.formats,
|
|
70
147
|
sync: true,
|
|
71
148
|
timeout: params.timeout,
|
|
149
|
+
max_response_bytes: params.max_response_bytes,
|
|
72
150
|
include_raw_html: params.include_raw_html,
|
|
73
151
|
wait_for: params.wait_for,
|
|
74
152
|
session_id: params.session_id,
|
|
75
153
|
cookies: params.cookies,
|
|
154
|
+
location: params.location,
|
|
76
155
|
advanced: {
|
|
77
156
|
render_js: params.render_js,
|
|
78
157
|
use_proxy: params.use_proxy,
|
|
79
158
|
proxy_country: params.proxy_country,
|
|
80
159
|
markdown: params.formats.includes("markdown"),
|
|
160
|
+
scroll_to_load: params.scroll_to_load,
|
|
161
|
+
scroll_count: params.scroll_count,
|
|
162
|
+
remove_cookie_banners: params.remove_cookie_banners,
|
|
81
163
|
},
|
|
82
164
|
});
|
|
83
165
|
return {
|
package/dist/tools/scrape.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrape.js","sourceRoot":"","sources":["../../src/tools/scrape.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"scrape.js","sourceRoot":"","sources":["../../src/tools/scrape.ts"],"names":[],"mappings":";;;AA8KA,oCAuCC;AArND,6BAAwB;AAGxB,4CAAgE;AAChE,4CAAoD;AAEvC,QAAA,YAAY,GAAG,OAAC,CAAC,MAAM,CAAC;IACnC,GAAG,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC;IAC/C,MAAM,EAAE,OAAC;SACN,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;SACvD,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CACP,qEAAqE;QACnE,oEAAoE;QACpE,sCAAsC;QACtC,mEAAmE,CACtE;IACH,IAAI,EAAE,OAAC;SACJ,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,4CAA4C;QAC1C,wEAAwE;QACxE,gDAAgD;QAChD,8CAA8C;QAC9C,kGAAkG;QAClG,oCAAoC,CACvC;IACH,IAAI,EAAE,OAAC;SACJ,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;SAC1C,OAAO,CAAC,MAAM,CAAC;SACf,QAAQ,CACP,6EAA6E,CAC9E;IACH,OAAO,EAAE,OAAC;SACP,KAAK,CAAC,OAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC;SACrE,OAAO,CAAC,CAAC,UAAU,CAAC,CAAC;SACrB,QAAQ,CACP,0DAA0D;QACxD,2EAA2E;QAC3E,0EAA0E,CAC7E;IACH,SAAS,EAAE,OAAC;SACT,KAAK,CAAC,CAAC,OAAC,CAAC,OAAO,EAAE,EAAE,OAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;SACvC,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CACP,gGAAgG;QAC9F,oFAAoF;QACpF,0EAA0E,CAC7E;IACH,SAAS,EAAE,OAAC;SACT,OAAO,EAAE;SACT,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CACP,oFAAoF,CACrF;IACH,aAAa,EAAE,OAAC;SACb,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,gFAAgF,CACjF;IACH,QAAQ,EAAE,OAAC;SACR,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,4EAA4E,CAC7E;IACH,OAAO,EAAE,OAAC;SACP,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,GAAG,CAAC;SACR,OAAO,CAAC,EAAE,CAAC;SACX,QAAQ,CAAC,oCAAoC,CAAC;IACjD,kBAAkB,EAAE,OAAC;SAClB,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,UAAU,CAAC;SACf,OAAO,CAAC,SAAS,CAAC;SAClB,QAAQ,EAAE;SACV,QAAQ,CACP,+CAA+C;QAC7C,iFAAiF;QACjF,6EAA6E;QAC7E,+EAA+E,CAClF;IACH,gBAAgB,EAAE,OAAC;SAChB,OAAO,EAAE;SACT,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CAAC,8DAA8D,CAAC;IAC3E,UAAU,EAAE,OAAC;SACV,MAAM,EAAE;SACR,IAAI,EAAE;SACN,QAAQ,EAAE;SACV,QAAQ,CACP,uDAAuD;QACrD,yDAAyD;QACzD,0DAA0D,CAC7D;IACH,OAAO,EAAE,OA
AC;SACP,MAAM,CAAC,OAAC,CAAC,MAAM,EAAE,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC;SAC9B,QAAQ,EAAE;SACV,QAAQ,CACP,+DAA+D;QAC7D,uCAAuC;QACvC,sEAAsE,CACzE;IACH,cAAc,EAAE,OAAC;SACd,OAAO,EAAE;SACT,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CACP,mEAAmE;QACjE,wFAAwF,CAC3F;IACH,YAAY,EAAE,OAAC;SACZ,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,EAAE;SACV,QAAQ,CACP,8EAA8E,CAC/E;IACH,qBAAqB,EAAE,OAAC;SACrB,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CACP,8FAA8F,CAC/F;IACH,QAAQ,EAAE,OAAC;SACR,MAAM,CAAC;QACN,OAAO,EAAE,OAAC;aACP,MAAM,EAAE;aACR,MAAM,CAAC,CAAC,CAAC;aACT,QAAQ,EAAE;aACV,QAAQ,CACP,8EAA8E;YAC5E,0DAA0D,CAC7D;QACH,QAAQ,EAAE,OAAC;aACR,MAAM,EAAE;aACR,GAAG,CAAC,CAAC,CAAC;aACN,GAAG,CAAC,CAAC,CAAC;aACN,QAAQ,EAAE;aACV,QAAQ,CACP,oDAAoD;YAClD,qDAAqD,CACxD;KACJ,CAAC;SACD,QAAQ,EAAE;SACV,QAAQ,CACP,2DAA2D;QACzD,6EAA6E,CAChF;CACJ,CAAC,CAAC;AAEU,QAAA,iBAAiB,GAC5B,6FAA6F;IAC7F,kEAAkE;IAClE,2DAA2D;IAC3D,kFAAkF;IAClF,sFAAsF;IACtF,sEAAsE;IACtE,wEAAwE;IACxE,mFAAmF;IACnF,oEAAoE;IACpE,qFAAqF;IACrF,oEAAoE;IACpE,qFAAqF;IACrF,4EAA4E;IAC5E,sDAAsD,CAAC;AAElD,KAAK,UAAU,YAAY,CAChC,MAAsB,EACtB,MAAoC;IAEpC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC;YACnC,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,IAAI,EAAE,IAAI;YACV,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,kBAAkB,EAAE,MAAM,CAAC,kBAAkB;YAC7C,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;YACzC,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,QAAQ,EAAE;gBACR,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,aAAa,EAAE,MAAM,CAAC,aAAa;gBACnC,QAAQ,EAAE,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAC7C,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,qBAAqB,EAAE,MAAM,CAAC,qBAAqB;aACpD;SACF,CAAC,CAAC;QAEH,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAA,gCAAoB,EAAC,QAAQ,CAAC,EAAE,CAAC;SAClE,CAAC;IACJ,CAAC;IAAC,OAA
O,KAAK,EAAE,CAAC;QACf,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,IAAA,6BAAiB,EAAC,KAAK,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,IAAA,6BAAiB,EAAC,KAAc,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;IAChE,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,OAAO,CACL,OAAO,KAAK,KAAK,QAAQ;QACzB,KAAK,KAAK,IAAI;QACd,QAAQ,IAAI,KAAK;QACjB,OAAQ,KAAkB,CAAC,MAAM,KAAK,QAAQ,CAC/C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
|
|
3
|
+
import type { AlterLabClient } from "../client.js";
|
|
4
|
+
export declare const searchSchema: z.ZodObject<{
|
|
5
|
+
query: z.ZodString;
|
|
6
|
+
num_results: z.ZodDefault<z.ZodNumber>;
|
|
7
|
+
page: z.ZodDefault<z.ZodNumber>;
|
|
8
|
+
domain: z.ZodOptional<z.ZodString>;
|
|
9
|
+
country: z.ZodOptional<z.ZodString>;
|
|
10
|
+
language: z.ZodOptional<z.ZodString>;
|
|
11
|
+
time_range: z.ZodOptional<z.ZodEnum<["hour", "day", "week", "month", "year"]>>;
|
|
12
|
+
scrape_results: z.ZodDefault<z.ZodBoolean>;
|
|
13
|
+
formats: z.ZodOptional<z.ZodArray<z.ZodEnum<["text", "json", "json_v2", "html", "markdown"]>, "many">>;
|
|
14
|
+
extraction_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
15
|
+
}, "strip", z.ZodTypeAny, {
|
|
16
|
+
query: string;
|
|
17
|
+
num_results: number;
|
|
18
|
+
page: number;
|
|
19
|
+
scrape_results: boolean;
|
|
20
|
+
formats?: ("text" | "html" | "json" | "json_v2" | "markdown")[] | undefined;
|
|
21
|
+
country?: string | undefined;
|
|
22
|
+
language?: string | undefined;
|
|
23
|
+
extraction_schema?: Record<string, unknown> | undefined;
|
|
24
|
+
domain?: string | undefined;
|
|
25
|
+
time_range?: "hour" | "day" | "week" | "month" | "year" | undefined;
|
|
26
|
+
}, {
|
|
27
|
+
query: string;
|
|
28
|
+
formats?: ("text" | "html" | "json" | "json_v2" | "markdown")[] | undefined;
|
|
29
|
+
country?: string | undefined;
|
|
30
|
+
language?: string | undefined;
|
|
31
|
+
extraction_schema?: Record<string, unknown> | undefined;
|
|
32
|
+
num_results?: number | undefined;
|
|
33
|
+
page?: number | undefined;
|
|
34
|
+
domain?: string | undefined;
|
|
35
|
+
time_range?: "hour" | "day" | "week" | "month" | "year" | undefined;
|
|
36
|
+
scrape_results?: boolean | undefined;
|
|
37
|
+
}>;
|
|
38
|
+
export declare const searchDescription: string;
|
|
39
|
+
export declare function handleSearch(client: AlterLabClient, params: z.infer<typeof searchSchema>): Promise<CallToolResult>;
|
|
40
|
+
//# sourceMappingURL=search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/tools/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACzE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAGnD,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA4DvB,CAAC;AAEH,eAAO,MAAM,iBAAiB,QAOqC,CAAC;AAEpE,wBAAsB,YAAY,CAChC,MAAM,EAAE,cAAc,EACtB,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,GACnC,OAAO,CAAC,cAAc,CAAC,CAuBzB"}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.searchDescription = exports.searchSchema = void 0;
|
|
4
|
+
exports.handleSearch = handleSearch;
|
|
5
|
+
const zod_1 = require("zod");
|
|
6
|
+
const errors_js_1 = require("../errors.js");
|
|
7
|
+
exports.searchSchema = zod_1.z.object({
|
|
8
|
+
query: zod_1.z
|
|
9
|
+
.string()
|
|
10
|
+
.min(1)
|
|
11
|
+
.max(500)
|
|
12
|
+
.describe("Search query (max 500 characters)"),
|
|
13
|
+
num_results: zod_1.z
|
|
14
|
+
.number()
|
|
15
|
+
.int()
|
|
16
|
+
.min(1)
|
|
17
|
+
.max(30)
|
|
18
|
+
.default(10)
|
|
19
|
+
.describe("Number of results to return (1-30)"),
|
|
20
|
+
page: zod_1.z
|
|
21
|
+
.number()
|
|
22
|
+
.int()
|
|
23
|
+
.min(1)
|
|
24
|
+
.max(10)
|
|
25
|
+
.default(1)
|
|
26
|
+
.describe("Result page number (1-indexed). Page 2 returns results 11-20, etc."),
|
|
27
|
+
domain: zod_1.z
|
|
28
|
+
.string()
|
|
29
|
+
.optional()
|
|
30
|
+
.describe("Restrict results to a specific domain (applied as site: prefix, e.g. 'docs.example.com')"),
|
|
31
|
+
country: zod_1.z
|
|
32
|
+
.string()
|
|
33
|
+
.length(2)
|
|
34
|
+
.optional()
|
|
35
|
+
.describe("ISO 3166-1 alpha-2 country code for geo-targeted results (e.g., 'US', 'GB', 'DE')"),
|
|
36
|
+
language: zod_1.z
|
|
37
|
+
.string()
|
|
38
|
+
.min(2)
|
|
39
|
+
.max(5)
|
|
40
|
+
.optional()
|
|
41
|
+
.describe("Language code for results (e.g., 'en', 'fr', 'de')"),
|
|
42
|
+
time_range: zod_1.z
|
|
43
|
+
.enum(["hour", "day", "week", "month", "year"])
|
|
44
|
+
.optional()
|
|
45
|
+
.describe("Filter results by recency"),
|
|
46
|
+
scrape_results: zod_1.z
|
|
47
|
+
.boolean()
|
|
48
|
+
.default(false)
|
|
49
|
+
.describe("If true, scrape each result page and include content in response. " +
|
|
50
|
+
"Each page is billed at its scraping tier cost in addition to the base search fee."),
|
|
51
|
+
formats: zod_1.z
|
|
52
|
+
.array(zod_1.z.enum(["text", "json", "json_v2", "html", "markdown"]))
|
|
53
|
+
.optional()
|
|
54
|
+
.describe("Output formats when scrape_results=true"),
|
|
55
|
+
extraction_schema: zod_1.z
|
|
56
|
+
.record(zod_1.z.unknown())
|
|
57
|
+
.optional()
|
|
58
|
+
.describe("JSON schema for structured extraction when scrape_results=true"),
|
|
59
|
+
});
|
|
60
|
+
exports.searchDescription = "Execute a web search and return SERP results (URLs, titles, snippets). " +
|
|
61
|
+
"Uses AlterLab's own SERP engine with Google/Bing/DuckDuckGo multi-engine failover. " +
|
|
62
|
+
"Costs $0.001 per search query. " +
|
|
63
|
+
"Set scrape_results=true to also scrape each result page and get full content — " +
|
|
64
|
+
"each page is billed at its normal scraping tier cost. " +
|
|
65
|
+
"Use domain to restrict results to a specific site (equivalent to site: operator). " +
|
|
66
|
+
"Use time_range to filter by recency (hour/day/week/month/year).";
|
|
67
|
+
async function handleSearch(client, params) {
|
|
68
|
+
try {
|
|
69
|
+
const response = await client.search({
|
|
70
|
+
query: params.query,
|
|
71
|
+
num_results: params.num_results,
|
|
72
|
+
page: params.page,
|
|
73
|
+
domain: params.domain,
|
|
74
|
+
country: params.country,
|
|
75
|
+
language: params.language,
|
|
76
|
+
time_range: params.time_range,
|
|
77
|
+
scrape_results: params.scrape_results,
|
|
78
|
+
formats: params.formats,
|
|
79
|
+
extraction_schema: params.extraction_schema,
|
|
80
|
+
});
|
|
81
|
+
const text = formatSearchResponse(response, params.query);
|
|
82
|
+
return { content: [{ type: "text", text }] };
|
|
83
|
+
}
|
|
84
|
+
catch (error) {
|
|
85
|
+
if (isApiError(error)) {
|
|
86
|
+
return (0, errors_js_1.formatErrorResult)(error, { url: params.query });
|
|
87
|
+
}
|
|
88
|
+
return (0, errors_js_1.formatErrorResult)(error, { url: params.query });
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
function formatSearchResponse(response, query) {
|
|
92
|
+
const resp = response;
|
|
93
|
+
const searchId = String(resp.search_id ?? "");
|
|
94
|
+
const results = Array.isArray(resp.results) ? resp.results : [];
|
|
95
|
+
const resultsCount = Number(resp.results_count ?? results.length);
|
|
96
|
+
const creditsUsed = Number(resp.credits_used ?? 0);
|
|
97
|
+
const costBreakdown = resp.cost_breakdown;
|
|
98
|
+
const parts = [
|
|
99
|
+
`**Search Results for: "${query}"**\n`,
|
|
100
|
+
`Found ${resultsCount} result${resultsCount !== 1 ? "s" : ""} | Search ID: \`${searchId}\`\n`,
|
|
101
|
+
];
|
|
102
|
+
for (const result of results) {
|
|
103
|
+
const r = result;
|
|
104
|
+
const position = Number(r.position ?? 0);
|
|
105
|
+
const title = String(r.title ?? "Untitled");
|
|
106
|
+
const url = String(r.url ?? "");
|
|
107
|
+
const snippet = String(r.snippet ?? "");
|
|
108
|
+
const datePublished = r.date_published
|
|
109
|
+
? ` (${String(r.date_published)})`
|
|
110
|
+
: "";
|
|
111
|
+
parts.push(`**${position}. ${title}**${datePublished}`);
|
|
112
|
+
parts.push(`${url}`);
|
|
113
|
+
parts.push(`${snippet}\n`);
|
|
114
|
+
// Include scraped content if available
|
|
115
|
+
const content = r.content;
|
|
116
|
+
if (content) {
|
|
117
|
+
const scrapeText = content.markdown ||
|
|
118
|
+
content.text;
|
|
119
|
+
if (scrapeText) {
|
|
120
|
+
const preview = scrapeText.slice(0, 500);
|
|
121
|
+
parts.push(`> ${preview}${scrapeText.length > 500 ? "..." : ""}\n`);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
// Cost footer
|
|
126
|
+
parts.push("---");
|
|
127
|
+
if (costBreakdown) {
|
|
128
|
+
const total = Number(costBreakdown.total_microcents ?? 0);
|
|
129
|
+
parts.push(`Cost: $${(total / 1_000_000).toFixed(6)} | Credits: ${creditsUsed}`);
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
parts.push(`Credits used: ${creditsUsed}`);
|
|
133
|
+
}
|
|
134
|
+
// Featured snippet, knowledge panel, PAA
|
|
135
|
+
const featuredSnippet = resp.featured_snippet;
|
|
136
|
+
if (featuredSnippet) {
|
|
137
|
+
parts.unshift(`**Featured Snippet**: ${String(featuredSnippet.content ?? "")}\n`);
|
|
138
|
+
}
|
|
139
|
+
return parts.join("\n");
|
|
140
|
+
}
|
|
141
|
+
function isApiError(error) {
|
|
142
|
+
return (typeof error === "object" &&
|
|
143
|
+
error !== null &&
|
|
144
|
+
"status" in error &&
|
|
145
|
+
typeof error.status === "number");
|
|
146
|
+
}
|
|
147
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/tools/search.ts"],"names":[],"mappings":";;;AA4EA,oCA0BC;AAtGD,6BAAwB;AAGxB,4CAAgE;AAEnD,QAAA,YAAY,GAAG,OAAC,CAAC,MAAM,CAAC;IACnC,KAAK,EAAE,OAAC;SACL,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,GAAG,CAAC;SACR,QAAQ,CAAC,mCAAmC,CAAC;IAChD,WAAW,EAAE,OAAC;SACX,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,EAAE,CAAC;SACX,QAAQ,CAAC,oCAAoC,CAAC;IACjD,IAAI,EAAE,OAAC;SACJ,MAAM,EAAE;SACR,GAAG,EAAE;SACL,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CACP,oEAAoE,CACrE;IACH,MAAM,EAAE,OAAC;SACN,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CACP,0FAA0F,CAC3F;IACH,OAAO,EAAE,OAAC;SACP,MAAM,EAAE;SACR,MAAM,CAAC,CAAC,CAAC;SACT,QAAQ,EAAE;SACV,QAAQ,CACP,mFAAmF,CACpF;IACH,QAAQ,EAAE,OAAC;SACR,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,EAAE;SACV,QAAQ,CAAC,oDAAoD,CAAC;IACjE,UAAU,EAAE,OAAC;SACV,IAAI,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;SAC9C,QAAQ,EAAE;SACV,QAAQ,CAAC,2BAA2B,CAAC;IACxC,cAAc,EAAE,OAAC;SACd,OAAO,EAAE;SACT,OAAO,CAAC,KAAK,CAAC;SACd,QAAQ,CACP,oEAAoE;QAClE,mFAAmF,CACtF;IACH,OAAO,EAAE,OAAC;SACP,KAAK,CAAC,OAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC;SAC9D,QAAQ,EAAE;SACV,QAAQ,CAAC,yCAAyC,CAAC;IACtD,iBAAiB,EAAE,OAAC;SACjB,MAAM,CAAC,OAAC,CAAC,OAAO,EAAE,CAAC;SACnB,QAAQ,EAAE;SACV,QAAQ,CAAC,gEAAgE,CAAC;CAC9E,CAAC,CAAC;AAEU,QAAA,iBAAiB,GAC5B,yEAAyE;IACzE,qFAAqF;IACrF,iCAAiC;IACjC,iFAAiF;IACjF,wDAAwD;IACxD,oFAAoF;IACpF,iEAAiE,CAAC;AAE7D,KAAK,UAAU,YAAY,CAChC,MAAsB,EACtB,MAAoC;IAEpC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC;YACnC,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;SAC5C,CAAC,CAAC;QA
EH,MAAM,IAAI,GAAG,oBAAoB,CAAC,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC1D,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,IAAA,6BAAiB,EAAC,KAAK,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;QACzD,CAAC;QACD,OAAO,IAAA,6BAAiB,EAAC,KAAc,EAAE,EAAE,GAAG,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;IAClE,CAAC;AACH,CAAC;AAED,SAAS,oBAAoB,CAAC,QAAiB,EAAE,KAAa;IAC5D,MAAM,IAAI,GAAG,QAAmC,CAAC;IACjD,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;IAChE,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC;IAClE,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;IACnD,MAAM,aAAa,GAAG,IAAI,CAAC,cAEd,CAAC;IAEd,MAAM,KAAK,GAAa;QACtB,0BAA0B,KAAK,OAAO;QACtC,SAAS,YAAY,UAAU,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,mBAAmB,QAAQ,MAAM;KAC9F,CAAC;IAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,CAAC,GAAG,MAAiC,CAAC;QAC5C,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,UAAU,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;QAChC,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;QACxC,MAAM,aAAa,GAAG,CAAC,CAAC,cAAc;YACpC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,GAAG;YAClC,CAAC,CAAC,EAAE,CAAC;QAEP,KAAK,CAAC,IAAI,CAAC,KAAK,QAAQ,KAAK,KAAK,KAAK,aAAa,EAAE,CAAC,CAAC;QACxD,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC;QACrB,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;QAE3B,uCAAuC;QACvC,MAAM,OAAO,GAAG,CAAC,CAAC,OAA8C,CAAC;QACjE,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,UAAU,GACb,OAAO,CAAC,QAA+B;gBACvC,OAAO,CAAC,IAA2B,CAAC;YACvC,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBACzC,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,GAAG,UAAU,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;YACtE,CAAC;QAC
H,CAAC;IACH,CAAC;IAED,cAAc;IACd,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,KAAK,GAAG,MAAM,CAAC,aAAa,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;QAC1D,KAAK,CAAC,IAAI,CACR,UAAU,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,WAAW,EAAE,CACrE,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,iBAAiB,WAAW,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED,yCAAyC;IACzC,MAAM,eAAe,GAAG,IAAI,CAAC,gBAEhB,CAAC;IACd,IAAI,eAAe,EAAE,CAAC;QACpB,KAAK,CAAC,OAAO,CACX,yBAAyB,MAAM,CAAC,eAAe,CAAC,OAAO,IAAI,EAAE,CAAC,IAAI,CACnE,CAAC;IACJ,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,OAAO,CACL,OAAO,KAAK,KAAK,QAAQ;QACzB,KAAK,KAAK,IAAI;QACd,QAAQ,IAAI,KAAK;QACjB,OAAQ,KAAkB,CAAC,MAAM,KAAK,QAAQ,CAC/C,CAAC;AACJ,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,20 +1,29 @@
|
|
|
1
1
|
export interface AdvancedOptions {
|
|
2
|
-
render_js?: boolean;
|
|
2
|
+
render_js?: boolean | "auto";
|
|
3
3
|
screenshot?: boolean;
|
|
4
4
|
markdown?: boolean;
|
|
5
5
|
use_proxy?: boolean;
|
|
6
6
|
proxy_country?: string;
|
|
7
7
|
wait_condition?: string;
|
|
8
8
|
remove_cookie_banners?: boolean;
|
|
9
|
+
scroll_to_load?: boolean;
|
|
10
|
+
scroll_count?: number;
|
|
11
|
+
}
|
|
12
|
+
export interface LocationOptions {
|
|
13
|
+
country?: string;
|
|
14
|
+
language?: string;
|
|
9
15
|
}
|
|
10
16
|
export interface UnifiedScrapeRequest {
|
|
11
17
|
url: string;
|
|
18
|
+
method?: "GET" | "POST" | "PUT" | "PATCH" | "DELETE" | "HEAD";
|
|
19
|
+
body?: string;
|
|
12
20
|
mode?: "auto" | "html" | "js" | "pdf" | "ocr";
|
|
13
21
|
sync?: boolean;
|
|
14
22
|
advanced?: AdvancedOptions;
|
|
15
|
-
formats?: ("text" | "json" | "html" | "markdown")[];
|
|
23
|
+
formats?: ("text" | "json" | "json_v2" | "html" | "markdown" | "rag")[];
|
|
16
24
|
include_raw_html?: boolean;
|
|
17
25
|
timeout?: number;
|
|
26
|
+
max_response_bytes?: number;
|
|
18
27
|
extraction_schema?: Record<string, unknown>;
|
|
19
28
|
extraction_prompt?: string;
|
|
20
29
|
extraction_profile?: "auto" | "product" | "article" | "job_posting" | "faq" | "recipe" | "event";
|
|
@@ -23,6 +32,144 @@ export interface UnifiedScrapeRequest {
|
|
|
23
32
|
wait_until?: string;
|
|
24
33
|
session_id?: string;
|
|
25
34
|
cookies?: Record<string, string>;
|
|
35
|
+
location?: LocationOptions;
|
|
36
|
+
}
|
|
37
|
+
export interface CrawlAdvancedOptions {
|
|
38
|
+
render_js?: boolean | "auto";
|
|
39
|
+
use_proxy?: boolean;
|
|
40
|
+
wait_for?: string;
|
|
41
|
+
timeout?: number;
|
|
42
|
+
}
|
|
43
|
+
export interface CrawlRequest {
|
|
44
|
+
url: string;
|
|
45
|
+
max_pages?: number;
|
|
46
|
+
max_depth?: number;
|
|
47
|
+
include_patterns?: string[];
|
|
48
|
+
exclude_patterns?: string[];
|
|
49
|
+
sitemap?: "include" | "skip" | "only";
|
|
50
|
+
sitemap_path?: string;
|
|
51
|
+
formats?: ("text" | "json" | "json_v2" | "html" | "markdown")[];
|
|
52
|
+
extraction_schema?: Record<string, unknown>;
|
|
53
|
+
max_concurrency?: number;
|
|
54
|
+
respect_robots?: boolean;
|
|
55
|
+
include_subdomains?: boolean;
|
|
56
|
+
webhook_url?: string;
|
|
57
|
+
advanced?: CrawlAdvancedOptions;
|
|
58
|
+
}
|
|
59
|
+
export interface CrawlResponse {
|
|
60
|
+
crawl_id: string;
|
|
61
|
+
status: string;
|
|
62
|
+
url: string;
|
|
63
|
+
created_at?: string;
|
|
64
|
+
}
|
|
65
|
+
export interface CrawlStatusResponse {
|
|
66
|
+
crawl_id: string;
|
|
67
|
+
status: string;
|
|
68
|
+
url: string;
|
|
69
|
+
pages_scraped?: number;
|
|
70
|
+
pages_total?: number;
|
|
71
|
+
credits_used?: number;
|
|
72
|
+
results?: Record<string, unknown>[];
|
|
73
|
+
error?: string;
|
|
74
|
+
}
|
|
75
|
+
export interface CrawlCancelResponse {
|
|
76
|
+
crawl_id: string;
|
|
77
|
+
status: string;
|
|
78
|
+
pages_scraped?: number;
|
|
79
|
+
credits_refunded?: number;
|
|
80
|
+
}
|
|
81
|
+
export interface SearchRequest {
|
|
82
|
+
query: string;
|
|
83
|
+
num_results?: number;
|
|
84
|
+
page?: number;
|
|
85
|
+
domain?: string;
|
|
86
|
+
country?: string;
|
|
87
|
+
language?: string;
|
|
88
|
+
time_range?: "hour" | "day" | "week" | "month" | "year";
|
|
89
|
+
scrape_results?: boolean;
|
|
90
|
+
formats?: ("text" | "json" | "json_v2" | "html" | "markdown")[];
|
|
91
|
+
extraction_schema?: Record<string, unknown>;
|
|
92
|
+
}
|
|
93
|
+
export interface SearchResponse {
|
|
94
|
+
search_id: string;
|
|
95
|
+
query: string;
|
|
96
|
+
results_requested: number;
|
|
97
|
+
results_count: number;
|
|
98
|
+
credits_used: number;
|
|
99
|
+
results: Record<string, unknown>[];
|
|
100
|
+
cost_breakdown?: Record<string, unknown>;
|
|
101
|
+
featured_snippet?: Record<string, unknown>;
|
|
102
|
+
knowledge_panel?: Record<string, unknown>;
|
|
103
|
+
people_also_ask?: Record<string, unknown>[];
|
|
104
|
+
}
|
|
105
|
+
export interface MapRequest {
|
|
106
|
+
url: string;
|
|
107
|
+
max_pages?: number;
|
|
108
|
+
max_depth?: number;
|
|
109
|
+
include_patterns?: string[];
|
|
110
|
+
exclude_patterns?: string[];
|
|
111
|
+
search?: string;
|
|
112
|
+
sitemap?: "skip" | "include" | "only";
|
|
113
|
+
sitemap_path?: string;
|
|
114
|
+
include_metadata?: boolean;
|
|
115
|
+
include_subdomains?: boolean;
|
|
116
|
+
respect_robots?: boolean;
|
|
117
|
+
}
|
|
118
|
+
export interface MapResponse {
|
|
119
|
+
map_id: string;
|
|
120
|
+
total_urls: number;
|
|
121
|
+
urls: Record<string, unknown>[];
|
|
122
|
+
sitemap_found: boolean;
|
|
123
|
+
robots_txt?: Record<string, unknown>;
|
|
124
|
+
credits_used: number;
|
|
125
|
+
}
|
|
126
|
+
export interface ExtractRequest {
|
|
127
|
+
content: string;
|
|
128
|
+
content_type?: "html" | "text" | "markdown";
|
|
129
|
+
extraction_schema?: Record<string, unknown>;
|
|
130
|
+
extraction_profile?: "auto" | "product" | "article" | "job_posting" | "faq" | "recipe" | "event";
|
|
131
|
+
extraction_prompt?: string;
|
|
132
|
+
formats?: ("text" | "json" | "json_v2" | "html" | "markdown" | "rag")[];
|
|
133
|
+
source_url?: string;
|
|
134
|
+
evidence?: boolean;
|
|
135
|
+
}
|
|
136
|
+
export interface ExtractResponse {
|
|
137
|
+
extract_id: string;
|
|
138
|
+
formats: Record<string, unknown>;
|
|
139
|
+
credits_used: number;
|
|
140
|
+
model_used?: string;
|
|
141
|
+
extraction_method: string;
|
|
142
|
+
content_size_chars: number;
|
|
143
|
+
}
|
|
144
|
+
export interface BatchItemRequest {
|
|
145
|
+
url: string;
|
|
146
|
+
mode?: "auto" | "html" | "js" | "pdf" | "ocr";
|
|
147
|
+
formats?: ("text" | "json" | "json_v2" | "html" | "markdown" | "rag")[];
|
|
148
|
+
extraction_schema?: Record<string, unknown>;
|
|
149
|
+
timeout?: number;
|
|
150
|
+
wait_for?: string;
|
|
151
|
+
cache?: boolean;
|
|
152
|
+
advanced?: AdvancedOptions;
|
|
153
|
+
}
|
|
154
|
+
export interface BatchRequest {
|
|
155
|
+
urls: BatchItemRequest[];
|
|
156
|
+
webhook_url?: string;
|
|
157
|
+
}
|
|
158
|
+
export interface BatchResponse {
|
|
159
|
+
batch_id: string;
|
|
160
|
+
status: string;
|
|
161
|
+
total_urls: number;
|
|
162
|
+
estimated_credits: number;
|
|
163
|
+
job_ids?: string[];
|
|
164
|
+
}
|
|
165
|
+
export interface BatchStatusResponse {
|
|
166
|
+
batch_id: string;
|
|
167
|
+
status: string;
|
|
168
|
+
total_urls: number;
|
|
169
|
+
completed?: number;
|
|
170
|
+
failed?: number;
|
|
171
|
+
credits_used?: number;
|
|
172
|
+
items?: Record<string, unknown>[];
|
|
26
173
|
}
|
|
27
174
|
export interface Session {
|
|
28
175
|
id: string;
|
|
@@ -90,6 +237,21 @@ export interface SessionRefreshRequest {
|
|
|
90
237
|
cookies?: Record<string, string>;
|
|
91
238
|
headers?: Record<string, string>;
|
|
92
239
|
}
|
|
240
|
+
export interface ContentTruncationInfo {
|
|
241
|
+
/** Always true when this object is present. */
|
|
242
|
+
truncated: boolean;
|
|
243
|
+
/** Number of bytes at which the content was cut before processing. */
|
|
244
|
+
truncated_at_bytes: number;
|
|
245
|
+
/** Original content size in bytes before truncation. */
|
|
246
|
+
original_size_bytes: number;
|
|
247
|
+
/**
|
|
248
|
+
* Why truncation occurred:
|
|
249
|
+
* - 'readability_input_cap': HTML exceeded 2 MB before Readability processing
|
|
250
|
+
* - 'readability_output_cap': extracted text exceeded the output size cap
|
|
251
|
+
* - 'response_body_cap': raw response body exceeded max_response_bytes limit
|
|
252
|
+
*/
|
|
253
|
+
truncation_reason: "readability_input_cap" | "readability_output_cap" | "response_body_cap";
|
|
254
|
+
}
|
|
93
255
|
export interface TierEscalationDetail {
|
|
94
256
|
tier: string;
|
|
95
257
|
result: "success" | "failed" | "skipped";
|
|
@@ -124,6 +286,7 @@ export interface UnifiedScrapeResponse {
|
|
|
124
286
|
screenshot_url?: string;
|
|
125
287
|
pdf_url?: string;
|
|
126
288
|
filtered_content?: Record<string, unknown>;
|
|
289
|
+
content_truncated?: ContentTruncationInfo;
|
|
127
290
|
billing: BillingDetails;
|
|
128
291
|
extraction_method?: string;
|
|
129
292
|
version?: string;
|