crw-sdk 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -0
- package/dist/cjs/client.js +315 -0
- package/dist/cjs/errors.js +34 -0
- package/dist/cjs/index.js +13 -0
- package/dist/cjs/local.js +105 -0
- package/dist/cjs/package.json +1 -0
- package/dist/cjs/types.js +9 -0
- package/dist/esm/client.d.ts +45 -0
- package/dist/esm/client.js +311 -0
- package/dist/esm/errors.d.ts +14 -0
- package/dist/esm/errors.js +27 -0
- package/dist/esm/index.d.ts +3 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/local.d.ts +20 -0
- package/dist/esm/local.js +101 -0
- package/dist/esm/package.json +1 -0
- package/dist/esm/types.d.ts +86 -0
- package/dist/esm/types.js +8 -0
- package/package.json +45 -0
package/README.md
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# crw-sdk
|
|
2
|
+
|
|
3
|
+
TypeScript/JavaScript SDK for [CRW](https://github.com/us/crw) — the fast,
Firecrawl-compatible web scraper. Scrape, crawl, map, search, parse, and extract
any website from Node.
|
|
6
|
+
|
|
7
|
+
Zero runtime dependencies (Node 18+ `fetch`). Dual ESM + CommonJS.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install crw-sdk
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quick start — Cloud (default)
|
|
16
|
+
|
|
17
|
+
CRW is **cloud-first**. [Sign up for 500 free credits](https://fastcrw.com/dashboard)
— no payment, no monthly reset (sign in with GitHub/Google, ~10s) — then set `CRW_API_KEY`:
|
|
19
|
+
|
|
20
|
+
```ts
|
|
21
|
+
import { CrwClient } from "crw-sdk";
|
|
22
|
+
|
|
23
|
+
const crw = new CrwClient(); // reads CRW_API_KEY from the env
|
|
24
|
+
const res = await crw.scrape("https://example.com", { formats: ["markdown"] });
|
|
25
|
+
console.log(res.markdown);
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
// ...or pass the key explicitly
|
|
30
|
+
const crw = new CrwClient({ apiKey: "fc-..." });
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Self-hosting
|
|
34
|
+
|
|
35
|
+
```ts
|
|
36
|
+
// A self-hosted server:
|
|
37
|
+
const crw = new CrwClient({ apiUrl: "http://localhost:3000" });
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Local zero-config engine (no server, no key): set CRW_LOCAL=1.
|
|
42
|
+
# Requires the `crw-mcp` binary on PATH (or set CRW_BINARY).
|
|
43
|
+
CRW_LOCAL=1 node app.js
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Methods
|
|
47
|
+
|
|
48
|
+
| Method | Description | Mode |
|
|
49
|
+
|---|---|---|
|
|
50
|
+
| `scrape(url, opts?)` | Scrape one URL | both |
|
|
51
|
+
| `crawl(url, opts?)` | Crawl a site (async, polled) | both |
|
|
52
|
+
| `map(url, opts?)` | Discover URLs | both |
|
|
53
|
+
| `search(query, opts?)` | Web search (+ optional scrape) | both¹ |
|
|
54
|
+
| `parseFile(bytes, opts?)` | PDF → markdown / structured JSON | both |
|
|
55
|
+
| `extract({urls, schema?})` | Structured LLM extraction | HTTP |
|
|
56
|
+
| `batchScrape(urls, opts?)` | Scrape many URLs (async) | HTTP |
|
|
57
|
+
| `capabilities()` | Feature-detect the engine | HTTP |
|
|
58
|
+
| `changeTrackingDiff(cur, prev?, opts?)` | Diff vs a prior snapshot | HTTP |
|
|
59
|
+
| `close()` | Shut down the local subprocess | — |
|
|
60
|
+
|
|
61
|
+
¹ Local search needs a SearXNG URL configured on the engine.
|
|
62
|
+
|
|
63
|
+
```ts
|
|
64
|
+
// Structured extraction:
|
|
65
|
+
const data = await crw.extract({
|
|
66
|
+
urls: ["https://example.com"],
|
|
67
|
+
schema: { type: "object", properties: { title: { type: "string" } } },
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
// Parse a PDF:
|
|
71
|
+
import { readFileSync } from "node:fs";
|
|
72
|
+
const doc = await crw.parseFile(readFileSync("invoice.pdf"), { formats: ["markdown"] });
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Parity
|
|
76
|
+
|
|
77
|
+
This SDK mirrors the Python [`crw`](https://pypi.org/project/crw/) client method-for-method,
|
|
78
|
+
and both are conformance-tested against the engine's OpenAPI spec.
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/** CRW client — cloud (default), self-hosted HTTP, or local subprocess mode. */
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.CrwClient = exports.DOCS_URL = exports.DASHBOARD_URL = exports.CLOUD_API_URL = void 0;
|
|
5
|
+
const errors_js_1 = require("./errors.js");
|
|
6
|
+
const local_js_1 = require("./local.js");
|
|
7
|
+
// CRW is cloud-first: with no explicit apiUrl and no CRW_LOCAL opt-in, the client
|
|
8
|
+
// talks to the managed cloud. Mirrors the Python SDK + CLI onboarding.
|
|
9
|
+
exports.CLOUD_API_URL = "https://api.fastcrw.com";
|
|
10
|
+
exports.DASHBOARD_URL = "https://fastcrw.com/dashboard";
|
|
11
|
+
exports.DOCS_URL = "https://us.github.io/crw";
|
|
12
|
+
const SIGNUP_NUDGE = `No CRW API key found. CRW uses the managed cloud (${exports.CLOUD_API_URL}) by default.\n` +
|
|
13
|
+
` -> Sign up at ${exports.DASHBOARD_URL} for 500 free credits — no payment, no monthly ` +
|
|
14
|
+
`reset (GitHub/Google, ~10s) — then set CRW_API_KEY (or pass apiKey).\n` +
|
|
15
|
+
` -> Prefer to self-host? Set CRW_LOCAL=1 to run the local engine. Docs: ${exports.DOCS_URL}`;
|
|
16
|
+
/**
 * Interpret an environment-style flag.
 *
 * Missing values and the (case-insensitive, whitespace-trimmed) spellings
 * "", "0", "false" and "no" are false; anything else is true. Non-string
 * values are stringified first so a bundler-injected boolean or number does
 * not throw on `.trim()`.
 *
 * @param {unknown} value - Raw flag value, typically `process.env.X`.
 * @returns {boolean} Whether the flag is switched on.
 */
function envTruthy(value) {
    if (value === undefined || value === null)
        return false;
    const normalized = String(value).trim().toLowerCase();
    return !["0", "false", "no", ""].includes(normalized);
}
|
|
19
|
+
/**
 * Build the error text used when an HTTP-only method is called while the
 * client has no API URL (local subprocess mode).
 *
 * @param {string} name - Method name, shown as `name()`.
 * @param {string} reason - Short description of the endpoint the method needs.
 * @returns {string} Message explaining how to switch to HTTP mode.
 */
function httpOnlyHint(name, reason) {
    const problem = `${name}() requires HTTP mode (${reason}). It is not available with CRW_LOCAL=1. `;
    const remedy = `Use the cloud (set CRW_API_KEY) or pass apiUrl for a self-hosted server.`;
    return problem + remedy;
}
|
|
23
|
+
/** Return a promise that settles (with no value) once `ms` milliseconds have elapsed. */
const sleep = (ms) => new Promise((done) => {
    setTimeout(done, ms);
});
|
|
24
|
+
/**
 * CRW client. Talks to an HTTP API when `apiUrl` is set (managed cloud by
 * default, or an explicit self-hosted URL) and to a local subprocess via
 * LocalTransport when `apiUrl` is `null` (CRW_LOCAL opt-in).
 */
class CrwClient {
    /** Base URL for HTTP mode, or `null` in local subprocess mode. */
    apiUrl;
    /** Bearer token sent as `Authorization` when present. */
    apiKey;
    /** Lazily created LocalTransport; only used in local mode. */
    local = null;
    /**
     * CRW is cloud-first. With no arguments the client targets the managed cloud
     * (api.fastcrw.com) and needs an API key — sign up for 500 free credits at
     * https://fastcrw.com/dashboard. To self-host locally, set `CRW_LOCAL=1`.
     *
     * @param {object} [opts]
     * @param {string} [opts.apiKey] - Overrides the `CRW_API_KEY` env var.
     * @param {string} [opts.apiUrl] - Overrides the `CRW_API_URL` env var.
     * @throws {CrwError} When the cloud default is selected and no key is available.
     */
    constructor(opts = {}) {
        const env = globalThis.process?.env ?? {};
        this.apiKey = opts.apiKey ?? env.CRW_API_KEY;
        if (envTruthy(env.CRW_LOCAL)) {
            // Self-host opt-in: zero-config local engine (subprocess), no key.
            this.apiUrl = null;
            return;
        }
        // A blank URL (e.g. `CRW_API_URL=` in a shell profile) is treated as
        // "not set". Previously "" was stored as apiUrl, which skipped the key
        // check and made every call fall through to the local subprocess.
        const unsetIfBlank = (value) => (typeof value === "string" && value.trim() === "" ? undefined : value);
        const explicitUrl = unsetIfBlank(opts.apiUrl) ?? unsetIfBlank(env.CRW_API_URL);
        this.apiUrl = explicitUrl ?? exports.CLOUD_API_URL;
        // Only the managed-cloud default requires a key; an explicit self-hosted
        // server may run without auth.
        if (explicitUrl === undefined && !this.apiKey) {
            throw new errors_js_1.CrwError(SIGNUP_NUDGE);
        }
    }
    /**
     * Scrape a single URL. Supplying `jsonSchema` also adds "json" to `formats`
     * if it is not already requested. Unrecognised options are forwarded as-is.
     */
    async scrape(url, opts = {}) {
        const { formats, onlyMainContent = true, includeTags, excludeTags, renderJs, renderer, waitFor, jsonSchema, ...rest } = opts;
        const args = { url, onlyMainContent };
        if (formats)
            args.formats = [...formats];
        if (includeTags)
            args.includeTags = includeTags;
        if (excludeTags)
            args.excludeTags = excludeTags;
        if (renderJs !== undefined)
            args.renderJs = renderJs;
        if (renderer !== undefined)
            args.renderer = renderer;
        if (waitFor !== undefined)
            args.waitFor = waitFor;
        if (jsonSchema !== undefined) {
            args.jsonSchema = jsonSchema;
            const f = args.formats ?? [];
            if (!f.includes("json"))
                args.formats = [...f, "json"];
        }
        Object.assign(args, rest);
        if (this.apiUrl)
            return this.httpPost("/v1/scrape", args);
        return this.localTransport().toolCall("crw_scrape", args);
    }
    /**
     * Start a crawl job and poll it until it completes, fails or times out.
     * `pollInterval` and `timeout` are in seconds and are not sent to the engine.
     */
    async crawl(url, opts = {}) {
        const { maxDepth = 2, maxPages = 10, pollInterval = 2, timeout = 300, ...rest } = opts;
        const args = { url, maxDepth, maxPages, ...rest };
        if (this.apiUrl)
            return this.httpCrawl(args, pollInterval, timeout);
        const result = await this.localTransport().toolCall("crw_crawl", args);
        const jobId = result.id;
        if (!jobId)
            throw new errors_js_1.CrwError(`Crawl did not return job ID: ${JSON.stringify(result)}`);
        return this.pollLocalCrawl(jobId, pollInterval, timeout);
    }
    /** Discover URLs for a site; resolves to the response's `links` (or `[]`). */
    async map(url, opts = {}) {
        const { maxDepth = 2, useSitemap = true, ...rest } = opts;
        const args = { url, maxDepth, useSitemap, ...rest };
        const result = this.apiUrl
            ? await this.httpPost("/v1/map", args)
            : await this.localTransport().toolCall("crw_map", args);
        return result.links ?? [];
    }
    /**
     * Works in both modes; local mode needs a SearXNG URL configured on the engine.
     */
    async search(query, opts = {}) {
        const { limit = 5, lang, tbs, sources, categories, scrapeOptions, ...rest } = opts;
        const args = { query, limit };
        if (lang)
            args.lang = lang;
        if (tbs)
            args.tbs = tbs;
        if (sources)
            args.sources = sources;
        if (categories)
            args.categories = categories;
        if (scrapeOptions)
            args.scrapeOptions = scrapeOptions;
        Object.assign(args, rest);
        if (this.apiUrl)
            return this.httpPost("/v1/search", args);
        return this.localTransport().toolCall("crw_search", args);
    }
    /**
     * Parse a document (PDF) into markdown / structured JSON. Works in both modes.
     * HTTP mode uploads the bytes as multipart form data; local mode sends them
     * base64-encoded in the tool call.
     */
    async parseFile(content, opts = {}) {
        const { filename = "document.pdf", formats, jsonSchema, parsers, ...rest } = opts;
        if (this.apiUrl) {
            const options = {};
            if (formats)
                options.formats = [...formats];
            if (jsonSchema !== undefined)
                options.jsonSchema = jsonSchema;
            if (parsers)
                options.parsers = parsers;
            Object.assign(options, rest);
            const form = new FormData();
            form.append("file", new Blob([content]), filename);
            // The "options" part is omitted entirely when nothing was requested.
            if (Object.keys(options).length)
                form.append("options", JSON.stringify(options));
            return this.httpMultipart("/v2/parse", form);
        }
        const b64 = Buffer.from(content).toString("base64");
        const args = { filename, contentBase64: b64 };
        if (formats)
            args.formats = [...formats];
        if (jsonSchema !== undefined)
            args.jsonSchema = jsonSchema;
        if (parsers)
            args.parsers = parsers;
        Object.assign(args, rest);
        return this.localTransport().toolCall("crw_parse_file", args);
    }
    /** Structured LLM extraction across URLs (HTTP mode only). */
    async extract(opts) {
        if (!this.apiUrl)
            throw new errors_js_1.CrwError(httpOnlyHint("extract", "LLM extract job endpoint"));
        const { urls, prompt, schema, systemPrompt, pollInterval = 2, timeout = 120 } = opts;
        const body = { urls: [...urls] };
        if (prompt !== undefined)
            body.prompt = prompt;
        if (schema !== undefined)
            body.schema = schema;
        if (systemPrompt !== undefined)
            body.systemPrompt = systemPrompt;
        const start = await this.httpRequest("POST", "/v2/extract", body, { raw: true });
        const jobId = start.id;
        if (!jobId)
            throw new errors_js_1.CrwError(`extract did not return job ID: ${JSON.stringify(start)}`);
        return this.pollJob("Extract", jobId, pollInterval, timeout, () => this.httpRequest("GET", `/v2/extract/${jobId}`, undefined, { raw: true, checkSuccess: false }), {});
    }
    /** Scrape many URLs in one async batch job (HTTP mode only). */
    async batchScrape(urls, opts = {}) {
        if (!this.apiUrl)
            throw new errors_js_1.CrwError(httpOnlyHint("batchScrape", "batch job endpoint"));
        const { formats, pollInterval = 2, timeout = 300, ...rest } = opts;
        const body = { urls: [...urls], ...rest };
        if (formats)
            body.formats = [...formats];
        const start = await this.httpRequest("POST", "/v2/batch/scrape", body, { raw: true });
        const jobId = start.id;
        if (!jobId)
            throw new errors_js_1.CrwError(`Batch scrape did not return job ID: ${JSON.stringify(start)}`);
        return this.pollJob("Batch scrape", jobId, pollInterval, timeout, () => this.httpRequest("GET", `/v2/batch/scrape/${jobId}`, undefined, { raw: true, checkSuccess: false }), []);
    }
    /** Feature-detect the engine (HTTP mode only). */
    async capabilities() {
        if (!this.apiUrl)
            throw new errors_js_1.CrwError(httpOnlyHint("capabilities", "server capabilities endpoint"));
        return this.httpRequest("GET", "/v1/capabilities", undefined, { checkSuccess: false });
    }
    /** Diff a page against a prior snapshot (HTTP mode only). `modes` defaults to ["gitDiff"]. */
    async changeTrackingDiff(current, previous, opts = {}) {
        if (!this.apiUrl)
            throw new errors_js_1.CrwError(httpOnlyHint("changeTrackingDiff", "diff endpoint"));
        const { modes, schema, prompt, ...rest } = opts;
        const body = { current, modes: modes ? [...modes] : ["gitDiff"] };
        if (previous !== undefined)
            body.previous = previous;
        if (schema !== undefined)
            body.schema = schema;
        if (prompt !== undefined)
            body.prompt = prompt;
        Object.assign(body, rest);
        return this.httpPost("/v1/change-tracking/diff", body);
    }
    /** Shut down the local subprocess if running. */
    close() {
        this.local?.close();
        this.local = null;
    }
    // --- shared polling ---
    /**
     * Poll a job until its status is "completed" or "failed", or the deadline passes.
     *
     * @param {string} label - Prefix for error messages ("Crawl", "Extract", ...).
     * @param {string} jobId - Job identifier, used in the timeout message.
     * @param {number} pollInterval - Seconds to wait between status checks.
     * @param {number} timeout - Overall budget in seconds.
     * @param {() => Promise<object>} fetchStatus - Fetches the current job status object.
     * @param {unknown} emptyResult - Returned when a completed job carries no `data`.
     * @throws {CrwTimeoutError} When the deadline has passed before a status check.
     * @throws {CrwError} When the job reports status "failed".
     */
    async pollJob(label, jobId, pollInterval, timeout, fetchStatus, emptyResult) {
        const deadline = Date.now() + timeout * 1000;
        for (;;) {
            if (Date.now() > deadline)
                throw new errors_js_1.CrwTimeoutError(`${label} ${jobId} timed out after ${timeout}s`);
            const status = await fetchStatus();
            if (status.status === "completed")
                return status.data ?? emptyResult;
            if (status.status === "failed")
                throw new errors_js_1.CrwError(`${label} failed: ${status.error ?? "unknown"}`);
            await sleep(pollInterval * 1000);
        }
    }
    // --- local (subprocess) mode ---
    /** Return the LocalTransport, creating it on first use. */
    localTransport() {
        if (!this.local)
            this.local = new local_js_1.LocalTransport();
        return this.local;
    }
    /** Poll a local crawl job through the `crw_check_crawl_status` tool. */
    async pollLocalCrawl(jobId, pollInterval, timeout) {
        return this.pollJob("Crawl", jobId, pollInterval, timeout, () => this.localTransport().toolCall("crw_check_crawl_status", { id: jobId }), []);
    }
    // --- HTTP mode ---
    /**
     * Send a JSON request and unwrap the response.
     *
     * @param {string} method - HTTP method.
     * @param {string} path - Path appended to `apiUrl`.
     * @param {object} [body] - JSON body; omitted when falsy.
     * @param {object} [flags]
     * @param {boolean} [flags.raw=false] - Return the whole envelope instead of `data ?? envelope`.
     * @param {boolean} [flags.checkSuccess=true] - Throw CrwApiError when the envelope has `success === false`.
     */
    async httpRequest(method, path, body, { raw = false, checkSuccess = true } = {}) {
        if (this.apiUrl === null)
            throw new errors_js_1.CrwError("internal: httpRequest in local mode");
        // Strip every trailing slash so "http://host//" does not yield "//v1/...".
        const url = `${this.apiUrl.replace(/\/+$/, "")}${path}`;
        const headers = { "Content-Type": "application/json" };
        if (this.apiKey)
            headers.Authorization = `Bearer ${this.apiKey}`;
        const resp = await fetch(url, { method, headers, body: body ? JSON.stringify(body) : undefined });
        const result = await this.readJson(resp);
        if (checkSuccess && result.success === false) {
            throw new errors_js_1.CrwApiError(result.error ?? "API error", resp.status);
        }
        if (raw)
            return result;
        return result.data ?? result;
    }
    /** POST a FormData body; Content-Type is left unset so fetch can supply the multipart boundary. */
    async httpMultipart(path, form) {
        if (this.apiUrl === null)
            throw new errors_js_1.CrwError("internal: httpMultipart in local mode");
        const url = `${this.apiUrl.replace(/\/+$/, "")}${path}`;
        const headers = {};
        if (this.apiKey)
            headers.Authorization = `Bearer ${this.apiKey}`;
        const resp = await fetch(url, { method: "POST", headers, body: form });
        const result = await this.readJson(resp);
        if (result.success === false)
            throw new errors_js_1.CrwApiError(result.error ?? "API error", resp.status);
        return result.data ?? result;
    }
    /** Parse the JSON body; surface a non-2xx body's `error` as CrwApiError. */
    async readJson(resp) {
        const text = await resp.text();
        let parsed;
        try {
            // An empty body and a literal JSON `null` both become {} so the
            // property reads below (and in callers) cannot hit a TypeError.
            parsed = (text ? JSON.parse(text) : null) ?? {};
        }
        catch {
            if (!resp.ok)
                throw new errors_js_1.CrwApiError(`HTTP ${resp.status}: ${resp.statusText}`, resp.status);
            throw new errors_js_1.CrwApiError(`Invalid JSON response (HTTP ${resp.status})`, resp.status);
        }
        if (!resp.ok) {
            const message = parsed.error ?? parsed.message ?? `HTTP ${resp.status}`;
            throw new errors_js_1.CrwApiError(message, resp.status);
        }
        return parsed;
    }
    /** Shorthand for a POST through `httpRequest` with default flags. */
    httpPost(path, body) {
        return this.httpRequest("POST", path, body);
    }
    /** Start a crawl over HTTP and poll `/v1/crawl/{id}` until it finishes. */
    async httpCrawl(args, pollInterval, timeout) {
        const result = await this.httpPost("/v1/crawl", args);
        const jobId = result.id;
        if (!jobId)
            throw new errors_js_1.CrwError(`Crawl did not return job ID: ${JSON.stringify(result)}`);
        return this.pollJob("Crawl", jobId, pollInterval, timeout, () => this.httpRequest("GET", `/v1/crawl/${jobId}`, undefined, { raw: true }), []);
    }
}
|
|
315
|
+
exports.CrwClient = CrwClient;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/** CRW SDK error types. */
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.CrwBinaryNotFoundError = exports.CrwTimeoutError = exports.CrwApiError = exports.CrwError = void 0;
|
|
5
|
+
/** Base class for the SDK's errors; the other error classes in this module extend it. */
class CrwError extends Error {
    /** @param {string} message - Human-readable description of the failure. */
    constructor(message) {
        super(message);
        // Set explicitly so the name is stable regardless of how the class is bundled.
        this.name = "CrwError";
    }
}
|
|
11
|
+
exports.CrwError = CrwError;
|
|
12
|
+
/** Error reported by the engine or the HTTP layer. */
class CrwApiError extends CrwError {
    /** Status code passed by the thrower; undefined when none was supplied. */
    statusCode;
    /**
     * @param {string} message - Error text.
     * @param {number} [statusCode] - HTTP status of the failed response, if any.
     */
    constructor(message, statusCode) {
        super(message);
        this.name = "CrwApiError";
        this.statusCode = statusCode;
    }
}
|
|
20
|
+
exports.CrwApiError = CrwApiError;
|
|
21
|
+
/** Raised when a polled job does not finish within its timeout. */
class CrwTimeoutError extends CrwError {
    /** @param {string} message - Describes which job timed out and after how long. */
    constructor(message) {
        super(message);
        this.name = "CrwTimeoutError";
    }
}
|
|
27
|
+
exports.CrwTimeoutError = CrwTimeoutError;
|
|
28
|
+
/** Raised when the local engine binary cannot be spawned because it was not found. */
class CrwBinaryNotFoundError extends CrwError {
    /** @param {string} message - Install hint shown to the user. */
    constructor(message) {
        super(message);
        this.name = "CrwBinaryNotFoundError";
    }
}
|
|
34
|
+
exports.CrwBinaryNotFoundError = CrwBinaryNotFoundError;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CrwBinaryNotFoundError = exports.CrwTimeoutError = exports.CrwApiError = exports.CrwError = exports.DOCS_URL = exports.DASHBOARD_URL = exports.CLOUD_API_URL = exports.CrwClient = void 0;
|
|
4
|
+
var client_js_1 = require("./client.js");
|
|
5
|
+
Object.defineProperty(exports, "CrwClient", { enumerable: true, get: function () { return client_js_1.CrwClient; } });
|
|
6
|
+
Object.defineProperty(exports, "CLOUD_API_URL", { enumerable: true, get: function () { return client_js_1.CLOUD_API_URL; } });
|
|
7
|
+
Object.defineProperty(exports, "DASHBOARD_URL", { enumerable: true, get: function () { return client_js_1.DASHBOARD_URL; } });
|
|
8
|
+
Object.defineProperty(exports, "DOCS_URL", { enumerable: true, get: function () { return client_js_1.DOCS_URL; } });
|
|
9
|
+
var errors_js_1 = require("./errors.js");
|
|
10
|
+
Object.defineProperty(exports, "CrwError", { enumerable: true, get: function () { return errors_js_1.CrwError; } });
|
|
11
|
+
Object.defineProperty(exports, "CrwApiError", { enumerable: true, get: function () { return errors_js_1.CrwApiError; } });
|
|
12
|
+
Object.defineProperty(exports, "CrwTimeoutError", { enumerable: true, get: function () { return errors_js_1.CrwTimeoutError; } });
|
|
13
|
+
Object.defineProperty(exports, "CrwBinaryNotFoundError", { enumerable: true, get: function () { return errors_js_1.CrwBinaryNotFoundError; } });
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Local (CRW_LOCAL) subprocess transport: speaks MCP JSON-RPC to a `crw-mcp`
|
|
4
|
+
* binary over stdio. Mirrors the Python SDK's subprocess mode.
|
|
5
|
+
*
|
|
6
|
+
* v1 finds the binary via the `CRW_BINARY` env var or on `PATH`; auto-download
|
|
7
|
+
* (as the Python SDK does) is a fast-follow.
|
|
8
|
+
*/
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.LocalTransport = void 0;
|
|
11
|
+
const node_child_process_1 = require("node:child_process");
|
|
12
|
+
const errors_js_1 = require("./errors.js");
|
|
13
|
+
const BINARY_NAME = process.platform === "win32" ? "crw-mcp.exe" : "crw-mcp";
|
|
14
|
+
/**
 * Newline-delimited JSON-RPC 2.0 transport to a spawned `crw-mcp` process.
 * Requests are written to the child's stdin; responses are read from stdout
 * and matched to callers by `id`.
 */
class LocalTransport {
    /** Current child process, or null when none is running. */
    proc = null;
    /** Last request id handed out; incremented before each request. */
    nextId = 0;
    /** In-flight requests: id -> { resolve, reject }. */
    pending = new Map();
    /** Unparsed stdout text (a trailing partial line). */
    buffer = "";
    /** Binary to spawn: `CRW_BINARY` when set, otherwise the bare name (resolved via PATH). */
    resolveBinary() {
        // Rely on PATH resolution by spawning the bare name; if it ENOENTs the
        // error handler surfaces a clear install hint.
        return process.env.CRW_BINARY || BINARY_NAME;
    }
    /** Reject every in-flight request with `failure` and forget them. */
    failPending(failure) {
        const waiting = [...this.pending.values()];
        this.pending.clear();
        for (const p of waiting)
            p.reject(failure);
    }
    /** Return the running child process, spawning a new one if needed. */
    ensureProcess() {
        if (this.proc && this.proc.exitCode === null)
            return this.proc;
        const bin = this.resolveBinary();
        const proc = (0, node_child_process_1.spawn)(bin, [], { stdio: ["pipe", "pipe", "ignore"] });
        // A partial line left over from a previous process must not be glued
        // onto the new process's first response.
        this.buffer = "";
        // Events from a process that is no longer current (closed or replaced)
        // are ignored, so a late "exit" cannot reject the replacement's requests.
        // Clearing this.proc also stops a process that failed to spawn (whose
        // exitCode stays null) from being reused forever.
        const retire = (failure) => {
            if (this.proc !== proc)
                return;
            this.proc = null;
            this.failPending(failure);
        };
        proc.on("error", (err) => {
            const failure = err.code === "ENOENT"
                ? new errors_js_1.CrwBinaryNotFoundError(`crw-mcp binary not found on PATH. Install it (e.g. \`npm i -g crw-mcp\` or ` +
                    `\`cargo install crw-mcp\`) or set CRW_BINARY to its path.`)
                : new errors_js_1.CrwError(`crw-mcp failed to start: ${err.message}`);
            retire(failure);
        });
        // Without a listener, a write error on stdin (e.g. EPIPE) is an
        // unhandled "error" event and takes the whole host process down.
        proc.stdin.on("error", (err) => {
            retire(new errors_js_1.CrwError(`crw-mcp stdin error: ${err.message}`));
        });
        proc.stdout.setEncoding("utf8");
        proc.stdout.on("data", (chunk) => {
            if (this.proc === proc)
                this.onData(chunk);
        });
        proc.on("exit", () => {
            retire(new errors_js_1.CrwError("crw-mcp process closed unexpectedly"));
        });
        this.proc = proc;
        return proc;
    }
    /** Consume stdout text, dispatching each complete line that is a response to a pending request. */
    onData(chunk) {
        this.buffer += chunk;
        let idx;
        while ((idx = this.buffer.indexOf("\n")) >= 0) {
            const line = this.buffer.slice(0, idx).trim();
            this.buffer = this.buffer.slice(idx + 1);
            if (!line)
                continue;
            let msg;
            try {
                msg = JSON.parse(line);
            }
            catch {
                // Non-JSON output is skipped on purpose so stray output cannot break the protocol.
                continue;
            }
            // `?.` guards a literal `null` line, which parses fine but has no properties.
            const id = msg?.id;
            const p = id === undefined ? undefined : this.pending.get(id);
            if (!p)
                continue;
            this.pending.delete(id);
            if (msg.error) {
                const err = msg.error;
                p.reject(new errors_js_1.CrwApiError(err.message ?? JSON.stringify(msg.error)));
            }
            else {
                p.resolve(msg.result ?? {});
            }
        }
    }
    /** Send one JSON-RPC request; the promise settles when the matching response arrives. */
    jsonrpc(method, params) {
        const proc = this.ensureProcess();
        const id = ++this.nextId;
        return new Promise((resolve, reject) => {
            this.pending.set(id, { resolve, reject });
            proc.stdin.write(`${JSON.stringify({ jsonrpc: "2.0", id, method, params })}\n`);
        });
    }
    /**
     * Invoke a tool via `tools/call` and return its first content item's text parsed as JSON.
     *
     * @throws {CrwError} When the response has no content or the text is not valid JSON.
     * @throws {CrwApiError} When the result is flagged `isError`.
     */
    async toolCall(name, args) {
        const result = await this.jsonrpc("tools/call", { name, arguments: args });
        const content = result.content?.[0];
        if (!content)
            throw new errors_js_1.CrwError(`Empty response from ${name}`);
        if (result.isError)
            throw new errors_js_1.CrwApiError(content.text ?? "Unknown error");
        try {
            return JSON.parse(content.text ?? "{}");
        }
        catch (cause) {
            // Report malformed tool output as an SDK error rather than a bare SyntaxError.
            const failure = new errors_js_1.CrwError(`Invalid JSON in ${name} response: ${cause.message}`);
            failure.cause = cause;
            throw failure;
        }
    }
    /** Stop the child process (if running) and reject any requests still in flight. */
    close() {
        const proc = this.proc;
        // Detach first so the process's own "exit"/"error" events become no-ops.
        this.proc = null;
        if (proc && proc.exitCode === null) {
            proc.stdin.end();
            proc.kill();
        }
        this.failPending(new errors_js_1.CrwError("crw-mcp process closed unexpectedly"));
    }
}
|
|
105
|
+
exports.LocalTransport = LocalTransport;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"commonjs"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Request/response types for the CRW SDK.
|
|
4
|
+
*
|
|
5
|
+
* Hand-written against the engine's OpenAPI spec
|
|
6
|
+
* (crates/crw-server/openapi/openapi.json). Results are returned as the engine's
|
|
7
|
+
* raw JSON objects, so the result aliases are intentionally permissive.
|
|
8
|
+
*/
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/** CRW client — cloud (default), self-hosted HTTP, or local subprocess mode. */
|
|
2
|
+
import type { BatchResult, BatchScrapeOptions, Capabilities, ChangeTrackingOptions, ClientOptions, CrawlOptions, CrawlResult, DiffResult, ExtractOptions, ExtractResult, Json, MapOptions, ParseFileOptions, ParseResult, ScrapeOptions, ScrapeResult, SearchOptions, SearchResult } from "./types.js";
|
|
3
|
+
export declare const CLOUD_API_URL = "https://api.fastcrw.com";
|
|
4
|
+
export declare const DASHBOARD_URL = "https://fastcrw.com/dashboard";
|
|
5
|
+
export declare const DOCS_URL = "https://us.github.io/crw";
|
|
6
|
+
/**
 * CRW client. Uses HTTP when an API URL is configured (managed cloud by
 * default) and a local subprocess when `CRW_LOCAL` is set.
 */
export declare class CrwClient {
    private apiUrl;
    private apiKey;
    private local;
    /**
     * CRW is cloud-first. With no arguments the client targets the managed cloud
     * (api.fastcrw.com) and needs an API key — sign up for 500 free credits at
     * https://fastcrw.com/dashboard. To self-host locally, set `CRW_LOCAL=1`.
     */
    constructor(opts?: ClientOptions);
    /** Scrape one URL. Passing `jsonSchema` also requests the "json" format. */
    scrape(url: string, opts?: ScrapeOptions): Promise<ScrapeResult>;
    /**
     * Start a crawl and poll until it completes, fails or times out.
     * `pollInterval` and `timeout` are in seconds.
     */
    crawl(url: string, opts?: CrawlOptions): Promise<CrawlResult>;
    /** Discover URLs for a site; resolves to an empty array when none are returned. */
    map(url: string, opts?: MapOptions): Promise<string[]>;
    /**
     * Works in both modes; local mode needs a SearXNG URL configured on the engine.
     */
    search(query: string, opts?: SearchOptions): Promise<SearchResult>;
    /**
     * Parse a document (PDF) into markdown / structured JSON. Works in both modes.
     */
    parseFile(content: Uint8Array, opts?: ParseFileOptions): Promise<ParseResult>;
    /** Structured LLM extraction across URLs (HTTP mode only). */
    extract(opts: ExtractOptions): Promise<ExtractResult>;
    /** Scrape many URLs in one async batch job (HTTP mode only). */
    batchScrape(urls: string[], opts?: BatchScrapeOptions): Promise<BatchResult>;
    /** Feature-detect the engine (HTTP mode only). */
    capabilities(): Promise<Capabilities>;
    /** Diff a page against a prior snapshot (HTTP mode only). */
    changeTrackingDiff(current: Json, previous?: Json, opts?: ChangeTrackingOptions): Promise<DiffResult>;
    /** Shut down the local subprocess if running. */
    close(): void;
    private localTransport;
    private pollLocalCrawl;
    private httpRequest;
    private httpMultipart;
    /** Parse the JSON body; surface a non-2xx body's `error` as CrwApiError. */
    private readJson;
    private httpPost;
    private httpCrawl;
}
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/** CRW client — cloud (default), self-hosted HTTP, or local subprocess mode. */
|
|
2
|
+
import { CrwApiError, CrwError, CrwTimeoutError } from "./errors.js";
|
|
3
|
+
import { LocalTransport } from "./local.js";
|
|
4
|
+
// CRW is cloud-first: with no explicit apiUrl and no CRW_LOCAL opt-in, the client
|
|
5
|
+
// talks to the managed cloud. Mirrors the Python SDK + CLI onboarding.
|
|
6
|
+
export const CLOUD_API_URL = "https://api.fastcrw.com";
|
|
7
|
+
export const DASHBOARD_URL = "https://fastcrw.com/dashboard";
|
|
8
|
+
export const DOCS_URL = "https://us.github.io/crw";
|
|
9
|
+
const SIGNUP_NUDGE = `No CRW API key found. CRW uses the managed cloud (${CLOUD_API_URL}) by default.\n` +
|
|
10
|
+
` -> Sign up at ${DASHBOARD_URL} for 500 free credits — no payment, no monthly ` +
|
|
11
|
+
`reset (GitHub/Google, ~10s) — then set CRW_API_KEY (or pass apiKey).\n` +
|
|
12
|
+
` -> Prefer to self-host? Set CRW_LOCAL=1 to run the local engine. Docs: ${DOCS_URL}`;
|
|
13
|
+
/**
 * Interpret an environment-style flag.
 *
 * Missing values and the (case-insensitive, whitespace-trimmed) spellings
 * "", "0", "false" and "no" are false; anything else is true. Non-string
 * values are stringified first so a bundler-injected boolean or number does
 * not throw on `.trim()`.
 *
 * @param {unknown} value - Raw flag value, typically `process.env.X`.
 * @returns {boolean} Whether the flag is switched on.
 */
function envTruthy(value) {
    if (value === undefined || value === null)
        return false;
    const normalized = String(value).trim().toLowerCase();
    return !["0", "false", "no", ""].includes(normalized);
}
|
|
16
|
+
/**
 * Build the error text used when an HTTP-only method is called while the
 * client has no API URL (local subprocess mode).
 *
 * @param {string} name - Method name, shown as `name()`.
 * @param {string} reason - Short description of the endpoint the method needs.
 * @returns {string} Message explaining how to switch to HTTP mode.
 */
function httpOnlyHint(name, reason) {
    const problem = `${name}() requires HTTP mode (${reason}). It is not available with CRW_LOCAL=1. `;
    const remedy = `Use the cloud (set CRW_API_KEY) or pass apiUrl for a self-hosted server.`;
    return problem + remedy;
}
|
|
20
|
+
/** Return a promise that settles (with no value) once `ms` milliseconds have elapsed. */
const sleep = (ms) => new Promise((done) => {
    setTimeout(done, ms);
});
|
|
21
|
+
export class CrwClient {
    /** Base URL of the HTTP server, or `null` when the local subprocess engine is used. */
    apiUrl;
    /** Bearer token sent with HTTP requests; may be undefined for an explicit server URL. */
    apiKey;
    /** Local subprocess transport, created on first use by `localTransport()`. */
    local = null;
    /**
     * CRW is cloud-first. With no arguments the client targets the managed cloud
     * (api.fastcrw.com) and needs an API key — sign up for 500 free credits at
     * https://fastcrw.com/dashboard. To self-host locally, set `CRW_LOCAL=1`.
     *
     * Resolution order: a truthy `CRW_LOCAL` selects local mode (and wins over any
     * URL); otherwise `opts.apiUrl`, then `CRW_API_URL`, then the cloud default.
     *
     * @param {{ apiUrl?: string, apiKey?: string }} [opts]
     * @throws {CrwError} When the cloud default is selected and no API key is available.
     */
    constructor(opts = {}) {
        const env = globalThis.process?.env ?? {};
        this.apiKey = opts.apiKey ?? env.CRW_API_KEY;
        if (envTruthy(env.CRW_LOCAL)) {
            // Self-host opt-in: zero-config local engine (subprocess), no key.
            this.apiUrl = null;
            return;
        }
        // An empty string is not a usable URL. Treat it as "not set" so it can
        // neither bypass the API-key check nor make the `if (this.apiUrl)` mode
        // switches silently fall through to the local subprocess.
        const explicitUrl = opts.apiUrl || env.CRW_API_URL || undefined;
        this.apiUrl = explicitUrl ?? CLOUD_API_URL;
        // Only the managed-cloud default requires a key; an explicit self-hosted
        // server may run without auth.
        if (explicitUrl === undefined && !this.apiKey) {
            throw new CrwError(SIGNUP_NUDGE);
        }
    }
    /**
     * Scrape a single URL.
     *
     * Passing `jsonSchema` adds the `json` format if it is not already requested.
     * Unrecognised options are forwarded to the engine unchanged.
     *
     * @param {string} url
     * @param {object} [opts]
     * @returns {Promise<object>} The engine's scrape result.
     */
    async scrape(url, opts = {}) {
        const { formats, onlyMainContent = true, includeTags, excludeTags, renderJs, renderer, waitFor, jsonSchema, ...rest } = opts;
        const args = { url, onlyMainContent };
        if (formats) {
            args.formats = [...formats];
        }
        if (includeTags) {
            args.includeTags = includeTags;
        }
        if (excludeTags) {
            args.excludeTags = excludeTags;
        }
        if (renderJs !== undefined) {
            args.renderJs = renderJs;
        }
        if (renderer !== undefined) {
            args.renderer = renderer;
        }
        if (waitFor !== undefined) {
            args.waitFor = waitFor;
        }
        if (jsonSchema !== undefined) {
            args.jsonSchema = jsonSchema;
            const requested = args.formats ?? [];
            if (!requested.includes("json")) {
                args.formats = [...requested, "json"];
            }
        }
        Object.assign(args, rest);
        if (this.apiUrl) {
            return this.httpPost("/v1/scrape", args);
        }
        return this.localTransport().toolCall("crw_scrape", args);
    }
    /**
     * Start a crawl job and poll it until it finishes.
     *
     * @param {string} url
     * @param {object} [opts] - `maxDepth` (default 2), `maxPages` (default 10),
     *   `pollInterval` and `timeout` in seconds (defaults 2 and 300); anything
     *   else is forwarded to the engine.
     * @returns {Promise<object[]>} The job's `data`, or `[]` when absent.
     * @throws {CrwTimeoutError} When the deadline passes before completion.
     * @throws {CrwError} When no job ID is returned or the job fails.
     */
    async crawl(url, opts = {}) {
        const { maxDepth = 2, maxPages = 10, pollInterval = 2, timeout = 300, ...rest } = opts;
        const args = { url, maxDepth, maxPages, ...rest };
        if (this.apiUrl) {
            return this.httpCrawl(args, pollInterval, timeout);
        }
        const result = await this.localTransport().toolCall("crw_crawl", args);
        const jobId = result.id;
        if (!jobId) {
            throw new CrwError(`Crawl did not return job ID: ${JSON.stringify(result)}`);
        }
        return this.pollLocalCrawl(jobId, pollInterval, timeout);
    }
    /**
     * Discover links for a site.
     *
     * @param {string} url
     * @param {object} [opts] - `maxDepth` (default 2), `useSitemap` (default true);
     *   anything else is forwarded to the engine.
     * @returns {Promise<unknown[]>} The response's `links`, or `[]` when absent.
     */
    async map(url, opts = {}) {
        const { maxDepth = 2, useSitemap = true, ...rest } = opts;
        const args = { url, maxDepth, useSitemap, ...rest };
        const data = this.apiUrl
            ? await this.httpPost("/v1/map", args)
            : await this.localTransport().toolCall("crw_map", args);
        return data.links ?? [];
    }
    /**
     * Works in both modes; local mode needs a SearXNG URL configured on the engine.
     *
     * @param {string} query
     * @param {object} [opts] - `limit` (default 5), `lang`, `tbs`, `sources`,
     *   `categories`, `scrapeOptions`; anything else is forwarded to the engine.
     * @returns {Promise<unknown>} The engine's search result.
     */
    async search(query, opts = {}) {
        const { limit = 5, lang, tbs, sources, categories, scrapeOptions, ...rest } = opts;
        const args = { query, limit };
        if (lang) {
            args.lang = lang;
        }
        if (tbs) {
            args.tbs = tbs;
        }
        if (sources) {
            args.sources = sources;
        }
        if (categories) {
            args.categories = categories;
        }
        if (scrapeOptions) {
            args.scrapeOptions = scrapeOptions;
        }
        Object.assign(args, rest);
        if (this.apiUrl) {
            return this.httpPost("/v1/search", args);
        }
        return this.localTransport().toolCall("crw_search", args);
    }
    /**
     * Parse a document (PDF) into markdown / structured JSON. Works in both modes.
     *
     * HTTP mode uploads the bytes as multipart form data (`file` plus an optional
     * JSON `options` field); local mode sends them base64-encoded.
     *
     * @param {*} content - Document bytes; handed to `Blob` (HTTP) or `Buffer.from` (local).
     * @param {object} [opts] - `filename` (default "document.pdf"), `formats`,
     *   `jsonSchema`, `parsers`; anything else is forwarded to the engine.
     * @returns {Promise<object>} The engine's parse result.
     */
    async parseFile(content, opts = {}) {
        const { filename = "document.pdf", formats, jsonSchema, parsers, ...rest } = opts;
        if (this.apiUrl) {
            const options = {};
            if (formats) {
                options.formats = [...formats];
            }
            if (jsonSchema !== undefined) {
                options.jsonSchema = jsonSchema;
            }
            if (parsers) {
                options.parsers = parsers;
            }
            Object.assign(options, rest);
            const form = new FormData();
            form.append("file", new Blob([content]), filename);
            // The options part is only sent when there is something to send.
            if (Object.keys(options).length) {
                form.append("options", JSON.stringify(options));
            }
            return this.httpMultipart("/v2/parse", form);
        }
        const b64 = Buffer.from(content).toString("base64");
        const args = { filename, contentBase64: b64 };
        if (formats) {
            args.formats = [...formats];
        }
        if (jsonSchema !== undefined) {
            args.jsonSchema = jsonSchema;
        }
        if (parsers) {
            args.parsers = parsers;
        }
        Object.assign(args, rest);
        return this.localTransport().toolCall("crw_parse_file", args);
    }
    /**
     * Structured LLM extraction across URLs (HTTP mode only).
     *
     * @param {object} opts - `urls` (required), `prompt`, `schema`, `systemPrompt`,
     *   `pollInterval` and `timeout` in seconds (defaults 2 and 120).
     * @returns {Promise<object>} The job's `data`, or `{}` when absent.
     * @throws {CrwTimeoutError} When the deadline passes before completion.
     * @throws {CrwError} In local mode, when no job ID is returned, or when the job fails.
     */
    async extract(opts) {
        if (!this.apiUrl) {
            throw new CrwError(httpOnlyHint("extract", "LLM extract job endpoint"));
        }
        const { urls, prompt, schema, systemPrompt, pollInterval = 2, timeout = 120 } = opts;
        const body = { urls: [...urls] };
        if (prompt !== undefined) {
            body.prompt = prompt;
        }
        if (schema !== undefined) {
            body.schema = schema;
        }
        if (systemPrompt !== undefined) {
            body.systemPrompt = systemPrompt;
        }
        const start = await this.httpRequest("POST", "/v2/extract", body, { raw: true });
        const jobId = start.id;
        if (!jobId) {
            throw new CrwError(`extract did not return job ID: ${JSON.stringify(start)}`);
        }
        return pollJob({
            label: `Extract ${jobId}`,
            failurePrefix: "Extract failed",
            pollInterval,
            timeout,
            emptyResult: {},
            fetchStatus: () => this.httpRequest("GET", `/v2/extract/${jobId}`, undefined, { raw: true, checkSuccess: false }),
        });
    }
    /**
     * Scrape many URLs in one async batch job (HTTP mode only).
     *
     * @param {Iterable<string>} urls
     * @param {object} [opts] - `formats`, `pollInterval` and `timeout` in seconds
     *   (defaults 2 and 300); anything else is forwarded in the request body.
     * @returns {Promise<object[]>} The job's `data`, or `[]` when absent.
     * @throws {CrwTimeoutError} When the deadline passes before completion.
     * @throws {CrwError} In local mode, when no job ID is returned, or when the job fails.
     */
    async batchScrape(urls, opts = {}) {
        if (!this.apiUrl) {
            throw new CrwError(httpOnlyHint("batchScrape", "batch job endpoint"));
        }
        const { formats, pollInterval = 2, timeout = 300, ...rest } = opts;
        const body = { urls: [...urls], ...rest };
        if (formats) {
            body.formats = [...formats];
        }
        const start = await this.httpRequest("POST", "/v2/batch/scrape", body, { raw: true });
        const jobId = start.id;
        if (!jobId) {
            throw new CrwError(`Batch scrape did not return job ID: ${JSON.stringify(start)}`);
        }
        return pollJob({
            label: `Batch scrape ${jobId}`,
            failurePrefix: "Batch scrape failed",
            pollInterval,
            timeout,
            emptyResult: [],
            fetchStatus: () => this.httpRequest("GET", `/v2/batch/scrape/${jobId}`, undefined, { raw: true, checkSuccess: false }),
        });
    }
    /**
     * Feature-detect the engine (HTTP mode only).
     *
     * @returns {Promise<object>} The `/v1/capabilities` response.
     * @throws {CrwError} In local mode.
     */
    async capabilities() {
        if (!this.apiUrl) {
            throw new CrwError(httpOnlyHint("capabilities", "server capabilities endpoint"));
        }
        return this.httpRequest("GET", "/v1/capabilities", undefined, { checkSuccess: false });
    }
    /**
     * Diff a page against a prior snapshot (HTTP mode only).
     *
     * @param {*} current - Current snapshot, sent as `current`.
     * @param {*} [previous] - Prior snapshot; omitted from the request when undefined.
     * @param {object} [opts] - `modes` (default `["gitDiff"]`), `schema`, `prompt`;
     *   anything else is forwarded in the request body.
     * @returns {Promise<object>} The diff result.
     * @throws {CrwError} In local mode.
     */
    async changeTrackingDiff(current, previous, opts = {}) {
        if (!this.apiUrl) {
            throw new CrwError(httpOnlyHint("changeTrackingDiff", "diff endpoint"));
        }
        const { modes, schema, prompt, ...rest } = opts;
        const body = { current, modes: modes ? [...modes] : ["gitDiff"] };
        if (previous !== undefined) {
            body.previous = previous;
        }
        if (schema !== undefined) {
            body.schema = schema;
        }
        if (prompt !== undefined) {
            body.prompt = prompt;
        }
        Object.assign(body, rest);
        return this.httpPost("/v1/change-tracking/diff", body);
    }
    /** Shut down the local subprocess if running. */
    close() {
        this.local?.close();
        this.local = null;
    }
    // --- local (subprocess) mode ---
    /** Return the shared `LocalTransport`, creating it on first use. */
    localTransport() {
        if (!this.local) {
            this.local = new LocalTransport();
        }
        return this.local;
    }
    /** Poll a local crawl job via the `crw_check_crawl_status` tool until it settles. */
    async pollLocalCrawl(jobId, pollInterval, timeout) {
        return pollJob({
            label: `Crawl ${jobId}`,
            failurePrefix: "Crawl failed",
            pollInterval,
            timeout,
            emptyResult: [],
            fetchStatus: () => this.localTransport().toolCall("crw_check_crawl_status", { id: jobId }),
        });
    }
    // --- HTTP mode ---
    /**
     * Send a JSON request and decode the JSON response.
     *
     * @param {string} method - HTTP verb.
     * @param {string} path - Path appended to `apiUrl` (one trailing slash of `apiUrl` is dropped).
     * @param {object} [body] - JSON-serialised when truthy.
     * @param {{ raw?: boolean, checkSuccess?: boolean }} [options] - `raw` returns the
     *   whole envelope instead of its `data`; `checkSuccess` (default true) throws
     *   when the envelope has `success === false`.
     * @throws {CrwApiError} On non-2xx responses, invalid JSON, or a failed envelope.
     */
    async httpRequest(method, path, body, { raw = false, checkSuccess = true } = {}) {
        if (this.apiUrl === null) {
            throw new CrwError("internal: httpRequest in local mode");
        }
        const url = `${this.apiUrl.replace(/\/$/, "")}${path}`;
        const headers = { "Content-Type": "application/json" };
        if (this.apiKey) {
            headers.Authorization = `Bearer ${this.apiKey}`;
        }
        const resp = await fetch(url, { method, headers, body: body ? JSON.stringify(body) : undefined });
        const result = await this.readJson(resp);
        if (checkSuccess && result.success === false) {
            throw new CrwApiError(describeError(result.error, "API error"), resp.status);
        }
        if (raw) {
            return result;
        }
        return result.data ?? result;
    }
    /**
     * POST a multipart form and decode the JSON response.
     *
     * No Content-Type header is set here so that `fetch` can supply the
     * multipart boundary for the `FormData` body.
     *
     * @throws {CrwApiError} On non-2xx responses, invalid JSON, or a failed envelope.
     */
    async httpMultipart(path, form) {
        if (this.apiUrl === null) {
            throw new CrwError("internal: httpMultipart in local mode");
        }
        const url = `${this.apiUrl.replace(/\/$/, "")}${path}`;
        const headers = {};
        if (this.apiKey) {
            headers.Authorization = `Bearer ${this.apiKey}`;
        }
        const resp = await fetch(url, { method: "POST", headers, body: form });
        const result = await this.readJson(resp);
        if (result.success === false) {
            throw new CrwApiError(describeError(result.error, "API error"), resp.status);
        }
        return result.data ?? result;
    }
    /**
     * Parse the JSON body; surface a non-2xx body's `error` as CrwApiError.
     *
     * An empty body and a literal JSON `null` both decode to `{}` so callers can
     * read properties off the result without a null check.
     */
    async readJson(resp) {
        const text = await resp.text();
        let parsed;
        try {
            parsed = text ? JSON.parse(text) : {};
        }
        catch {
            if (!resp.ok) {
                throw new CrwApiError(`HTTP ${resp.status}: ${resp.statusText}`, resp.status);
            }
            throw new CrwApiError(`Invalid JSON response (HTTP ${resp.status})`, resp.status);
        }
        if (!resp.ok) {
            const message = describeError(parsed?.error ?? parsed?.message, `HTTP ${resp.status}`);
            throw new CrwApiError(message, resp.status);
        }
        return parsed ?? {};
    }
    /** POST `body` as JSON and return the unwrapped result. */
    httpPost(path, body) {
        return this.httpRequest("POST", path, body);
    }
    /** Start a crawl over HTTP and poll `/v1/crawl/{id}` until it settles. */
    async httpCrawl(args, pollInterval, timeout) {
        const result = await this.httpPost("/v1/crawl", args);
        const jobId = result.id;
        if (!jobId) {
            throw new CrwError(`Crawl did not return job ID: ${JSON.stringify(result)}`);
        }
        return pollJob({
            label: `Crawl ${jobId}`,
            failurePrefix: "Crawl failed",
            pollInterval,
            timeout,
            emptyResult: [],
            fetchStatus: () => this.httpRequest("GET", `/v1/crawl/${jobId}`, undefined, { raw: true }),
        });
    }
}
/**
 * Turn an `error` payload into message text: strings pass through, other
 * values are JSON-encoded, and null/undefined yield `fallback`.
 */
function describeError(value, fallback) {
    if (value === undefined || value === null) {
        return fallback;
    }
    return typeof value === "string" ? value : JSON.stringify(value);
}
/**
 * Poll a job until its status is `completed` or `failed`.
 *
 * The deadline is checked before each status request, and the loop waits
 * `pollInterval` seconds between requests.
 *
 * @param {object} job
 * @param {string} job.label - Prefix of the timeout message.
 * @param {string} job.failurePrefix - Prefix of the failure message.
 * @param {number} job.pollInterval - Seconds between status requests.
 * @param {number} job.timeout - Seconds before giving up.
 * @param {() => Promise<object>} job.fetchStatus - Fetches the current status object.
 * @param {*} job.emptyResult - Returned when a completed job carries no `data`.
 * @throws {CrwTimeoutError} When the deadline passes.
 * @throws {CrwError} When the job reports `failed`.
 */
async function pollJob({ label, failurePrefix, pollInterval, timeout, fetchStatus, emptyResult }) {
    const deadline = Date.now() + timeout * 1000;
    for (;;) {
        if (Date.now() > deadline) {
            throw new CrwTimeoutError(`${label} timed out after ${timeout}s`);
        }
        const status = await fetchStatus();
        if (status.status === "completed") {
            return status.data ?? emptyResult;
        }
        if (status.status === "failed") {
            throw new CrwError(`${failurePrefix}: ${describeError(status.error, "unknown")}`);
        }
        await sleep(pollInterval * 1000);
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** CRW SDK error types. */
|
|
2
|
+
export declare class CrwError extends Error {
|
|
3
|
+
constructor(message: string);
|
|
4
|
+
}
|
|
5
|
+
export declare class CrwApiError extends CrwError {
|
|
6
|
+
statusCode?: number;
|
|
7
|
+
constructor(message: string, statusCode?: number);
|
|
8
|
+
}
|
|
9
|
+
export declare class CrwTimeoutError extends CrwError {
|
|
10
|
+
constructor(message: string);
|
|
11
|
+
}
|
|
12
|
+
export declare class CrwBinaryNotFoundError extends CrwError {
|
|
13
|
+
constructor(message: string);
|
|
14
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/** CRW SDK error types. */
|
|
2
|
+
/** Base class of the SDK's error types; instances report `name === "CrwError"`. */
export class CrwError extends Error {
    name = "CrwError";
    /** @param {string} message - Human-readable description. */
    constructor(message) {
        super(message);
    }
}
|
|
8
|
+
/** Error carrying an optional numeric `statusCode` alongside the message. */
export class CrwApiError extends CrwError {
    name = "CrwApiError";
    statusCode;
    /**
     * @param {string} message - Human-readable description.
     * @param {number} [statusCode] - Status code to expose on the error, if any.
     */
    constructor(message, statusCode) {
        super(message);
        this.statusCode = statusCode;
    }
}
|
|
16
|
+
/** Error type whose instances report `name === "CrwTimeoutError"`. */
export class CrwTimeoutError extends CrwError {
    name = "CrwTimeoutError";
    /** @param {string} message - Human-readable description. */
    constructor(message) {
        super(message);
    }
}
|
|
22
|
+
/** Error type whose instances report `name === "CrwBinaryNotFoundError"`. */
export class CrwBinaryNotFoundError extends CrwError {
    name = "CrwBinaryNotFoundError";
    /** @param {string} message - Human-readable description. */
    constructor(message) {
        super(message);
    }
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { CrwClient, CLOUD_API_URL, DASHBOARD_URL, DOCS_URL } from "./client.js";
|
|
2
|
+
export { CrwError, CrwApiError, CrwTimeoutError, CrwBinaryNotFoundError } from "./errors.js";
|
|
3
|
+
export type { ClientOptions, ScrapeOptions, CrawlOptions, MapOptions, SearchOptions, ParseFileOptions, ExtractOptions, BatchScrapeOptions, ChangeTrackingOptions, ScrapeResult, CrawlResult, SearchResult, ParseResult, ExtractResult, BatchResult, Capabilities, DiffResult, Json, } from "./types.js";
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local (CRW_LOCAL) subprocess transport: speaks MCP JSON-RPC to a `crw-mcp`
|
|
3
|
+
* binary over stdio. Mirrors the Python SDK's subprocess mode.
|
|
4
|
+
*
|
|
5
|
+
* v1 finds the binary via the `CRW_BINARY` env var or on `PATH`; auto-download
|
|
6
|
+
* (as the Python SDK does) is a fast-follow.
|
|
7
|
+
*/
|
|
8
|
+
import type { Json } from "./types.js";
|
|
9
|
+
export declare class LocalTransport {
|
|
10
|
+
private proc;
|
|
11
|
+
private nextId;
|
|
12
|
+
private pending;
|
|
13
|
+
private buffer;
|
|
14
|
+
private resolveBinary;
|
|
15
|
+
private ensureProcess;
|
|
16
|
+
private onData;
|
|
17
|
+
private jsonrpc;
|
|
18
|
+
toolCall(name: string, args: Json): Promise<Json>;
|
|
19
|
+
close(): void;
|
|
20
|
+
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local (CRW_LOCAL) subprocess transport: speaks MCP JSON-RPC to a `crw-mcp`
|
|
3
|
+
* binary over stdio. Mirrors the Python SDK's subprocess mode.
|
|
4
|
+
*
|
|
5
|
+
* v1 finds the binary via the `CRW_BINARY` env var or on `PATH`; auto-download
|
|
6
|
+
* (as the Python SDK does) is a fast-follow.
|
|
7
|
+
*/
|
|
8
|
+
import { spawn } from "node:child_process";
|
|
9
|
+
import { CrwApiError, CrwBinaryNotFoundError, CrwError } from "./errors.js";
|
|
10
|
+
const BINARY_NAME = process.platform === "win32" ? "crw-mcp.exe" : "crw-mcp";
|
|
11
|
+
/**
 * JSON-RPC client for a `crw-mcp` child process: requests are written to the
 * child's stdin one JSON document per line, and responses are read line by
 * line from its stdout and matched to requests by `id`.
 */
export class LocalTransport {
    /** Current child process, or `null` when none is usable. */
    proc = null;
    /** Last JSON-RPC request id handed out. */
    nextId = 0;
    /** In-flight requests: id -> `{ resolve, reject, proc }`. */
    pending = new Map();
    /** Stdout text received so far that does not yet end in a newline. */
    buffer = "";
    /** Return the `CRW_BINARY` override when set, else the bare binary name. */
    resolveBinary() {
        // Rely on PATH resolution by spawning the bare name; if it ENOENTs the
        // error handler surfaces a clear install hint.
        return process.env.CRW_BINARY || BINARY_NAME;
    }
    /**
     * Return the running child process, spawning a new one when there is none.
     *
     * A child counts as running only while both `exitCode` and `signalCode` are
     * null: a signal-terminated child keeps `exitCode === null`, so checking
     * `exitCode` alone would hand back a dead process.
     */
    ensureProcess() {
        const current = this.proc;
        if (current && current.exitCode === null && current.signalCode === null) {
            return current;
        }
        const bin = this.resolveBinary();
        const proc = spawn(bin, [], { stdio: ["pipe", "pipe", "ignore"] });
        // Partial output of a previous child must not be glued onto this one's.
        this.buffer = "";
        // Reject only the requests that were written to this child, so a late
        // event from an old process cannot fail requests sent to its successor.
        const failPending = (failure) => {
            for (const [id, entry] of this.pending) {
                if (entry.proc === proc) {
                    this.pending.delete(id);
                    entry.reject(failure);
                }
            }
        };
        const retire = (failure) => {
            if (this.proc === proc) {
                this.proc = null;
            }
            failPending(failure);
        };
        proc.on("error", (err) => {
            const failure = err.code === "ENOENT"
                ? new CrwBinaryNotFoundError(`crw-mcp binary not found on PATH. Install it (e.g. \`npm i -g crw-mcp\` or ` +
                    `\`cargo install crw-mcp\`) or set CRW_BINARY to its path.`)
                : new CrwError(`crw-mcp failed to start: ${err.message}`);
            retire(failure);
        });
        // Without a listener, a failed stdin write would surface as an
        // uncaught 'error' event instead of a rejected request.
        proc.stdin?.on("error", (err) => {
            failPending(new CrwError(`crw-mcp stdin error: ${err.message}`));
        });
        proc.stdout?.setEncoding("utf8");
        proc.stdout?.on("data", (chunk) => this.onData(chunk));
        proc.on("exit", () => {
            retire(new CrwError("crw-mcp process closed unexpectedly"));
        });
        this.proc = proc;
        return proc;
    }
    /**
     * Consume a chunk of stdout text and settle every request whose response
     * line is now complete. Blank lines, lines that are not JSON objects, and
     * messages with an unknown or missing `id` are ignored.
     */
    onData(chunk) {
        this.buffer += chunk;
        let idx;
        while ((idx = this.buffer.indexOf("\n")) >= 0) {
            const line = this.buffer.slice(0, idx).trim();
            this.buffer = this.buffer.slice(idx + 1);
            if (!line) {
                continue;
            }
            let msg;
            try {
                msg = JSON.parse(line);
            }
            catch {
                // Not JSON: skipped on purpose, there is no request to attribute it to.
                continue;
            }
            // `null` and primitives are valid JSON but carry no `id`.
            if (msg === null || typeof msg !== "object") {
                continue;
            }
            const id = msg.id;
            if (id === undefined || !this.pending.has(id)) {
                continue;
            }
            const entry = this.pending.get(id);
            this.pending.delete(id);
            if (msg.error) {
                entry.reject(new CrwApiError(msg.error.message ?? JSON.stringify(msg.error)));
            }
            else {
                entry.resolve(msg.result ?? {});
            }
        }
    }
    /**
     * Send one JSON-RPC 2.0 request and resolve with its `result`.
     *
     * @param {string} method
     * @param {object} params
     * @returns {Promise<object>} Rejects with CrwApiError on an error response and
     *   with CrwError when the process fails, exits, or the write fails.
     */
    jsonrpc(method, params) {
        const proc = this.ensureProcess();
        const id = ++this.nextId;
        return new Promise((resolve, reject) => {
            this.pending.set(id, { resolve, reject, proc });
            const line = `${JSON.stringify({ jsonrpc: "2.0", id, method, params })}\n`;
            proc.stdin?.write(line, (err) => {
                // Skip if a process-level handler already settled this request.
                if (err && this.pending.delete(id)) {
                    reject(new CrwError(`crw-mcp stdin write failed: ${err.message}`));
                }
            });
        });
    }
    /**
     * Invoke a tool through `tools/call` and return its JSON-decoded payload.
     *
     * @param {string} name - Tool name.
     * @param {object} args - Tool arguments.
     * @returns {Promise<object>} The first content item's `text`, parsed as JSON.
     * @throws {CrwError} When the response has no content or its text is not valid JSON.
     * @throws {CrwApiError} When the result is flagged `isError`.
     */
    async toolCall(name, args) {
        const result = await this.jsonrpc("tools/call", { name, arguments: args });
        const content = result.content?.[0];
        if (!content) {
            throw new CrwError(`Empty response from ${name}`);
        }
        if (result.isError) {
            throw new CrwApiError(content.text ?? "Unknown error");
        }
        try {
            return JSON.parse(content.text ?? "{}");
        }
        catch (cause) {
            // Keep the SDK's error family instead of leaking a raw SyntaxError.
            const failure = new CrwError(`Invalid JSON in ${name} response: ${cause.message}`);
            failure.cause = cause;
            throw failure;
        }
    }
    /** End the child's stdin and kill it if it is still running, then forget it. */
    close() {
        const proc = this.proc;
        this.proc = null;
        if (proc && proc.exitCode === null && proc.signalCode === null) {
            proc.stdin?.end();
            proc.kill();
        }
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"module"}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Request/response types for the CRW SDK.
|
|
3
|
+
*
|
|
4
|
+
* Hand-written against the engine's OpenAPI spec
|
|
5
|
+
* (crates/crw-server/openapi/openapi.json). Results are returned as the engine's
|
|
6
|
+
* raw JSON objects, so the result aliases are intentionally permissive.
|
|
7
|
+
*/
|
|
8
|
+
export type Json = Record<string, unknown>;
|
|
9
|
+
export interface ClientOptions {
|
|
10
|
+
/** Explicit server URL (self-hosted). Defaults to the managed cloud. */
|
|
11
|
+
apiUrl?: string;
|
|
12
|
+
/** API key for the cloud or an authenticated self-hosted server. */
|
|
13
|
+
apiKey?: string;
|
|
14
|
+
}
|
|
15
|
+
export interface ScrapeOptions {
|
|
16
|
+
formats?: string[];
|
|
17
|
+
onlyMainContent?: boolean;
|
|
18
|
+
includeTags?: string[];
|
|
19
|
+
excludeTags?: string[];
|
|
20
|
+
/** Force the JS renderer on/off (engine `renderJs`). */
|
|
21
|
+
renderJs?: boolean;
|
|
22
|
+
/** Pin a renderer tier (engine `renderer`). */
|
|
23
|
+
renderer?: string;
|
|
24
|
+
/** Milliseconds to wait after load before extracting (`waitFor`). */
|
|
25
|
+
waitFor?: number;
|
|
26
|
+
/** JSON Schema for structured LLM extraction (auto-adds the `json` format). */
|
|
27
|
+
jsonSchema?: Json;
|
|
28
|
+
/** Any other engine scrape option, passed through verbatim. */
|
|
29
|
+
[key: string]: unknown;
|
|
30
|
+
}
|
|
31
|
+
export interface CrawlOptions {
|
|
32
|
+
maxDepth?: number;
|
|
33
|
+
maxPages?: number;
|
|
34
|
+
pollInterval?: number;
|
|
35
|
+
timeout?: number;
|
|
36
|
+
[key: string]: unknown;
|
|
37
|
+
}
|
|
38
|
+
export interface MapOptions {
|
|
39
|
+
maxDepth?: number;
|
|
40
|
+
useSitemap?: boolean;
|
|
41
|
+
[key: string]: unknown;
|
|
42
|
+
}
|
|
43
|
+
export interface SearchOptions {
|
|
44
|
+
limit?: number;
|
|
45
|
+
lang?: string;
|
|
46
|
+
tbs?: string;
|
|
47
|
+
sources?: string[];
|
|
48
|
+
categories?: string[];
|
|
49
|
+
scrapeOptions?: Json;
|
|
50
|
+
[key: string]: unknown;
|
|
51
|
+
}
|
|
52
|
+
export interface ParseFileOptions {
|
|
53
|
+
filename?: string;
|
|
54
|
+
formats?: string[];
|
|
55
|
+
jsonSchema?: Json;
|
|
56
|
+
parsers?: string[];
|
|
57
|
+
[key: string]: unknown;
|
|
58
|
+
}
|
|
59
|
+
export interface ExtractOptions {
|
|
60
|
+
urls: string[];
|
|
61
|
+
prompt?: string;
|
|
62
|
+
schema?: Json;
|
|
63
|
+
systemPrompt?: string;
|
|
64
|
+
pollInterval?: number;
|
|
65
|
+
timeout?: number;
|
|
66
|
+
}
|
|
67
|
+
export interface BatchScrapeOptions {
|
|
68
|
+
formats?: string[];
|
|
69
|
+
pollInterval?: number;
|
|
70
|
+
timeout?: number;
|
|
71
|
+
[key: string]: unknown;
|
|
72
|
+
}
|
|
73
|
+
export interface ChangeTrackingOptions {
|
|
74
|
+
modes?: string[];
|
|
75
|
+
schema?: Json;
|
|
76
|
+
prompt?: string;
|
|
77
|
+
[key: string]: unknown;
|
|
78
|
+
}
|
|
79
|
+
export type ScrapeResult = Json;
|
|
80
|
+
export type CrawlResult = Json[];
|
|
81
|
+
export type SearchResult = Json | Json[];
|
|
82
|
+
export type ParseResult = Json;
|
|
83
|
+
export type ExtractResult = Json;
|
|
84
|
+
export type BatchResult = Json[];
|
|
85
|
+
export type Capabilities = Json;
|
|
86
|
+
export type DiffResult = Json;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Request/response types for the CRW SDK.
|
|
3
|
+
*
|
|
4
|
+
* Hand-written against the engine's OpenAPI spec
|
|
5
|
+
* (crates/crw-server/openapi/openapi.json). Results are returned as the engine's
|
|
6
|
+
* raw JSON objects, so the result aliases are intentionally permissive.
|
|
7
|
+
*/
|
|
8
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "crw-sdk",
|
|
3
|
+
"version": "0.15.0",
|
|
4
|
+
"description": "TypeScript/JavaScript SDK for CRW — scrape, crawl, map, search, parse, and extract any website",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"homepage": "https://github.com/us/crw",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/us/crw.git"
|
|
10
|
+
},
|
|
11
|
+
"keywords": [
|
|
12
|
+
"web-scraping",
|
|
13
|
+
"crawler",
|
|
14
|
+
"scraper",
|
|
15
|
+
"ai-agent",
|
|
16
|
+
"firecrawl",
|
|
17
|
+
"mcp"
|
|
18
|
+
],
|
|
19
|
+
"type": "module",
|
|
20
|
+
"main": "./dist/cjs/index.js",
|
|
21
|
+
"module": "./dist/esm/index.js",
|
|
22
|
+
"types": "./dist/esm/index.d.ts",
|
|
23
|
+
"exports": {
|
|
24
|
+
".": {
|
|
25
|
+
"types": "./dist/esm/index.d.ts",
|
|
26
|
+
"import": "./dist/esm/index.js",
|
|
27
|
+
"require": "./dist/cjs/index.js"
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"files": [
|
|
31
|
+
"dist"
|
|
32
|
+
],
|
|
33
|
+
"engines": {
|
|
34
|
+
"node": ">=18"
|
|
35
|
+
},
|
|
36
|
+
"scripts": {
|
|
37
|
+
"build": "tsc -p tsconfig.json && tsc -p tsconfig.cjs.json && node scripts/postbuild.mjs",
|
|
38
|
+
"test": "npm run build && tsc -p tsconfig.test.json && node --test dist-test/*.test.js",
|
|
39
|
+
"prepublishOnly": "npm run build"
|
|
40
|
+
},
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@types/node": "^20",
|
|
43
|
+
"typescript": "^5"
|
|
44
|
+
}
|
|
45
|
+
}
|