firecrawl 4.25.2 → 4.25.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-XCQC2QCZ.js → chunk-5D4KXCYO.js} +2 -2
- package/dist/index.cjs +150 -2
- package/dist/index.d.cts +189 -1
- package/dist/index.d.ts +189 -1
- package/dist/index.js +149 -2
- package/dist/{package-D6422PQU.js → package-HESILIET.js} +1 -1
- package/package.json +2 -2
- package/pnpm-workspace.yaml +3 -0
- package/src/__tests__/e2e/v1/index.test.ts +15 -15
- package/src/__tests__/unit/v2/research.test.ts +168 -0
- package/src/index.ts +2 -0
- package/src/v2/client.ts +12 -0
- package/src/v2/methods/research.ts +195 -0
- package/src/v2/types.ts +158 -0
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-XCQC2QCZ.js";
|
|
3
|
+
} from "./chunk-5D4KXCYO.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -1301,6 +1301,142 @@ async function getTokenUsageHistorical(http, byApiKey) {
|
|
|
1301
1301
|
}
|
|
1302
1302
|
}
|
|
1303
1303
|
|
|
1304
|
+
// src/v2/methods/research.ts
|
|
1305
|
+
var BASE = "/v2/research";
|
|
1306
|
+
/**
 * Append a query parameter to `params`, skipping values that carry nothing.
 *
 * - `null` / `undefined` (as the whole value or as an array element) is ignored.
 * - An array is expanded into one `key=value` pair per element, in order.
 * - Every value is converted with `String()`. Values whose string form is
 *   empty are dropped for scalars as well as for array elements, so a bare
 *   `key=` is never emitted.
 *
 * @param params URLSearchParams instance, mutated in place.
 * @param key Parameter name.
 * @param value Scalar, array of scalars, or null/undefined.
 */
function appendParam(params, key, value) {
  if (value == null) return;
  // Treat a scalar as a one-element list so both shapes share the same rules.
  const items = Array.isArray(value) ? value : [value];
  for (const item of items) {
    if (item == null) continue;
    const text = String(item);
    if (text.length > 0) params.append(key, text);
  }
}
|
|
1316
|
+
/**
 * Attach a serialized query string to a path.
 *
 * @param path Request path without a query string.
 * @param params URLSearchParams whose `toString()` output is used verbatim.
 * @returns `path` unchanged when `params` serializes to an empty string,
 *   otherwise `path`, a `?`, and the serialized parameters.
 */
function withQuery(path, params) {
  const encoded = params.toString();
  if (!encoded) return path;
  return path + "?" + encoded;
}
|
|
1320
|
+
/**
 * Turn a failure raised during a research request into the error the SDK
 * surfaces. This function never returns normally; it always throws.
 *
 * - Errors not flagged with `isAxiosError` are rethrown untouched.
 * - When the response body has a truthy `detail` or `title`, that text
 *   becomes the message (`detail` wins) and `body.type` becomes the code.
 * - Otherwise the message is `err.message`, or a generic sentence naming
 *   `action`, and the code is `err.code`.
 *
 * @param err Value caught by the caller.
 * @param action Short description of the attempted operation, used only in
 *   the generic fallback message.
 * @throws SdkError for axios errors; `err` itself for everything else.
 */
function normalizeResearchError(err, action) {
  if (!err?.isAxiosError) throw err;

  const response = err.response;
  const status = response?.status;
  const body = response?.data;

  // Prefer the human-readable text carried in the response body, if any.
  const problemText = body ? body.detail || body.title : void 0;
  if (problemText) {
    throw new SdkError(problemText, status, body.type, body);
  }

  const fallbackText = err.message || `Request failed while trying to ${action}`;
  throw new SdkError(fallbackText, status, err.code, body);
}
|
|
1337
|
+
/**
 * Sub-client for the `/v2/research` endpoints.
 *
 * Every method validates its arguments locally (throwing a plain `Error`),
 * issues one GET request through the injected HTTP client, and resolves with
 * the response body (`res.data`) unchanged. Non-200 responses are handed to
 * `throwForBadResponse`; anything thrown while requesting is passed through
 * `normalizeResearchError`, which always throws.
 */
var ResearchClient = class {
  /**
   * @param http HTTP client; only its `get(url)` method is used here.
   */
  constructor(http) {
    this.http = http;
  }
  /**
   * Search papers by abstract relevance.
   * @param query Natural-language search query; must not be blank.
   * @param options Optional filters (k, authors, categories, from, to).
   * @returns The response body as returned by the API.
   * @throws Error when `query` is blank or `k` is not a positive number.
   */
  async searchPapers(query, options = {}) {
    if (!query || !query.trim()) throw new Error("query cannot be empty");
    // `!(k > 0)` rather than `k <= 0`: NaN fails every comparison, so the
    // latter would let it through and put `k=NaN` in the URL.
    if (options.k != null && !(options.k > 0))
      throw new Error("k must be positive");
    const params = new URLSearchParams();
    appendParam(params, "query", query);
    appendParam(params, "k", options.k);
    appendParam(params, "authors", options.authors);
    appendParam(params, "categories", options.categories);
    appendParam(params, "from", options.from);
    appendParam(params, "to", options.to);
    try {
      const res = await this.http.get(
        withQuery(`${BASE}/papers`, params)
      );
      if (res.status !== 200) throwForBadResponse(res, "search papers");
      return res.data;
    } catch (err) {
      return normalizeResearchError(err, "search papers");
    }
  }
  /**
   * Fetch a single paper by reference.
   * @param id Paper reference; URL-encoded into the path. Must not be blank.
   * @param options Optional `query` and `k`, sent as query parameters.
   *   `k` is only accepted when `query` is also provided.
   * @returns The response body as returned by the API.
   * @throws Error when `id` is blank, `k` is given without `query`, or `k`
   *   is not a positive number.
   */
  async getPaper(id, options = {}) {
    if (!id || !id.trim()) throw new Error("id cannot be empty");
    if (options.k != null && options.query == null)
      throw new Error("k is only valid together with query");
    // Written as `!(k > 0)` so NaN is rejected too.
    if (options.k != null && !(options.k > 0))
      throw new Error("k must be positive");
    const params = new URLSearchParams();
    appendParam(params, "query", options.query);
    appendParam(params, "k", options.k);
    try {
      const res = await this.http.get(
        withQuery(`${BASE}/papers/${encodeURIComponent(id)}`, params)
      );
      if (res.status !== 200) throwForBadResponse(res, "get paper");
      return res.data;
    } catch (err) {
      return normalizeResearchError(err, "get paper");
    }
  }
  /**
   * Find related papers via the citation graph.
   * @param id Primary seed paper reference; URL-encoded into the path.
   * @param options Required `intent` plus optional mode, k, rerank, anchor.
   * @returns The response body as returned by the API.
   * @throws Error when `id` or `options.intent` is blank, or `k` is not a
   *   positive number.
   */
  async similarPapers(id, options) {
    if (!id || !id.trim()) throw new Error("id cannot be empty");
    if (!options?.intent || !options.intent.trim())
      throw new Error("intent cannot be empty");
    // Written as `!(k > 0)` so NaN is rejected too.
    if (options.k != null && !(options.k > 0))
      throw new Error("k must be positive");
    const params = new URLSearchParams();
    appendParam(params, "intent", options.intent);
    appendParam(params, "mode", options.mode);
    appendParam(params, "k", options.k);
    // appendParam already ignores null/undefined, and `false` is still sent.
    appendParam(params, "rerank", options.rerank);
    appendParam(params, "anchor", options.anchor);
    try {
      const res = await this.http.get(
        withQuery(
          `${BASE}/papers/${encodeURIComponent(id)}/similar`,
          params
        )
      );
      if (res.status !== 200) throwForBadResponse(res, "find similar papers");
      return res.data;
    } catch (err) {
      return normalizeResearchError(err, "find similar papers");
    }
  }
  /**
   * Search GitHub issue/PR history and repository readmes.
   * @param query Search query; must not be blank.
   * @param options Optional `k`.
   * @returns The response body as returned by the API.
   * @throws Error when `query` is blank or `k` is not a positive number.
   */
  async searchGithub(query, options = {}) {
    if (!query || !query.trim()) throw new Error("query cannot be empty");
    // Written as `!(k > 0)` so NaN is rejected too.
    if (options.k != null && !(options.k > 0))
      throw new Error("k must be positive");
    const params = new URLSearchParams();
    appendParam(params, "query", query);
    appendParam(params, "k", options.k);
    try {
      const res = await this.http.get(
        withQuery(`${BASE}/github`, params)
      );
      if (res.status !== 200) throwForBadResponse(res, "search github");
      return res.data;
    } catch (err) {
      return normalizeResearchError(err, "search github");
    }
  }
};
|
|
1439
|
+
|
|
1304
1440
|
// src/v2/methods/monitor.ts
|
|
1305
1441
|
function queryString(params) {
|
|
1306
1442
|
if (!params) return "";
|
|
@@ -1666,6 +1802,7 @@ var Watcher = class extends EventEmitter {
|
|
|
1666
1802
|
import "zod";
|
|
1667
1803
|
var FirecrawlClient = class {
|
|
1668
1804
|
http;
|
|
1805
|
+
_research;
|
|
1669
1806
|
isCloudService(url) {
|
|
1670
1807
|
return url.includes("api.firecrawl.dev");
|
|
1671
1808
|
}
|
|
@@ -1738,6 +1875,15 @@ var FirecrawlClient = class {
|
|
|
1738
1875
|
async search(query, req = {}) {
|
|
1739
1876
|
return search(this.http, { query, ...req });
|
|
1740
1877
|
}
|
|
1878
|
+
// Research
|
|
1879
|
+
/**
|
|
1880
|
+
* Access the v2 research endpoints (arXiv papers + GitHub history/readmes).
|
|
1881
|
+
* Example: `firecrawl.research.searchPapers("diffusion models")`.
|
|
1882
|
+
*/
|
|
1883
|
+
get research() {
|
|
1884
|
+
if (!this._research) this._research = new ResearchClient(this.http);
|
|
1885
|
+
return this._research;
|
|
1886
|
+
}
|
|
1741
1887
|
// Map
|
|
1742
1888
|
/**
|
|
1743
1889
|
* Map a site to discover URLs (sitemap-aware).
|
|
@@ -2097,7 +2243,7 @@ var FirecrawlApp = class {
|
|
|
2097
2243
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
2098
2244
|
return process.env.npm_package_version;
|
|
2099
2245
|
}
|
|
2100
|
-
const packageJson = await import("./package-D6422PQU.js");
|
|
2246
|
+
const packageJson = await import("./package-HESILIET.js");
|
|
2101
2247
|
return packageJson.default.version;
|
|
2102
2248
|
} catch (error) {
|
|
2103
2249
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
|
@@ -3494,6 +3640,7 @@ export {
|
|
|
3494
3640
|
FirecrawlApp as FirecrawlAppV1,
|
|
3495
3641
|
FirecrawlClient,
|
|
3496
3642
|
JobTimeoutError,
|
|
3643
|
+
ResearchClient,
|
|
3497
3644
|
SdkError,
|
|
3498
3645
|
Watcher,
|
|
3499
3646
|
index_default as default
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl",
|
|
3
|
-
"version": "4.25.2",
|
|
3
|
+
"version": "4.25.3",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
"ts-jest": "^29.4.5",
|
|
41
41
|
"tsup": "^8.5.0",
|
|
42
42
|
"typescript": "^5.4.5",
|
|
43
|
-
"uuid": "^
|
|
43
|
+
"uuid": "^14.0.0"
|
|
44
44
|
},
|
|
45
45
|
"keywords": [
|
|
46
46
|
"firecrawl",
|
|
@@ -26,10 +26,10 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
26
26
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
27
27
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
28
28
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
29
|
-
await expect(invalidApp.scrapeUrl('https://
|
|
29
|
+
await expect(invalidApp.scrapeUrl('https://firecrawl-test-site.vercel.app')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 401");
|
|
30
30
|
} else {
|
|
31
31
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
32
|
-
await expect(invalidApp.scrapeUrl('https://
|
|
32
|
+
await expect(invalidApp.scrapeUrl('https://firecrawl-test-site.vercel.app')).resolves.not.toThrow();
|
|
33
33
|
}
|
|
34
34
|
});
|
|
35
35
|
|
|
@@ -42,7 +42,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
42
42
|
test.concurrent('should return successful response for valid scrape', async () => {
|
|
43
43
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
44
44
|
|
|
45
|
-
const response = await app.scrapeUrl('https://
|
|
45
|
+
const response = await app.scrapeUrl('https://firecrawl-test-site.vercel.app');
|
|
46
46
|
if (!response.success) {
|
|
47
47
|
throw new Error(response.error);
|
|
48
48
|
}
|
|
@@ -51,7 +51,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
51
51
|
test.concurrent('should return successful response with valid API key and options', async () => {
|
|
52
52
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
53
53
|
const response = await app.scrapeUrl(
|
|
54
|
-
'https://
|
|
54
|
+
'https://firecrawl-test-site.vercel.app', {
|
|
55
55
|
formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'],
|
|
56
56
|
headers: { "x-key": "test" },
|
|
57
57
|
includeTags: ['h1'],
|
|
@@ -69,7 +69,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
69
69
|
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
|
|
70
70
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
71
71
|
const response = await app.scrapeUrl(
|
|
72
|
-
'https://
|
|
72
|
+
'https://firecrawl-test-site.vercel.app', {
|
|
73
73
|
formats: ['screenshot@fullPage'],
|
|
74
74
|
});
|
|
75
75
|
if (!response.success) {
|
|
@@ -132,16 +132,16 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
132
132
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
133
133
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
134
134
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
135
|
-
await expect(invalidApp.crawlUrl('https://
|
|
135
|
+
await expect(invalidApp.crawlUrl('https://firecrawl-test-site.vercel.app')).rejects.toThrow("Request failed with status code 401");
|
|
136
136
|
} else {
|
|
137
137
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
138
|
-
await expect(invalidApp.crawlUrl('https://
|
|
138
|
+
await expect(invalidApp.crawlUrl('https://firecrawl-test-site.vercel.app')).resolves.not.toThrow();
|
|
139
139
|
}
|
|
140
140
|
});
|
|
141
141
|
|
|
142
142
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
143
143
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
144
|
-
const response = await app.crawlUrl('https://
|
|
144
|
+
const response = await app.crawlUrl('https://firecrawl-test-site.vercel.app', {}, 30) as CrawlStatusResponse;
|
|
145
145
|
expect(response).not.toHaveProperty("next"); // wait until done
|
|
146
146
|
expect(response.data.length).toBeGreaterThan(0);
|
|
147
147
|
if (response.data[0]) {
|
|
@@ -151,7 +151,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
151
151
|
|
|
152
152
|
test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
|
|
153
153
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
154
|
-
const response = await app.crawlUrl('https://
|
|
154
|
+
const response = await app.crawlUrl('https://firecrawl-test-site.vercel.app', {
|
|
155
155
|
excludePaths: ['blog/*'],
|
|
156
156
|
includePaths: ['/'],
|
|
157
157
|
maxDepth: 2,
|
|
@@ -183,11 +183,11 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
183
183
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
|
184
184
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
185
185
|
const uniqueIdempotencyKey = uuidv4();
|
|
186
|
-
const response = await app.asyncCrawlUrl('https://
|
|
186
|
+
const response = await app.asyncCrawlUrl('https://firecrawl-test-site.vercel.app', {}, uniqueIdempotencyKey) as CrawlResponse;
|
|
187
187
|
expect(response).not.toBeNull();
|
|
188
188
|
expect(response.id).toBeDefined();
|
|
189
189
|
|
|
190
|
-
await expect(app.crawlUrl('https://
|
|
190
|
+
await expect(app.crawlUrl('https://firecrawl-test-site.vercel.app', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
191
191
|
});
|
|
192
192
|
|
|
193
193
|
test.concurrent('should check crawl status', async () => {
|
|
@@ -236,10 +236,10 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
236
236
|
test.concurrent('should throw error for invalid API key on map', async () => {
|
|
237
237
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
238
238
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
239
|
-
await expect(invalidApp.mapUrl('https://
|
|
239
|
+
await expect(invalidApp.mapUrl('https://firecrawl-test-site.vercel.app')).rejects.toThrow("Request failed with status code 401");
|
|
240
240
|
} else {
|
|
241
241
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
242
|
-
await expect(invalidApp.mapUrl('https://
|
|
242
|
+
await expect(invalidApp.mapUrl('https://firecrawl-test-site.vercel.app')).resolves.not.toThrow();
|
|
243
243
|
}
|
|
244
244
|
});
|
|
245
245
|
|
|
@@ -250,12 +250,12 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
250
250
|
});
|
|
251
251
|
|
|
252
252
|
test.concurrent('should return successful response for valid map', async () => {
|
|
253
|
-
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://
|
|
253
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://firecrawl-test-site.vercel.app') as MapResponse;
|
|
254
254
|
expect(response).not.toBeNull();
|
|
255
255
|
|
|
256
256
|
expect(response.links?.length).toBeGreaterThan(0);
|
|
257
257
|
expect(response.links?.[0]).toContain("https://");
|
|
258
|
-
const filteredLinks = response.links?.filter((link: string) => link.includes("
|
|
258
|
+
const filteredLinks = response.links?.filter((link: string) => link.includes("firecrawl-test-site.vercel.app"));
|
|
259
259
|
expect(filteredLinks?.length).toBeGreaterThan(0);
|
|
260
260
|
}, 30000); // 30 seconds timeout
|
|
261
261
|
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { describe, test, expect } from "@jest/globals";
|
|
2
|
+
import { ResearchClient } from "../../../v2/methods/research";
|
|
3
|
+
import { SdkError } from "../../../v2/types";
|
|
4
|
+
import type { HttpClient } from "../../../v2/utils/httpClient";
|
|
5
|
+
|
|
6
|
+
/**
 * Create a ResearchClient backed by a stub HTTP client.
 *
 * The stub exposes only `get`: it records each requested URL in `calls` and
 * resolves with whatever `responder` returns for that URL (by default a 200
 * response with an empty object body).
 *
 * @returns The client under test together with the recorded URL list.
 */
function makeClient(
  responder: (url: string) => { status: number; data: any } = () => ({ status: 200, data: {} }),
) {
  const calls: string[] = [];
  const get = async (url: string) => {
    calls.push(url);
    return responder(url);
  };
  // Only `get` is stubbed, hence the cast through `unknown`.
  const http = { get } as unknown as HttpClient;
  const client = new ResearchClient(http);
  return { client, calls };
}
|
|
22
|
+
|
|
23
|
+
/**
 * Build an axios-shaped error object: `isAxiosError` is set, `response`
 * carries the given status and body (e.g. an RFC 7807 Problem document),
 * and `message` is the fixed string "req failed".
 */
function problemError(status: number, body: any) {
  const response = { status, data: body };
  return { isAxiosError: true, response, message: "req failed" };
}
|
|
27
|
+
|
|
28
|
+
describe("research.searchPapers", () => {
|
|
29
|
+
test("builds query string with explode arrays", async () => {
|
|
30
|
+
const { client, calls } = makeClient(() => ({
|
|
31
|
+
status: 200,
|
|
32
|
+
data: { results: [] },
|
|
33
|
+
}));
|
|
34
|
+
await client.searchPapers("diffusion models", {
|
|
35
|
+
k: 10,
|
|
36
|
+
authors: ["Ho", "Abbeel"],
|
|
37
|
+
categories: ["cs.LG", "stat.ML"],
|
|
38
|
+
from: "2020-01-01",
|
|
39
|
+
to: "2024-12-31",
|
|
40
|
+
});
|
|
41
|
+
const url = calls[0];
|
|
42
|
+
expect(url.startsWith("/v2/research/papers?")).toBe(true);
|
|
43
|
+
const qs = new URLSearchParams(url.split("?")[1]);
|
|
44
|
+
expect(qs.get("query")).toBe("diffusion models");
|
|
45
|
+
expect(qs.get("k")).toBe("10");
|
|
46
|
+
expect(qs.getAll("authors")).toEqual(["Ho", "Abbeel"]);
|
|
47
|
+
expect(qs.getAll("categories")).toEqual(["cs.LG", "stat.ML"]);
|
|
48
|
+
expect(qs.get("from")).toBe("2020-01-01");
|
|
49
|
+
expect(qs.get("to")).toBe("2024-12-31");
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
test("omits absent options", async () => {
|
|
53
|
+
const { client, calls } = makeClient(() => ({ status: 200, data: { results: [] } }));
|
|
54
|
+
await client.searchPapers("q");
|
|
55
|
+
const qs = new URLSearchParams(calls[0].split("?")[1]);
|
|
56
|
+
expect([...qs.keys()]).toEqual(["query"]);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
test("rejects empty query", async () => {
|
|
60
|
+
const { client } = makeClient();
|
|
61
|
+
await expect(client.searchPapers(" ")).rejects.toThrow(/query cannot be empty/i);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test("rejects non-positive k", async () => {
|
|
65
|
+
const { client } = makeClient();
|
|
66
|
+
await expect(client.searchPapers("q", { k: 0 })).rejects.toThrow(/k must be positive/i);
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
test("returns the response body verbatim", async () => {
|
|
70
|
+
const payload = { results: [{ paper_id: "1", title: "t", abstract: "a", score: 0.1 }] };
|
|
71
|
+
const { client } = makeClient(() => ({ status: 200, data: payload }));
|
|
72
|
+
await expect(client.searchPapers("q")).resolves.toEqual(payload);
|
|
73
|
+
});
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
describe("research.getPaper", () => {
|
|
77
|
+
test("detail mode encodes the id and sends no query params", async () => {
|
|
78
|
+
const { client, calls } = makeClient(() => ({ status: 200, data: { paper: {} } }));
|
|
79
|
+
await client.getPaper("arxiv:2105.05233");
|
|
80
|
+
expect(calls[0]).toBe("/v2/research/papers/arxiv%3A2105.05233");
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
test("read mode adds query and k", async () => {
|
|
84
|
+
const { client, calls } = makeClient(() => ({
|
|
85
|
+
status: 200,
|
|
86
|
+
data: { paper: {}, paper_id: "1", query: "q", passages: [] },
|
|
87
|
+
}));
|
|
88
|
+
await client.getPaper("123", { query: "noise schedule", k: 4 });
|
|
89
|
+
const [path, query] = calls[0].split("?");
|
|
90
|
+
expect(path).toBe("/v2/research/papers/123");
|
|
91
|
+
const qs = new URLSearchParams(query);
|
|
92
|
+
expect(qs.get("query")).toBe("noise schedule");
|
|
93
|
+
expect(qs.get("k")).toBe("4");
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test("rejects k without query", async () => {
|
|
97
|
+
const { client } = makeClient();
|
|
98
|
+
await expect(client.getPaper("123", { k: 4 } as any)).rejects.toThrow(
|
|
99
|
+
/k is only valid together with query/i,
|
|
100
|
+
);
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
describe("research.similarPapers", () => {
|
|
105
|
+
test("requires intent", async () => {
|
|
106
|
+
const { client } = makeClient();
|
|
107
|
+
await expect(
|
|
108
|
+
client.similarPapers("123", { intent: "" }),
|
|
109
|
+
).rejects.toThrow(/intent cannot be empty/i);
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
test("builds path and query with repeated anchors and rerank", async () => {
|
|
113
|
+
const { client, calls } = makeClient(() => ({
|
|
114
|
+
status: 200,
|
|
115
|
+
data: { results: [], pool_size: 0, truncated: false },
|
|
116
|
+
}));
|
|
117
|
+
await client.similarPapers("2105.05233", {
|
|
118
|
+
intent: "diffusion image synthesis",
|
|
119
|
+
mode: "citers",
|
|
120
|
+
k: 20,
|
|
121
|
+
rerank: false,
|
|
122
|
+
anchor: ["arxiv:2006.11239", "1503.03585"],
|
|
123
|
+
});
|
|
124
|
+
const [path, query] = calls[0].split("?");
|
|
125
|
+
expect(path).toBe("/v2/research/papers/2105.05233/similar");
|
|
126
|
+
const qs = new URLSearchParams(query);
|
|
127
|
+
expect(qs.get("intent")).toBe("diffusion image synthesis");
|
|
128
|
+
expect(qs.get("mode")).toBe("citers");
|
|
129
|
+
expect(qs.get("k")).toBe("20");
|
|
130
|
+
expect(qs.get("rerank")).toBe("false");
|
|
131
|
+
expect(qs.getAll("anchor")).toEqual(["arxiv:2006.11239", "1503.03585"]);
|
|
132
|
+
});
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
describe("research.searchGithub", () => {
|
|
136
|
+
test("builds query string", async () => {
|
|
137
|
+
const { client, calls } = makeClient(() => ({ status: 200, data: { results: [] } }));
|
|
138
|
+
await client.searchGithub("milvus hybrid search", { k: 10 });
|
|
139
|
+
const qs = new URLSearchParams(calls[0].split("?")[1]);
|
|
140
|
+
expect(calls[0].startsWith("/v2/research/github?")).toBe(true);
|
|
141
|
+
expect(qs.get("query")).toBe("milvus hybrid search");
|
|
142
|
+
expect(qs.get("k")).toBe("10");
|
|
143
|
+
});
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
describe("research error mapping", () => {
|
|
147
|
+
test("maps RFC 7807 Problem detail to SdkError", async () => {
|
|
148
|
+
const { client } = makeClient(() => {
|
|
149
|
+
throw problemError(400, {
|
|
150
|
+
type: "urn:search-pipeline:invalid_request",
|
|
151
|
+
title: "Bad Request",
|
|
152
|
+
status: 400,
|
|
153
|
+
detail: "query is required",
|
|
154
|
+
});
|
|
155
|
+
});
|
|
156
|
+
await expect(client.searchPapers("q")).rejects.toMatchObject({
|
|
157
|
+
message: "query is required",
|
|
158
|
+
status: 400,
|
|
159
|
+
} as Partial<SdkError>);
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
test("falls back to title when detail is absent", async () => {
|
|
163
|
+
const { client } = makeClient(() => {
|
|
164
|
+
throw problemError(404, { title: "Not Found", status: 404 });
|
|
165
|
+
});
|
|
166
|
+
await expect(client.getPaper("999")).rejects.toThrow(/Not Found/);
|
|
167
|
+
});
|
|
168
|
+
});
|
package/src/index.ts
CHANGED
|
@@ -11,6 +11,8 @@ export { FirecrawlClient } from "./v2/client";
|
|
|
11
11
|
export * from "./v2/types";
|
|
12
12
|
/** Watcher class and options for crawl/batch job monitoring. */
|
|
13
13
|
export { Watcher, type WatcherOptions } from "./v2/watcher";
|
|
14
|
+
/** Research sub-client (accessed via `firecrawl.research`). */
|
|
15
|
+
export { ResearchClient } from "./v2/methods/research";
|
|
14
16
|
/** Legacy v1 client (feature‑frozen). */
|
|
15
17
|
export { default as FirecrawlAppV1 } from "./v1";
|
|
16
18
|
|
package/src/v2/client.ts
CHANGED
|
@@ -32,6 +32,7 @@ import {
|
|
|
32
32
|
listBrowsers,
|
|
33
33
|
} from "./methods/browser";
|
|
34
34
|
import { getConcurrency, getCreditUsage, getQueueStatus, getTokenUsage, getCreditUsageHistorical, getTokenUsageHistorical } from "./methods/usage";
|
|
35
|
+
import { ResearchClient } from "./methods/research";
|
|
35
36
|
import {
|
|
36
37
|
createMonitor as createMonitorMethod,
|
|
37
38
|
deleteMonitor as deleteMonitorMethod,
|
|
@@ -119,6 +120,7 @@ export type FirecrawlClientInput = FirecrawlClientOptions | string;
|
|
|
119
120
|
|
|
120
121
|
export class FirecrawlClient {
|
|
121
122
|
private readonly http: HttpClient;
|
|
123
|
+
private _research?: ResearchClient;
|
|
122
124
|
|
|
123
125
|
private isCloudService(url: string): boolean {
|
|
124
126
|
return url.includes('api.firecrawl.dev');
|
|
@@ -234,6 +236,16 @@ export class FirecrawlClient {
|
|
|
234
236
|
return search(this.http, { query, ...req });
|
|
235
237
|
}
|
|
236
238
|
|
|
239
|
+
// Research
|
|
240
|
+
/**
|
|
241
|
+
* Access the v2 research endpoints (arXiv papers + GitHub history/readmes).
|
|
242
|
+
* Example: `firecrawl.research.searchPapers("diffusion models")`.
|
|
243
|
+
*/
|
|
244
|
+
get research(): ResearchClient {
  // Built on first access from this client's `http`, then reused.
  let client = this._research;
  if (!client) {
    client = new ResearchClient(this.http);
    this._research = client;
  }
  return client;
}
|
|
248
|
+
|
|
237
249
|
// Map
|
|
238
250
|
/**
|
|
239
251
|
* Map a site to discover URLs (sitemap-aware).
|