firecrawl 4.25.2 → 4.25.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/audit-ci.jsonc +1 -4
- package/dist/{chunk-XCQC2QCZ.js → chunk-BVQPX6RA.js} +9 -4
- package/dist/index.cjs +158 -4
- package/dist/index.d.cts +190 -31
- package/dist/index.d.ts +190 -31
- package/dist/index.js +150 -2
- package/dist/{package-D6422PQU.js → package-4T5PXLTT.js} +1 -1
- package/package.json +2 -2
- package/pnpm-workspace.yaml +3 -0
- package/src/__tests__/e2e/v1/index.test.ts +15 -15
- package/src/__tests__/unit/v2/research.test.ts +168 -0
- package/src/index.ts +2 -0
- package/src/v2/client.ts +12 -0
- package/src/v2/methods/research.ts +195 -0
- package/src/v2/types.ts +159 -42
package/audit-ci.jsonc
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
2
2
|
// Bundler helper that wraps a CommonJS-style module factory in a lazy loader.
// `cb` is an object whose first own property is the factory; `mod` caches the
// module object. The first call runs the factory against a fresh
// `{ exports: {} }`; later calls return the cached `mod.exports`. If the
// factory throws, `mod` is reset to 0 (falsy) before rethrowing, so the next
// call runs the factory again rather than returning a half-initialized module.
var __commonJS = (cb, mod) => function __require() {
|
|
3
|
-
|
|
3
|
+
try {
|
|
4
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
5
|
+
} catch (e) {
|
|
6
|
+
throw mod = 0, e;
|
|
7
|
+
}
|
|
4
8
|
};
|
|
5
9
|
|
|
6
10
|
// package.json
|
|
@@ -8,7 +12,7 @@ var require_package = __commonJS({
|
|
|
8
12
|
"package.json"(exports, module) {
|
|
9
13
|
module.exports = {
|
|
10
14
|
name: "@mendable/firecrawl-js",
|
|
11
|
-
version: "4.25.2",
|
|
15
|
+
version: "4.25.4",
|
|
12
16
|
description: "JavaScript SDK for Firecrawl API",
|
|
13
17
|
main: "dist/index.js",
|
|
14
18
|
types: "dist/index.d.ts",
|
|
@@ -56,7 +60,7 @@ var require_package = __commonJS({
|
|
|
56
60
|
"ts-jest": "^29.4.5",
|
|
57
61
|
tsup: "^8.5.0",
|
|
58
62
|
typescript: "^5.4.5",
|
|
59
|
-
uuid: "^
|
|
63
|
+
uuid: "^14.0.0"
|
|
60
64
|
},
|
|
61
65
|
keywords: [
|
|
62
66
|
"firecrawl",
|
|
@@ -79,7 +83,8 @@ var require_package = __commonJS({
|
|
|
79
83
|
handlebars: ">=4.7.9",
|
|
80
84
|
"brace-expansion": ">=5.0.6",
|
|
81
85
|
"axios@>=1.0.0 <1.16.0": "1.16.1",
|
|
82
|
-
"follow-redirects@<1.16.0": ">=1.16.0 <2.0.0"
|
|
86
|
+
"follow-redirects@<1.16.0": ">=1.16.0 <2.0.0",
|
|
87
|
+
"esbuild@>=0.17.0 <0.28.1": "0.28.1"
|
|
83
88
|
}
|
|
84
89
|
}
|
|
85
90
|
};
|
package/dist/index.cjs
CHANGED
|
@@ -6,7 +6,11 @@ var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
|
6
6
|
var __getProtoOf = Object.getPrototypeOf;
|
|
7
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
8
|
// Bundler helper that wraps a CommonJS-style module factory in a lazy loader.
// `cb` is an object whose first own property is the factory; `mod` caches the
// module object. The first call runs the factory against a fresh
// `{ exports: {} }`; later calls return the cached `mod.exports`. If the
// factory throws, `mod` is reset to 0 (falsy) before rethrowing, so the next
// call runs the factory again rather than returning a half-initialized module.
var __commonJS = (cb, mod) => function __require() {
|
|
9
|
-
|
|
9
|
+
try {
|
|
10
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
11
|
+
} catch (e2) {
|
|
12
|
+
throw mod = 0, e2;
|
|
13
|
+
}
|
|
10
14
|
};
|
|
11
15
|
var __export = (target, all) => {
|
|
12
16
|
for (var name in all)
|
|
@@ -35,7 +39,7 @@ var require_package = __commonJS({
|
|
|
35
39
|
"package.json"(exports2, module2) {
|
|
36
40
|
module2.exports = {
|
|
37
41
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "4.25.2",
|
|
42
|
+
version: "4.25.4",
|
|
39
43
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
44
|
main: "dist/index.js",
|
|
41
45
|
types: "dist/index.d.ts",
|
|
@@ -83,7 +87,7 @@ var require_package = __commonJS({
|
|
|
83
87
|
"ts-jest": "^29.4.5",
|
|
84
88
|
tsup: "^8.5.0",
|
|
85
89
|
typescript: "^5.4.5",
|
|
86
|
-
uuid: "^
|
|
90
|
+
uuid: "^14.0.0"
|
|
87
91
|
},
|
|
88
92
|
keywords: [
|
|
89
93
|
"firecrawl",
|
|
@@ -106,7 +110,8 @@ var require_package = __commonJS({
|
|
|
106
110
|
handlebars: ">=4.7.9",
|
|
107
111
|
"brace-expansion": ">=5.0.6",
|
|
108
112
|
"axios@>=1.0.0 <1.16.0": "1.16.1",
|
|
109
|
-
"follow-redirects@<1.16.0": ">=1.16.0 <2.0.0"
|
|
113
|
+
"follow-redirects@<1.16.0": ">=1.16.0 <2.0.0",
|
|
114
|
+
"esbuild@>=0.17.0 <0.28.1": "0.28.1"
|
|
110
115
|
}
|
|
111
116
|
}
|
|
112
117
|
};
|
|
@@ -120,6 +125,7 @@ __export(index_exports, {
|
|
|
120
125
|
FirecrawlAppV1: () => FirecrawlApp,
|
|
121
126
|
FirecrawlClient: () => FirecrawlClient,
|
|
122
127
|
JobTimeoutError: () => JobTimeoutError,
|
|
128
|
+
ResearchClient: () => ResearchClient,
|
|
123
129
|
SdkError: () => SdkError,
|
|
124
130
|
Watcher: () => Watcher,
|
|
125
131
|
default: () => index_default
|
|
@@ -1425,6 +1431,143 @@ async function getTokenUsageHistorical(http, byApiKey) {
|
|
|
1425
1431
|
}
|
|
1426
1432
|
}
|
|
1427
1433
|
|
|
1434
|
+
// src/v2/methods/research.ts
|
|
1435
|
+
var BASE = "/v2/research";
|
|
1436
|
+
/**
 * Append a query parameter to `params`, skipping values that carry no data.
 *
 * - `null` / `undefined` values are ignored.
 * - Arrays expand into repeated `key=value` pairs, one per usable element.
 * - Every value is serialized with `String()`; a value (or array element)
 *   that serializes to the empty string is dropped, so scalars and array
 *   elements follow the same rule and no bare `key=` pair is ever emitted.
 *
 * @param {URLSearchParams} params Accumulator that is mutated in place.
 * @param {string} key Query parameter name.
 * @param {unknown} value Scalar or array of scalars to serialize.
 * @returns {void}
 */
function appendParam(params, key, value) {
  if (value == null) return;
  // Normalize to a list so scalars and arrays share one code path.
  const candidates = Array.isArray(value) ? value : [value];
  for (const candidate of candidates) {
    if (candidate == null) continue;
    const text = String(candidate);
    if (text.length > 0) params.append(key, text);
  }
}
|
|
1446
|
+
/**
 * Join a request path with its serialized query parameters.
 *
 * @param {string} path Request path without a query string.
 * @param {URLSearchParams} params Parameters to serialize.
 * @returns {string} `path` unchanged when there is nothing to serialize,
 *   otherwise `path?<query>`.
 */
function withQuery(path, params) {
  const serialized = params.toString();
  if (!serialized) {
    return path;
  }
  return `${path}?${serialized}`;
}
|
|
1450
|
+
/**
 * Convert a failed research request into a thrown error. Never returns.
 *
 * - Errors that are not flagged `isAxiosError` are rethrown untouched.
 * - When the response body carries `detail` or `title`, that value becomes
 *   the `SdkError` message and `body.type` its code. A non-string `detail`
 *   (for example a list of validation failures) is JSON-serialized so the
 *   message stays readable instead of collapsing to "[object Object]".
 * - Otherwise the axios message (or a generic fallback naming `action`) is
 *   used, with `err.code` as the code.
 *
 * @param {unknown} err Error caught around the HTTP call.
 * @param {string} action Human-readable description of the attempted call.
 * @returns {never}
 * @throws {SdkError} For axios errors.
 * @throws The original `err` for anything else.
 */
function normalizeResearchError(err, action) {
  if (!err?.isAxiosError) throw err;

  const status = err.response?.status;
  const body = err.response?.data;
  const detail = body ? body.detail || body.title : void 0;

  if (detail) {
    let message = detail;
    if (typeof detail !== "string") {
      try {
        // JSON.stringify can return undefined (e.g. for symbols); fall back to String().
        message = JSON.stringify(detail) ?? String(detail);
      } catch {
        message = String(detail);
      }
    }
    throw new SdkError(message, status, body.type, body);
  }

  throw new SdkError(
    err.message || `Request failed while trying to ${action}`,
    status,
    err.code,
    body
  );
}
|
|
1467
|
+
/**
 * Client for the v2 research endpoints (arXiv papers + GitHub history/readmes).
 *
 * Each public method validates its arguments locally (throwing a plain
 * `Error`), builds the query string with `appendParam`, issues a GET below
 * `BASE` through the injected HTTP client and resolves with the response body.
 */
var ResearchClient = class {
  constructor(http) {
    this.http = http;
  }
  http;
  /**
   * Reject a `k` that was supplied but is not a positive number.
   * Written as `!(k > 0)` rather than `k <= 0` so `NaN` is rejected as well
   * instead of being serialized into the query string.
   * @param k Caller-supplied result count, or null/undefined when omitted.
   */
  #assertPositiveK(k) {
    if (k != null && !(k > 0)) throw new Error("k must be positive");
  }
  /**
   * Shared GET pipeline used by every endpoint method.
   * A non-200 status is handed to `throwForBadResponse`; anything thrown
   * inside the pipeline is passed to `normalizeResearchError`.
   * @param path Request path without a query string.
   * @param params Query parameters to serialize onto the path.
   * @param action Short description of the call, used in error messages.
   * @returns The response body (`res.data`).
   */
  async #getJson(path, params, action) {
    try {
      const res = await this.http.get(withQuery(path, params));
      if (res.status !== 200) throwForBadResponse(res, action);
      return res.data;
    } catch (err) {
      return normalizeResearchError(err, action);
    }
  }
  /**
   * Search papers by abstract relevance.
   * @param query Natural-language search query.
   * @param options Optional filters (k, authors, categories, from, to).
   */
  async searchPapers(query, options = {}) {
    if (!query || !query.trim()) throw new Error("query cannot be empty");
    this.#assertPositiveK(options.k);
    const params = new URLSearchParams();
    appendParam(params, "query", query);
    appendParam(params, "k", options.k);
    appendParam(params, "authors", options.authors);
    appendParam(params, "categories", options.categories);
    appendParam(params, "from", options.from);
    appendParam(params, "to", options.to);
    return this.#getJson(`${BASE}/papers`, params, "search papers");
  }
  /**
   * Fetch a single paper. `query` and `k` are forwarded as query parameters
   * when supplied; `k` is only accepted together with `query`.
   * @param id Paper reference; URL-encoded into the request path.
   * @param options Optional `query` and `k`.
   */
  async getPaper(id, options = {}) {
    if (!id || !id.trim()) throw new Error("id cannot be empty");
    if (options.k != null && options.query == null)
      throw new Error("k is only valid together with query");
    this.#assertPositiveK(options.k);
    const params = new URLSearchParams();
    appendParam(params, "query", options.query);
    appendParam(params, "k", options.k);
    return this.#getJson(
      `${BASE}/papers/${encodeURIComponent(id)}`,
      params,
      "get paper"
    );
  }
  /**
   * Find related papers via the citation graph.
   * @param id Primary seed paper reference.
   * @param options Required `intent` plus optional mode, k, rerank, anchor.
   */
  async similarPapers(id, options) {
    if (!id || !id.trim()) throw new Error("id cannot be empty");
    if (!options?.intent || !options.intent.trim())
      throw new Error("intent cannot be empty");
    this.#assertPositiveK(options.k);
    const params = new URLSearchParams();
    appendParam(params, "intent", options.intent);
    appendParam(params, "mode", options.mode);
    appendParam(params, "k", options.k);
    // appendParam already ignores null/undefined, so `rerank: false` is still sent.
    appendParam(params, "rerank", options.rerank);
    appendParam(params, "anchor", options.anchor);
    return this.#getJson(
      `${BASE}/papers/${encodeURIComponent(id)}/similar`,
      params,
      "find similar papers"
    );
  }
  /**
   * Search GitHub issue/PR history and repository readmes.
   * @param query Search query.
   * @param options Optional `k`.
   */
  async searchGithub(query, options = {}) {
    if (!query || !query.trim()) throw new Error("query cannot be empty");
    this.#assertPositiveK(options.k);
    const params = new URLSearchParams();
    appendParam(params, "query", query);
    appendParam(params, "k", options.k);
    return this.#getJson(`${BASE}/github`, params, "search github");
  }
};
|
|
1570
|
+
|
|
1428
1571
|
// src/v2/methods/monitor.ts
|
|
1429
1572
|
function queryString(params) {
|
|
1430
1573
|
if (!params) return "";
|
|
@@ -1790,6 +1933,7 @@ var Watcher = class extends import_events.EventEmitter {
|
|
|
1790
1933
|
var zt = require("zod");
|
|
1791
1934
|
var FirecrawlClient = class {
|
|
1792
1935
|
http;
|
|
1936
|
+
_research;
|
|
1793
1937
|
isCloudService(url) {
|
|
1794
1938
|
return url.includes("api.firecrawl.dev");
|
|
1795
1939
|
}
|
|
@@ -1862,6 +2006,15 @@ var FirecrawlClient = class {
|
|
|
1862
2006
|
async search(query, req = {}) {
|
|
1863
2007
|
return search(this.http, { query, ...req });
|
|
1864
2008
|
}
|
|
2009
|
+
// Research
|
|
2010
|
+
/**
|
|
2011
|
+
* Access the v2 research endpoints (arXiv papers + GitHub history/readmes).
|
|
2012
|
+
* Example: `firecrawl.research.searchPapers("diffusion models")`.
|
|
2013
|
+
*/
|
|
2014
|
+
get research() {
|
|
2015
|
+
if (!this._research) this._research = new ResearchClient(this.http);
|
|
2016
|
+
return this._research;
|
|
2017
|
+
}
|
|
1865
2018
|
// Map
|
|
1866
2019
|
/**
|
|
1867
2020
|
* Map a site to discover URLs (sitemap-aware).
|
|
@@ -3619,6 +3772,7 @@ var index_default = Firecrawl;
|
|
|
3619
3772
|
FirecrawlAppV1,
|
|
3620
3773
|
FirecrawlClient,
|
|
3621
3774
|
JobTimeoutError,
|
|
3775
|
+
ResearchClient,
|
|
3622
3776
|
SdkError,
|
|
3623
3777
|
Watcher
|
|
3624
3778
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -4,7 +4,7 @@ import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
|
|
4
4
|
import { EventEmitter } from 'events';
|
|
5
5
|
import { TypedEventTarget } from 'typescript-event-target';
|
|
6
6
|
|
|
7
|
-
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "audio" | "video"
|
|
7
|
+
type FormatString = "markdown" | "html" | "rawHtml" | "links" | "images" | "screenshot" | "summary" | "changeTracking" | "json" | "attributes" | "branding" | "audio" | "video";
|
|
8
8
|
interface Viewport {
|
|
9
9
|
width: number;
|
|
10
10
|
height: number;
|
|
@@ -165,34 +165,6 @@ interface RedactPIIOptions {
|
|
|
165
165
|
*/
|
|
166
166
|
replaceStyle?: "tag" | "mask" | "remove";
|
|
167
167
|
}
|
|
168
|
-
type PIISource = "model" | "heuristics" | "unknown";
|
|
169
|
-
interface PIISpan {
|
|
170
|
-
start: number;
|
|
171
|
-
end: number;
|
|
172
|
-
/** Unified entity bucket. Omitted when `kind` doesn't map onto one. */
|
|
173
|
-
entity?: RedactPIIEntity;
|
|
174
|
-
/** Granular recognizer label from fire-privacy. */
|
|
175
|
-
kind: string;
|
|
176
|
-
source: PIISource;
|
|
177
|
-
/** Confidence in [0, 1] when supplied. */
|
|
178
|
-
score?: number;
|
|
179
|
-
}
|
|
180
|
-
/**
|
|
181
|
-
* - ok: redaction completed; redactedMarkdown is the result.
|
|
182
|
-
* - skipped: redaction was not performed; see `reason`.
|
|
183
|
-
* - failed: redaction was attempted but did not produce a usable result.
|
|
184
|
-
*/
|
|
185
|
-
type PIIStatus = "ok" | "skipped" | "failed";
|
|
186
|
-
/** Always set when status !== "ok". */
|
|
187
|
-
type PIIReason = "empty_input" | "too_large" | "upstream_skipped" | "service_unavailable" | "timeout" | "error";
|
|
188
|
-
interface PIIBlock {
|
|
189
|
-
status: PIIStatus;
|
|
190
|
-
reason?: PIIReason;
|
|
191
|
-
redactedMarkdown: string | null;
|
|
192
|
-
spans: PIISpan[];
|
|
193
|
-
/** Span count per entity bucket. Only non-zero entries are present. */
|
|
194
|
-
counts: Partial<Record<RedactPIIEntity, number>>;
|
|
195
|
-
}
|
|
196
168
|
type ParseFileData = Blob | File | Buffer | Uint8Array | ArrayBuffer | string;
|
|
197
169
|
interface ParseFile {
|
|
198
170
|
data: ParseFileData;
|
|
@@ -410,7 +382,6 @@ interface Document {
|
|
|
410
382
|
warning?: string;
|
|
411
383
|
changeTracking?: Record<string, unknown>;
|
|
412
384
|
branding?: BrandingProfile;
|
|
413
|
-
pii?: PIIBlock;
|
|
414
385
|
}
|
|
415
386
|
interface PaginationConfig {
|
|
416
387
|
/** When true (default), automatically follow `next` links and aggregate all documents. */
|
|
@@ -918,6 +889,146 @@ interface BrowserListResponse {
|
|
|
918
889
|
sessions?: BrowserSession[];
|
|
919
890
|
error?: string;
|
|
920
891
|
}
|
|
892
|
+
/**
|
|
893
|
+
* Source identifiers grouped by namespace. Currently only `arxiv` is
|
|
894
|
+
* populated; each value is an array of ids in that namespace.
|
|
895
|
+
*/
|
|
896
|
+
type IdMap = Record<string, string[]>;
|
|
897
|
+
/** Per-candidate ranking signals (present on similarity results). */
|
|
898
|
+
interface PaperSignals {
|
|
899
|
+
/** Raw structural strength (co-citation / coupling counts, or seed overlap). */
|
|
900
|
+
structural: number;
|
|
901
|
+
/** Semantic score from the intent abstract search (0 if absent). */
|
|
902
|
+
semantic: number;
|
|
903
|
+
/** Citation-graph PageRank of the candidate. */
|
|
904
|
+
pagerank: number;
|
|
905
|
+
/** Number of distinct seeds connected to this candidate. */
|
|
906
|
+
seed_overlap: number;
|
|
907
|
+
}
|
|
908
|
+
/** A ranked paper. `paper_id` is canonical; arXiv lives in `ids`. */
|
|
909
|
+
interface PaperResult {
|
|
910
|
+
/** Canonical paper id — the Milvus INT64 primary key as a decimal string. */
|
|
911
|
+
paper_id: string;
|
|
912
|
+
ids?: IdMap;
|
|
913
|
+
title: string;
|
|
914
|
+
abstract: string;
|
|
915
|
+
/** Final ranking score (post-rerank when enabled). Not normalized. */
|
|
916
|
+
score: number;
|
|
917
|
+
/** Present on similarity results. */
|
|
918
|
+
signals?: PaperSignals;
|
|
919
|
+
}
|
|
920
|
+
interface PaperMetadata {
|
|
921
|
+
paper_id: string;
|
|
922
|
+
ids?: IdMap;
|
|
923
|
+
title: string;
|
|
924
|
+
abstract: string;
|
|
925
|
+
/** Comma-joined author names. Omitted if unknown. */
|
|
926
|
+
authors?: string;
|
|
927
|
+
/** arXiv categories. Omitted if unknown. */
|
|
928
|
+
categories?: string[];
|
|
929
|
+
/** Original creation date string (format varies). Omitted if unknown. */
|
|
930
|
+
created_date?: string;
|
|
931
|
+
/** Last-updated date string. Omitted if unknown. */
|
|
932
|
+
update_date?: string;
|
|
933
|
+
}
|
|
934
|
+
interface Passage {
|
|
935
|
+
/** In-body passage text (may be markdown, including tables). */
|
|
936
|
+
text: string;
|
|
937
|
+
/** Dense similarity score for the passage. */
|
|
938
|
+
score: number;
|
|
939
|
+
}
|
|
940
|
+
/** Result payload of `ResearchClient.searchPapers`. */
interface SearchPapersResponse {
|
|
941
|
+
results: PaperResult[];
|
|
942
|
+
}
|
|
943
|
+
/** Result payload of `ResearchClient.getPaper` when no `query` is supplied (detail mode). */
interface PaperMetadataResponse {
|
|
944
|
+
paper: PaperMetadata;
|
|
945
|
+
}
|
|
946
|
+
interface ReadPaperResponse {
|
|
947
|
+
paper: PaperMetadata;
|
|
948
|
+
/** Resolved canonical paper id (empty string if not found via id-key). */
|
|
949
|
+
paper_id: string;
|
|
950
|
+
/** Echo of the read query. */
|
|
951
|
+
query: string;
|
|
952
|
+
/** Top matching in-body passages. */
|
|
953
|
+
passages: Passage[];
|
|
954
|
+
}
|
|
955
|
+
interface SimilarPapersResponse {
|
|
956
|
+
/** Ranked related papers; each carries `signals`. */
|
|
957
|
+
results: PaperResult[];
|
|
958
|
+
/** Number of resolved candidates considered before truncation to `k`. */
|
|
959
|
+
pool_size: number;
|
|
960
|
+
/** True if more resolved candidates existed than were returned. */
|
|
961
|
+
truncated: boolean;
|
|
962
|
+
/** Human-readable note when no results are produced. */
|
|
963
|
+
note?: string | null;
|
|
964
|
+
}
|
|
965
|
+
/** Component scores; each field is present only when that signal contributed. */
|
|
966
|
+
interface GitHubScoreBreakdown {
|
|
967
|
+
rrf?: number;
|
|
968
|
+
semantic?: number;
|
|
969
|
+
lexical?: number;
|
|
970
|
+
fusion?: number;
|
|
971
|
+
}
|
|
972
|
+
interface GitHubSearchItem {
|
|
973
|
+
resultType: "github_history" | "repo_readme";
|
|
974
|
+
/** `owner/name`. */
|
|
975
|
+
repo: string;
|
|
976
|
+
url: string;
|
|
977
|
+
/** History page type (e.g. `issue`, `pull`). Omitted for readmes. */
|
|
978
|
+
pageType?: string;
|
|
979
|
+
/** Issue/PR number. Omitted for readmes. */
|
|
980
|
+
number?: number;
|
|
981
|
+
/** Number of matched segments/chunks. Omitted when not applicable. */
|
|
982
|
+
segmentCount?: number;
|
|
983
|
+
/** Readme URL (readme results). Omitted otherwise. */
|
|
984
|
+
readmeUrl?: string;
|
|
985
|
+
/** Short matched excerpt. */
|
|
986
|
+
snippet: string;
|
|
987
|
+
/** Full matched content in markdown. Omitted unless available. */
|
|
988
|
+
contentMd?: string;
|
|
989
|
+
scores: GitHubScoreBreakdown;
|
|
990
|
+
}
|
|
991
|
+
/** Result payload of `ResearchClient.searchGithub`. */
interface GitHubSearchResponse {
|
|
992
|
+
results: GitHubSearchItem[];
|
|
993
|
+
}
|
|
994
|
+
/** Options for `research.searchPapers`. */
|
|
995
|
+
interface SearchPapersOptions {
|
|
996
|
+
/** Number of results to return (1–500, default 40). */
|
|
997
|
+
k?: number;
|
|
998
|
+
/** Author substring filter(s); ALL must match (case-insensitive). */
|
|
999
|
+
authors?: string[];
|
|
1000
|
+
/** arXiv category filter(s) (e.g. `cs.LG`); ALL must match. */
|
|
1001
|
+
categories?: string[];
|
|
1002
|
+
/** Inclusive lower bound on created/updated date (ISO `YYYY-MM-DD`). */
|
|
1003
|
+
from?: string;
|
|
1004
|
+
/** Inclusive upper bound on created/updated date (lexicographic). */
|
|
1005
|
+
to?: string;
|
|
1006
|
+
}
|
|
1007
|
+
/** Options for `research.getPaper`. */
|
|
1008
|
+
interface GetPaperOptions {
|
|
1009
|
+
/** When present, switches to read mode and returns in-body passages. */
|
|
1010
|
+
query?: string;
|
|
1011
|
+
/** Passage count (read mode only; 1–50, default 4). Requires `query`. */
|
|
1012
|
+
k?: number;
|
|
1013
|
+
}
|
|
1014
|
+
/** Options for `research.similarPapers`. */
|
|
1015
|
+
interface SimilarPapersOptions {
|
|
1016
|
+
/** Natural-language intent used to semantically rerank candidates. Required. */
|
|
1017
|
+
intent: string;
|
|
1018
|
+
/** Traversal mode (default `similar`). */
|
|
1019
|
+
mode?: "similar" | "citers" | "references";
|
|
1020
|
+
/** Number of related papers to return (1–500, default 40). */
|
|
1021
|
+
k?: number;
|
|
1022
|
+
/** Apply an additional ZeroEntropy rerank over the fused candidates. */
|
|
1023
|
+
rerank?: boolean;
|
|
1024
|
+
/** Additional seed paper reference(s), same format as `id`. */
|
|
1025
|
+
anchor?: string[];
|
|
1026
|
+
}
|
|
1027
|
+
/** Options for `research.searchGithub`. */
|
|
1028
|
+
interface SearchGithubOptions {
|
|
1029
|
+
/** Number of results to return (1–100, default 20). */
|
|
1030
|
+
k?: number;
|
|
1031
|
+
}
|
|
921
1032
|
|
|
922
1033
|
interface HttpClientOptions {
|
|
923
1034
|
apiKey: string;
|
|
@@ -1002,6 +1113,48 @@ declare function listBrowsers(http: HttpClient, args?: {
|
|
|
1002
1113
|
status?: "active" | "destroyed";
|
|
1003
1114
|
}): Promise<BrowserListResponse>;
|
|
1004
1115
|
|
|
1116
|
+
/**
|
|
1117
|
+
* Client for the v2 research endpoints (arXiv papers + GitHub history/readmes).
|
|
1118
|
+
* Accessed via `firecrawl.research`.
|
|
1119
|
+
*/
|
|
1120
|
+
declare class ResearchClient {
|
|
1121
|
+
private readonly http;
|
|
1122
|
+
constructor(http: HttpClient);
|
|
1123
|
+
/**
|
|
1124
|
+
* Search papers by abstract relevance.
|
|
1125
|
+
* @param query Natural-language search query.
|
|
1126
|
+
* @param options Optional filters (k, authors, categories, from, to).
|
|
1127
|
+
*/
|
|
1128
|
+
searchPapers(query: string, options?: SearchPapersOptions): Promise<SearchPapersResponse>;
|
|
1129
|
+
/**
|
|
1130
|
+
* Get paper metadata (detail mode), or read in-body passages (when `query` is
|
|
1131
|
+
* supplied). `k` is only valid together with `query`.
|
|
1132
|
+
* @param id Paper reference: a canonical `paper_id`, an `arxiv:<id>` key, or a
|
|
1133
|
+
* bare arXiv id / URL.
|
|
1134
|
+
* @param options Optional `query` (switches to read mode) and `k`.
|
|
1135
|
+
*/
|
|
1136
|
+
getPaper(id: string, options?: {
|
|
1137
|
+
query?: undefined;
|
|
1138
|
+
k?: undefined;
|
|
1139
|
+
}): Promise<PaperMetadataResponse>;
|
|
1140
|
+
getPaper(id: string, options: {
|
|
1141
|
+
query: string;
|
|
1142
|
+
k?: number;
|
|
1143
|
+
}): Promise<ReadPaperResponse>;
|
|
1144
|
+
/**
|
|
1145
|
+
* Find related papers via the citation graph.
|
|
1146
|
+
* @param id Primary seed paper reference.
|
|
1147
|
+
* @param options Required `intent` plus optional mode, k, rerank, anchor.
|
|
1148
|
+
*/
|
|
1149
|
+
similarPapers(id: string, options: SimilarPapersOptions): Promise<SimilarPapersResponse>;
|
|
1150
|
+
/**
|
|
1151
|
+
* Search GitHub issue/PR history and repository readmes.
|
|
1152
|
+
* @param query Search query.
|
|
1153
|
+
* @param options Optional `k`.
|
|
1154
|
+
*/
|
|
1155
|
+
searchGithub(query: string, options?: SearchGithubOptions): Promise<GitHubSearchResponse>;
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1005
1158
|
type JobKind = "crawl" | "batch";
|
|
1006
1159
|
interface WatcherOptions {
|
|
1007
1160
|
kind?: JobKind;
|
|
@@ -1057,6 +1210,7 @@ type FirecrawlClientInput = FirecrawlClientOptions | string;
|
|
|
1057
1210
|
*/
|
|
1058
1211
|
declare class FirecrawlClient {
|
|
1059
1212
|
private readonly http;
|
|
1213
|
+
private _research?;
|
|
1060
1214
|
private isCloudService;
|
|
1061
1215
|
/**
|
|
1062
1216
|
* Create a v2 client.
|
|
@@ -1117,6 +1271,11 @@ declare class FirecrawlClient {
|
|
|
1117
1271
|
* @returns Structured search results.
|
|
1118
1272
|
*/
|
|
1119
1273
|
search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>;
|
|
1274
|
+
/**
|
|
1275
|
+
* Access the v2 research endpoints (arXiv papers + GitHub history/readmes).
|
|
1276
|
+
* Example: `firecrawl.research.searchPapers("diffusion models")`.
|
|
1277
|
+
*/
|
|
1278
|
+
get research(): ResearchClient;
|
|
1120
1279
|
/**
|
|
1121
1280
|
* Map a site to discover URLs (sitemap-aware).
|
|
1122
1281
|
* @param url Root URL to map.
|
|
@@ -2280,4 +2439,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
2280
2439
|
get v1(): FirecrawlApp;
|
|
2281
2440
|
}
|
|
2282
2441
|
|
|
2283
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type HighlightsFormat, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type
|
|
2442
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig, type AgentWebhookEvent, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type BrowserCreateResponse, type BrowserDeleteResponse, type BrowserExecuteResponse, type BrowserListResponse, type BrowserSession, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreateMonitorRequest, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientInput, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type GetMonitorCheckOptions, type GetPaperOptions, type GitHubScoreBreakdown, type GitHubSearchItem, type GitHubSearchResponse, type HighlightsFormat, type IdMap, JobTimeoutError, type JsonFormat, type ListMonitorChecksOptions, type ListMonitorsOptions, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type Monitor, type MonitorCheck, type MonitorCheckDetail, type MonitorCheckPage, type MonitorCrawlTarget, type MonitorEmailNotification, type MonitorEmailRecipientSubscription, type MonitorJsonFieldDiff, type MonitorNotification, type MonitorPageDiff, type MonitorPageJudgment, type MonitorPageSnapshot, type MonitorSchedule, type MonitorScrapeTarget, type MonitorSummary, type MonitorTarget, type MonitorWebhookConfig, type PDFAction, type PaginationConfig, type PaperMetadata, type PaperMetadataResponse, type PaperResult, type PaperSignals, type ParseFile, type 
ParseFileData, type ParseFormat, type ParseFormatOption, type ParseFormatString, type ParseOptions, type Passage, type PressAction, type QueryFormat, type QuestionFormat, type QueueStatusResponse$1 as QueueStatusResponse, type ReadPaperResponse, type RedactPIIEntity, type RedactPIIOptions, ResearchClient, type ScrapeAction, type ScrapeBrowserDeleteResponse, type ScrapeExecuteRequest, type ScrapeExecuteResponse, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchGithubOptions, type SearchPapersOptions, type SearchPapersResponse, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type SimilarPapersOptions, type SimilarPapersResponse, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type UpdateMonitorRequest, type Viewport, type WaitAction, Watcher, type WatcherOptions, type WebhookConfig, type WriteAction, Firecrawl as default };
|