firecrawl 4.25.1 → 4.25.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ import { describe, test, expect } from "@jest/globals";
2
+ import { ResearchClient } from "../../../v2/methods/research";
3
+ import { SdkError } from "../../../v2/types";
4
+ import type { HttpClient } from "../../../v2/utils/httpClient";
5
+
6
/**
 * Creates a ResearchClient wired to a stub transport.
 *
 * The stub implements only `get`. Each requested URL is pushed onto `calls`
 * before `responder` runs, so a responder that throws still leaves the URL
 * recorded.
 *
 * @param responder Produces the fake response for a URL; defaults to an empty
 *   200 response.
 * @returns The client under test and the ordered list of requested URLs.
 */
function makeClient(
  responder: (url: string) => { status: number; data: any } = () => {
    return { status: 200, data: {} };
  },
) {
  const requested: string[] = [];

  async function get(url: string) {
    requested.push(url);
    return responder(url);
  }

  // Only `get` is implemented, hence the double cast to the full interface.
  const transport = { get } as unknown as HttpClient;
  const client = new ResearchClient(transport);
  return { client, calls: requested };
}
22
+
23
/**
 * Builds a plain object shaped like a failed axios request whose response
 * body is an RFC 7807 Problem document.
 *
 * @param status HTTP status placed on the fake response.
 * @param body Problem body placed on the fake response as `data`.
 * @returns An object flagged with `isAxiosError` and a fixed message.
 */
function problemError(status: number, body: any) {
  const response = { status, data: body };
  return { isAxiosError: true, response, message: "req failed" };
}
27
+
28
// Covers query-string construction, argument validation and pass-through of
// the response body for `searchPapers`.
describe("research.searchPapers", () => {
  /** Canned 200 response carrying an empty result list. */
  const noResults = () => ({ status: 200, data: { results: [] } });

  test("builds query string with explode arrays", async () => {
    const { client, calls } = makeClient(noResults);
    const filters = {
      k: 10,
      authors: ["Ho", "Abbeel"],
      categories: ["cs.LG", "stat.ML"],
      from: "2020-01-01",
      to: "2024-12-31",
    };
    await client.searchPapers("diffusion models", filters);

    const requestUrl = calls[0];
    expect(requestUrl.startsWith("/v2/research/papers?")).toBe(true);

    const sent = new URLSearchParams(requestUrl.split("?")[1]);
    // Single-valued parameters.
    expect(sent.get("query")).toBe("diffusion models");
    expect(sent.get("k")).toBe("10");
    expect(sent.get("from")).toBe("2020-01-01");
    expect(sent.get("to")).toBe("2024-12-31");
    // Arrays are sent as repeated keys, one per element.
    expect(sent.getAll("authors")).toEqual(["Ho", "Abbeel"]);
    expect(sent.getAll("categories")).toEqual(["cs.LG", "stat.ML"]);
  });

  test("omits absent options", async () => {
    const { client, calls } = makeClient(noResults);
    await client.searchPapers("q");
    const sentKeys = Array.from(new URLSearchParams(calls[0].split("?")[1]).keys());
    expect(sentKeys).toEqual(["query"]);
  });

  test("rejects empty query", async () => {
    const { client } = makeClient();
    const attempt = client.searchPapers(" ");
    await expect(attempt).rejects.toThrow(/query cannot be empty/i);
  });

  test("rejects non-positive k", async () => {
    const { client } = makeClient();
    const attempt = client.searchPapers("q", { k: 0 });
    await expect(attempt).rejects.toThrow(/k must be positive/i);
  });

  test("returns the response body verbatim", async () => {
    const payload = { results: [{ paper_id: "1", title: "t", abstract: "a", score: 0.1 }] };
    const { client } = makeClient(() => ({ status: 200, data: payload }));
    await expect(client.searchPapers("q")).resolves.toEqual(payload);
  });
});
75
+
76
// Covers the two request shapes of `getPaper` (detail vs. read mode) and the
// rule that `k` needs a `query`.
describe("research.getPaper", () => {
  test("detail mode encodes the id and sends no query params", async () => {
    const stub = makeClient(() => ({ status: 200, data: { paper: {} } }));
    await stub.client.getPaper("arxiv:2105.05233");
    // The colon must be percent-encoded and no `?` suffix may be added.
    expect(stub.calls[0]).toBe("/v2/research/papers/arxiv%3A2105.05233");
  });

  test("read mode adds query and k", async () => {
    const stub = makeClient(() => {
      const data = { paper: {}, paper_id: "1", query: "q", passages: [] };
      return { status: 200, data };
    });
    await stub.client.getPaper("123", { query: "noise schedule", k: 4 });

    const parts = stub.calls[0].split("?");
    expect(parts[0]).toBe("/v2/research/papers/123");
    const sent = new URLSearchParams(parts[1]);
    expect(sent.get("query")).toBe("noise schedule");
    expect(sent.get("k")).toBe("4");
  });

  test("rejects k without query", async () => {
    const { client } = makeClient();
    // The public overloads forbid this shape at compile time, hence the cast.
    const attempt = client.getPaper("123", { k: 4 } as any);
    await expect(attempt).rejects.toThrow(/k is only valid together with query/i);
  });
});
103
+
104
// Covers intent validation and URL construction for `similarPapers`.
describe("research.similarPapers", () => {
  test("requires intent", async () => {
    const { client } = makeClient();
    const attempt = client.similarPapers("123", { intent: "" });
    await expect(attempt).rejects.toThrow(/intent cannot be empty/i);
  });

  test("builds path and query with repeated anchors and rerank", async () => {
    const { client, calls } = makeClient(() => {
      const data = { results: [], pool_size: 0, truncated: false };
      return { status: 200, data };
    });
    const anchors = ["arxiv:2006.11239", "1503.03585"];
    await client.similarPapers("2105.05233", {
      intent: "diffusion image synthesis",
      mode: "citers",
      k: 20,
      rerank: false,
      anchor: anchors,
    });

    const parts = calls[0].split("?");
    expect(parts[0]).toBe("/v2/research/papers/2105.05233/similar");

    const sent = new URLSearchParams(parts[1]);
    expect(sent.get("intent")).toBe("diffusion image synthesis");
    expect(sent.get("mode")).toBe("citers");
    expect(sent.get("k")).toBe("20");
    // `false` must still be transmitted rather than dropped as falsy.
    expect(sent.get("rerank")).toBe("false");
    expect(sent.getAll("anchor")).toEqual(["arxiv:2006.11239", "1503.03585"]);
  });
});
134
+
135
// Covers URL construction for `searchGithub`.
describe("research.searchGithub", () => {
  test("builds query string", async () => {
    const stub = makeClient(() => ({ status: 200, data: { results: [] } }));
    await stub.client.searchGithub("milvus hybrid search", { k: 10 });

    const requestUrl = stub.calls[0];
    const sent = new URLSearchParams(requestUrl.split("?")[1]);
    expect(requestUrl.startsWith("/v2/research/github?")).toBe(true);
    expect(sent.get("query")).toBe("milvus hybrid search");
    expect(sent.get("k")).toBe("10");
  });
});
145
+
146
// Covers translation of RFC 7807 Problem bodies into SdkError instances.
describe("research error mapping", () => {
  test("maps RFC 7807 Problem detail to SdkError", async () => {
    const problem = {
      type: "urn:search-pipeline:invalid_request",
      title: "Bad Request",
      status: 400,
      detail: "query is required",
    };
    const { client } = makeClient(() => {
      throw problemError(400, problem);
    });

    const expected = { message: "query is required", status: 400 } as Partial<SdkError>;
    await expect(client.searchPapers("q")).rejects.toMatchObject(expected);
  });

  test("falls back to title when detail is absent", async () => {
    const { client } = makeClient(() => {
      throw problemError(404, { title: "Not Found", status: 404 });
    });
    const attempt = client.getPaper("999");
    await expect(attempt).rejects.toThrow(/Not Found/);
  });
});
package/src/index.ts CHANGED
@@ -11,6 +11,8 @@ export { FirecrawlClient } from "./v2/client";
11
11
  export * from "./v2/types";
12
12
  /** Watcher class and options for crawl/batch job monitoring. */
13
13
  export { Watcher, type WatcherOptions } from "./v2/watcher";
14
+ /** Research sub-client (accessed via `firecrawl.research`). */
15
+ export { ResearchClient } from "./v2/methods/research";
14
16
  /** Legacy v1 client (feature‑frozen). */
15
17
  export { default as FirecrawlAppV1 } from "./v1";
16
18
 
package/src/v2/client.ts CHANGED
@@ -32,6 +32,7 @@ import {
32
32
  listBrowsers,
33
33
  } from "./methods/browser";
34
34
  import { getConcurrency, getCreditUsage, getQueueStatus, getTokenUsage, getCreditUsageHistorical, getTokenUsageHistorical } from "./methods/usage";
35
+ import { ResearchClient } from "./methods/research";
35
36
  import {
36
37
  createMonitor as createMonitorMethod,
37
38
  deleteMonitor as deleteMonitorMethod,
@@ -119,6 +120,7 @@ export type FirecrawlClientInput = FirecrawlClientOptions | string;
119
120
 
120
121
  export class FirecrawlClient {
121
122
  private readonly http: HttpClient;
123
+ private _research?: ResearchClient;
122
124
 
123
125
  private isCloudService(url: string): boolean {
124
126
  return url.includes('api.firecrawl.dev');
@@ -234,6 +236,16 @@ export class FirecrawlClient {
234
236
  return search(this.http, { query, ...req });
235
237
  }
236
238
 
239
// Research
/**
 * Sub-client for the v2 research endpoints (arXiv papers + GitHub
 * history/readmes). The instance is created on first access, shares this
 * client's HTTP transport, and is reused on every later access.
 * Example: `firecrawl.research.searchPapers("diffusion models")`.
 */
get research(): ResearchClient {
  const existing = this._research;
  if (existing) return existing;

  const created = new ResearchClient(this.http);
  this._research = created;
  return created;
}
248
+
237
249
  // Map
238
250
  /**
239
251
  * Map a site to discover URLs (sitemap-aware).
@@ -0,0 +1,195 @@
1
+ import type {
2
+ SearchPapersOptions,
3
+ SearchPapersResponse,
4
+ GetPaperOptions,
5
+ PaperMetadataResponse,
6
+ ReadPaperResponse,
7
+ SimilarPapersOptions,
8
+ SimilarPapersResponse,
9
+ SearchGithubOptions,
10
+ GitHubSearchResponse,
11
+ } from "../types";
12
+ import { SdkError } from "../types";
13
+ import { HttpClient } from "../utils/httpClient";
14
+ import { throwForBadResponse } from "../utils/errorHandler";
15
+
16
+ const BASE = "/v2/research";
17
+
18
/**
 * Adds `key` to `params` once per value.
 *
 * - `undefined` / `null`: nothing is added.
 * - Array: one entry per element, skipping elements that are nullish or
 *   stringify to an empty string.
 * - Any other value: a single entry holding `String(value)` (an empty string
 *   is still added in this case).
 */
function appendParam(
  params: URLSearchParams,
  key: string,
  value: string | number | boolean | string[] | undefined,
): void {
  if (value === undefined || value === null) return;

  const items: ReadonlyArray<string | number | boolean> = Array.isArray(value)
    ? value.filter((item) => item != null && String(item).length > 0)
    : [value];

  for (const item of items) {
    params.append(key, String(item));
  }
}
33
+
34
/**
 * Joins a path and its serialized query parameters. When `params` serializes
 * to an empty string the path is returned unchanged, without a trailing `?`.
 */
function withQuery(path: string, params: URLSearchParams): string {
  const encoded = params.toString();
  if (encoded.length === 0) return path;
  return path + "?" + encoded;
}
38
+
39
/**
 * Converts a failed research request into an SdkError and throws it.
 *
 * The research service reports failures as RFC 7807 Problem bodies. For an
 * axios-style error (one with `isAxiosError` set) the message of the thrown
 * SdkError is chosen in this order:
 *
 * 1. the Problem `detail`, when it is a non-empty string;
 * 2. the Problem `title`, when it is a non-empty string;
 * 3. the transport error's own `message`;
 * 4. a generic message naming `action`.
 *
 * `detail` and `title` are only trusted when they are strings. A structured
 * `detail` (for example a list of validation errors) would otherwise be
 * stringified by the Error constructor into "[object Object]". The raw
 * response body is always passed along as the last constructor argument, so
 * structured details remain available to the caller.
 *
 * Anything that is not an axios-style error is rethrown untouched.
 *
 * @param err Whatever the HTTP layer threw.
 * @param action Short description of the attempted operation, used only in
 *   the generic fallback message.
 * @throws SdkError for axios-style errors; otherwise rethrows `err` itself.
 */
function normalizeResearchError(err: any, action: string): never {
  if (!err?.isAxiosError) throw err;

  const status: number | undefined = err.response?.status;
  const body: any = err.response?.data;

  const usable = (candidate: unknown): candidate is string =>
    typeof candidate === "string" && candidate.length > 0;

  let problemMessage: string | undefined;
  if (usable(body?.detail)) problemMessage = body.detail;
  else if (usable(body?.title)) problemMessage = body.title;

  if (problemMessage !== undefined) {
    // Forward the Problem `type` only when it is a string.
    const problemType = typeof body.type === "string" ? body.type : undefined;
    throw new SdkError(problemMessage, status, problemType, body);
  }

  throw new SdkError(
    err.message || `Request failed while trying to ${action}`,
    status,
    err.code,
    body,
  );
}
60
+
61
/**
 * Rejects a result-count option that is present but not greater than zero.
 * Written as `!(k > 0)` so that NaN, which fails every comparison, is
 * rejected instead of being sent to the server as `k=NaN`.
 */
function assertPositiveK(k: number | null | undefined): void {
  if (k != null && !(k > 0)) throw new Error("k must be positive");
}

/**
 * Client for the v2 research endpoints (arXiv papers + GitHub history/readmes).
 * Accessed via `firecrawl.research`.
 *
 * Each method validates its arguments locally, rejecting with a plain `Error`
 * before any request is made, and then issues a single GET request. Request
 * failures are passed through `normalizeResearchError`.
 */
export class ResearchClient {
  constructor(private readonly http: HttpClient) {}

  /**
   * Performs the GET shared by every public method and returns the response
   * body.
   * @param url Path plus query string to request.
   * @param action Human-readable operation name used in error reporting.
   */
  private async request<T>(url: string, action: string): Promise<T> {
    try {
      const res = await this.http.get<T>(url);
      if (res.status !== 200) throwForBadResponse(res, action);
      return res.data;
    } catch (err) {
      return normalizeResearchError(err, action);
    }
  }

  /**
   * Search papers by abstract relevance.
   * @param query Natural-language search query; must contain a non-blank character.
   * @param options Optional filters (k, authors, categories, from, to).
   * @throws Error when `query` is blank or `k` is not greater than zero.
   */
  async searchPapers(
    query: string,
    options: SearchPapersOptions = {},
  ): Promise<SearchPapersResponse> {
    if (!query || !query.trim()) throw new Error("query cannot be empty");
    // The default only covers `undefined`; treat an explicit null the same way.
    const opts: SearchPapersOptions = options ?? {};
    assertPositiveK(opts.k);

    const params = new URLSearchParams();
    appendParam(params, "query", query);
    appendParam(params, "k", opts.k);
    appendParam(params, "authors", opts.authors);
    appendParam(params, "categories", opts.categories);
    appendParam(params, "from", opts.from);
    appendParam(params, "to", opts.to);

    return this.request<SearchPapersResponse>(
      withQuery(`${BASE}/papers`, params),
      "search papers",
    );
  }

  /**
   * Get paper metadata (detail mode), or read in-body passages (when `query` is
   * supplied). `k` is only valid together with `query`.
   * @param id Paper reference: a canonical `paper_id`, an `arxiv:<id>` key, or a
   *   bare arXiv id / URL. It is percent-encoded into the request path.
   * @param options Optional `query` (switches to read mode) and `k`.
   * @throws Error when `id` is blank, `k` is given without `query`, or `k` is
   *   not greater than zero.
   */
  async getPaper(
    id: string,
    options?: { query?: undefined; k?: undefined },
  ): Promise<PaperMetadataResponse>;
  async getPaper(
    id: string,
    options: { query: string; k?: number },
  ): Promise<ReadPaperResponse>;
  async getPaper(
    id: string,
    options: GetPaperOptions = {},
  ): Promise<PaperMetadataResponse | ReadPaperResponse> {
    if (!id || !id.trim()) throw new Error("id cannot be empty");
    // The default only covers `undefined`; treat an explicit null the same way.
    const opts: GetPaperOptions = options ?? {};
    if (opts.k != null && opts.query == null)
      throw new Error("k is only valid together with query");
    assertPositiveK(opts.k);

    const params = new URLSearchParams();
    appendParam(params, "query", opts.query);
    appendParam(params, "k", opts.k);

    return this.request<PaperMetadataResponse | ReadPaperResponse>(
      withQuery(`${BASE}/papers/${encodeURIComponent(id)}`, params),
      "get paper",
    );
  }

  /**
   * Find related papers via the citation graph.
   * @param id Primary seed paper reference. It is percent-encoded into the
   *   request path.
   * @param options Required `intent` plus optional mode, k, rerank, anchor.
   * @throws Error when `id` or `intent` is blank, or `k` is not greater than zero.
   */
  async similarPapers(
    id: string,
    options: SimilarPapersOptions,
  ): Promise<SimilarPapersResponse> {
    if (!id || !id.trim()) throw new Error("id cannot be empty");
    if (!options?.intent || !options.intent.trim())
      throw new Error("intent cannot be empty");
    assertPositiveK(options.k);

    const params = new URLSearchParams();
    appendParam(params, "intent", options.intent);
    appendParam(params, "mode", options.mode);
    appendParam(params, "k", options.k);
    // appendParam skips nullish values but still sends an explicit `false`.
    appendParam(params, "rerank", options.rerank);
    appendParam(params, "anchor", options.anchor);

    return this.request<SimilarPapersResponse>(
      withQuery(`${BASE}/papers/${encodeURIComponent(id)}/similar`, params),
      "find similar papers",
    );
  }

  /**
   * Search GitHub issue/PR history and repository readmes.
   * @param query Search query; must contain a non-blank character.
   * @param options Optional `k`.
   * @throws Error when `query` is blank or `k` is not greater than zero.
   */
  async searchGithub(
    query: string,
    options: SearchGithubOptions = {},
  ): Promise<GitHubSearchResponse> {
    if (!query || !query.trim()) throw new Error("query cannot be empty");
    // The default only covers `undefined`; treat an explicit null the same way.
    const opts: SearchGithubOptions = options ?? {};
    assertPositiveK(opts.k);

    const params = new URLSearchParams();
    appendParam(params, "query", query);
    appendParam(params, "k", opts.k);

    return this.request<GitHubSearchResponse>(
      withQuery(`${BASE}/github`, params),
      "search github",
    );
  }
}
package/src/v2/types.ts CHANGED
@@ -14,7 +14,8 @@ export type FormatString =
14
14
  | "attributes"
15
15
  | "branding"
16
16
  | "audio"
17
- | "video";
17
+ | "video"
18
+ | "pii";
18
19
 
19
20
  export interface Viewport {
20
21
  width: number;
@@ -205,6 +206,7 @@ export interface ScrapeOptions {
205
206
  minAge?: number;
206
207
  storeInCache?: boolean;
207
208
  lockdown?: boolean;
209
+ redactPII?: boolean | RedactPIIOptions;
208
210
  profile?: {
209
211
  name: string;
210
212
  saveChanges?: boolean;
@@ -213,6 +215,70 @@ export interface ScrapeOptions {
213
215
  origin?: string;
214
216
  }
215
217
 
218
+ export type RedactPIIEntity =
219
+ | "PERSON"
220
+ | "EMAIL"
221
+ | "PHONE"
222
+ | "LOCATION"
223
+ | "FINANCIAL"
224
+ | "SECRET";
225
+
226
+ export interface RedactPIIOptions {
227
+ /**
228
+ * accurate (default): model-only redaction. Best precision, cleanest output.
229
+ * aggressive: model + Presidio + spaCy. Higher recall at the cost of precision.
230
+ * fast: Presidio only, no model call. Lower F1, ~2x throughput.
231
+ */
232
+ mode?: "accurate" | "aggressive" | "fast";
233
+ /** Restrict redaction to these entity buckets. Unset means all entities. */
234
+ entities?: RedactPIIEntity[];
235
+ /**
236
+ * tag (default): replace spans with `<KIND>` placeholders.
237
+ * mask: replace spans with `*` of equal length.
238
+ * remove: drop span characters entirely.
239
+ */
240
+ replaceStyle?: "tag" | "mask" | "remove";
241
+ }
242
+
243
+ export type PIISource = "model" | "heuristics" | "unknown";
244
+
245
+ export interface PIISpan {
246
+ start: number;
247
+ end: number;
248
+ /** Unified entity bucket. Omitted when `kind` doesn't map onto one. */
249
+ entity?: RedactPIIEntity;
250
+ /** Granular recognizer label from fire-privacy. */
251
+ kind: string;
252
+ source: PIISource;
253
+ /** Confidence in [0, 1] when supplied. */
254
+ score?: number;
255
+ }
256
+
257
+ /**
258
+ * - ok: redaction completed; redactedMarkdown is the result.
259
+ * - skipped: redaction was not performed; see `reason`.
260
+ * - failed: redaction was attempted but did not produce a usable result.
261
+ */
262
+ export type PIIStatus = "ok" | "skipped" | "failed";
263
+
264
+ /** Always set when status !== "ok". */
265
+ export type PIIReason =
266
+ | "empty_input"
267
+ | "too_large"
268
+ | "upstream_skipped"
269
+ | "service_unavailable"
270
+ | "timeout"
271
+ | "error";
272
+
273
+ export interface PIIBlock {
274
+ status: PIIStatus;
275
+ reason?: PIIReason;
276
+ redactedMarkdown: string | null;
277
+ spans: PIISpan[];
278
+ /** Span count per entity bucket. Only non-zero entries are present. */
279
+ counts: Partial<Record<RedactPIIEntity, number>>;
280
+ }
281
+
216
282
  export type ParseFileData =
217
283
  | Blob
218
284
  | File
@@ -483,6 +549,7 @@ export interface Document {
483
549
  warning?: string;
484
550
  changeTracking?: Record<string, unknown>;
485
551
  branding?: BrandingProfile;
552
+ pii?: PIIBlock;
486
553
  }
487
554
 
488
555
  // Pagination configuration for auto-fetching pages from v2 endpoints that return a `next` URL
@@ -660,6 +727,25 @@ export interface MonitorEmailNotification {
660
727
  includeDiffs?: boolean;
661
728
  }
662
729
 
730
+ /**
731
+ * Per-recipient opt-in state for monitor email notifications.
732
+ *
733
+ * External recipients (not members of the team that owns the monitor) must
734
+ * confirm their subscription via a one-time email before they receive any
735
+ * monitor notifications. Team members are auto-confirmed.
736
+ *
737
+ * - `pending` → confirmation email sent, no notifications yet
738
+ * - `confirmed` → notifications enabled
739
+ * - `unsubscribed` → recipient opted out and cannot be re-added without a new
740
+ * confirmation flow
741
+ */
742
+ export interface MonitorEmailRecipientSubscription {
743
+ email: string;
744
+ status: "pending" | "confirmed" | "unsubscribed";
745
+ source: "team" | "opt_in" | "legacy";
746
+ confirmationEmailSent?: boolean;
747
+ }
748
+
663
749
  export interface MonitorNotification {
664
750
  email?: MonitorEmailNotification;
665
751
  }
@@ -731,6 +817,13 @@ export interface Monitor {
731
817
  targets: MonitorTarget[];
732
818
  webhook?: MonitorWebhookConfig | null;
733
819
  notification?: MonitorNotification | null;
820
+ /**
821
+ * Present on create/update/get responses. Reflects the opt-in state of every
822
+ * email recipient currently configured on the monitor. Absent when the API
823
+ * has not reconciled recipients (e.g. team-default delivery with no
824
+ * explicit recipients).
825
+ */
826
+ emailRecipientSubscriptions?: MonitorEmailRecipientSubscription[];
734
827
  retentionDays: number;
735
828
  estimatedCreditsPerMonth?: number | null;
736
829
  lastCheckSummary?: MonitorSummary | null;
@@ -1062,3 +1155,161 @@ export interface BrowserListResponse {
1062
1155
  sessions?: BrowserSession[];
1063
1156
  error?: string;
1064
1157
  }
1158
+
1159
+ // ---------- Research (v2) ----------
1160
+
1161
+ /**
1162
+ * Source identifiers grouped by namespace. Currently only `arxiv` is
1163
+ * populated; each value is an array of ids in that namespace.
1164
+ */
1165
+ export type IdMap = Record<string, string[]>;
1166
+
1167
+ /** Per-candidate ranking signals (present on similarity results). */
1168
+ export interface PaperSignals {
1169
+ /** Raw structural strength (co-citation / coupling counts, or seed overlap). */
1170
+ structural: number;
1171
+ /** Semantic score from the intent abstract search (0 if absent). */
1172
+ semantic: number;
1173
+ /** Citation-graph PageRank of the candidate. */
1174
+ pagerank: number;
1175
+ /** Number of distinct seeds connected to this candidate. */
1176
+ seed_overlap: number;
1177
+ }
1178
+
1179
+ /** A ranked paper. `paper_id` is canonical; arXiv lives in `ids`. */
1180
+ export interface PaperResult {
1181
+ /** Canonical paper id — the Milvus INT64 primary key as a decimal string. */
1182
+ paper_id: string;
1183
+ ids?: IdMap;
1184
+ title: string;
1185
+ abstract: string;
1186
+ /** Final ranking score (post-rerank when enabled). Not normalized. */
1187
+ score: number;
1188
+ /** Present on similarity results. */
1189
+ signals?: PaperSignals;
1190
+ }
1191
+
1192
+ export interface PaperMetadata {
1193
+ paper_id: string;
1194
+ ids?: IdMap;
1195
+ title: string;
1196
+ abstract: string;
1197
+ /** Comma-joined author names. Omitted if unknown. */
1198
+ authors?: string;
1199
+ /** arXiv categories. Omitted if unknown. */
1200
+ categories?: string[];
1201
+ /** Original creation date string (format varies). Omitted if unknown. */
1202
+ created_date?: string;
1203
+ /** Last-updated date string. Omitted if unknown. */
1204
+ update_date?: string;
1205
+ }
1206
+
1207
+ export interface Passage {
1208
+ /** In-body passage text (may be markdown, including tables). */
1209
+ text: string;
1210
+ /** Dense similarity score for the passage. */
1211
+ score: number;
1212
+ }
1213
+
1214
+ export interface SearchPapersResponse {
1215
+ results: PaperResult[];
1216
+ }
1217
+
1218
+ export interface PaperMetadataResponse {
1219
+ paper: PaperMetadata;
1220
+ }
1221
+
1222
+ export interface ReadPaperResponse {
1223
+ paper: PaperMetadata;
1224
+ /** Resolved canonical paper id (empty string if not found via id-key). */
1225
+ paper_id: string;
1226
+ /** Echo of the read query. */
1227
+ query: string;
1228
+ /** Top matching in-body passages. */
1229
+ passages: Passage[];
1230
+ }
1231
+
1232
+ export interface SimilarPapersResponse {
1233
+ /** Ranked related papers; each carries `signals`. */
1234
+ results: PaperResult[];
1235
+ /** Number of resolved candidates considered before truncation to `k`. */
1236
+ pool_size: number;
1237
+ /** True if more resolved candidates existed than were returned. */
1238
+ truncated: boolean;
1239
+ /** Human-readable note when no results are produced. */
1240
+ note?: string | null;
1241
+ }
1242
+
1243
+ /** Component scores; each field is present only when that signal contributed. */
1244
+ export interface GitHubScoreBreakdown {
1245
+ rrf?: number;
1246
+ semantic?: number;
1247
+ lexical?: number;
1248
+ fusion?: number;
1249
+ }
1250
+
1251
+ export interface GitHubSearchItem {
1252
+ resultType: "github_history" | "repo_readme";
1253
+ /** `owner/name`. */
1254
+ repo: string;
1255
+ url: string;
1256
+ /** History page type (e.g. `issue`, `pull`). Omitted for readmes. */
1257
+ pageType?: string;
1258
+ /** Issue/PR number. Omitted for readmes. */
1259
+ number?: number;
1260
+ /** Number of matched segments/chunks. Omitted when not applicable. */
1261
+ segmentCount?: number;
1262
+ /** Readme URL (readme results). Omitted otherwise. */
1263
+ readmeUrl?: string;
1264
+ /** Short matched excerpt. */
1265
+ snippet: string;
1266
+ /** Full matched content in markdown. Omitted unless available. */
1267
+ contentMd?: string;
1268
+ scores: GitHubScoreBreakdown;
1269
+ }
1270
+
1271
+ export interface GitHubSearchResponse {
1272
+ results: GitHubSearchItem[];
1273
+ }
1274
+
1275
+ /** Options for `research.searchPapers`. */
1276
+ export interface SearchPapersOptions {
1277
+ /** Number of results to return (1–500, default 40). */
1278
+ k?: number;
1279
+ /** Author substring filter(s); ALL must match (case-insensitive). */
1280
+ authors?: string[];
1281
+ /** arXiv category filter(s) (e.g. `cs.LG`); ALL must match. */
1282
+ categories?: string[];
1283
+ /** Inclusive lower bound on created/updated date (ISO `YYYY-MM-DD`). */
1284
+ from?: string;
1285
+ /** Inclusive upper bound on created/updated date (lexicographic). */
1286
+ to?: string;
1287
+ }
1288
+
1289
+ /** Options for `research.getPaper`. */
1290
+ export interface GetPaperOptions {
1291
+ /** When present, switches to read mode and returns in-body passages. */
1292
+ query?: string;
1293
+ /** Passage count (read mode only; 1–50, default 4). Requires `query`. */
1294
+ k?: number;
1295
+ }
1296
+
1297
+ /** Options for `research.similarPapers`. */
1298
+ export interface SimilarPapersOptions {
1299
+ /** Natural-language intent used to semantically rerank candidates. Required. */
1300
+ intent: string;
1301
+ /** Traversal mode (default `similar`). */
1302
+ mode?: "similar" | "citers" | "references";
1303
+ /** Number of related papers to return (1–500, default 40). */
1304
+ k?: number;
1305
+ /** Apply an additional ZeroEntropy rerank over the fused candidates. */
1306
+ rerank?: boolean;
1307
+ /** Additional seed paper reference(s), same format as `id`. */
1308
+ anchor?: string[];
1309
+ }
1310
+
1311
+ /** Options for `research.searchGithub`. */
1312
+ export interface SearchGithubOptions {
1313
+ /** Number of results to return (1–100, default 20). */
1314
+ k?: number;
1315
+ }