@fbraza/pi-cite 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ import { Text } from "@earendil-works/pi-tui";
2
+ import type { PaperRecord } from "./types.ts";
3
+
4
+ export const MAX_STREAMED_PAPERS_PER_QUERY = 5;
5
+ export const MAX_FINAL_MERGED_PAPERS = 20;
6
+
7
+ type ThemeLike = {
8
+ fg?: (color: string, text: string) => string;
9
+ bold?: (text: string) => string;
10
+ };
11
+
12
+ export type CompactPaperForDisplay = {
13
+ first_author: string;
14
+ title: string;
15
+ id: string;
16
+ source: string;
17
+ year?: number;
18
+ journal?: string;
19
+ citation_count?: number;
20
+ };
21
+
22
+ export type LiteratureSearchDisplayEvent =
23
+ | { phase: "start" }
24
+ | {
25
+ phase: "query_start";
26
+ provider: "pubmed" | "semantic_scholar";
27
+ query_index: number;
28
+ query: string;
29
+ }
30
+ | {
31
+ phase: "query_results";
32
+ provider: "pubmed" | "semantic_scholar";
33
+ query_index: number;
34
+ query: string;
35
+ count: number;
36
+ papers: CompactPaperForDisplay[];
37
+ }
38
+ | {
39
+ phase: "query_error";
40
+ provider: "pubmed" | "semantic_scholar";
41
+ query_index: number;
42
+ query: string;
43
+ error: string;
44
+ }
45
+ | { phase: "dedupe" }
46
+ | { phase: "complete"; count: number; papers: CompactPaperForDisplay[] };
47
+
48
+ export type LiteratureSearchDisplaySearch = {
49
+ provider: "pubmed" | "semantic_scholar";
50
+ query_index: number;
51
+ query: string;
52
+ count: number;
53
+ papers: CompactPaperForDisplay[];
54
+ };
55
+
56
/**
 * Wraps an already-rendered string in a pi-tui `Text` node.
 * Both numeric constructor arguments are passed as 0.
 */
function terminalText(text: string): Text {
  const node = new Text(text, 0, 0);
  return node;
}
59
+
60
/**
 * Applies the theme's foreground color `colorName` to `text`.
 *
 * @returns The styled text, or `text` unchanged when there is no theme, the
 *   theme has no `fg` function, or `fg` throws.
 */
function color(theme: ThemeLike | undefined, colorName: string, text: string): string {
  try {
    if (theme?.fg) {
      return theme.fg(colorName, text);
    }
  } catch {
    // A misbehaving theme must not break rendering; fall through to plain text.
  }
  return text;
}
67
+
68
/**
 * Applies the theme's bold styling to `text`.
 *
 * @returns The styled text, or `text` unchanged when there is no theme, the
 *   theme has no `bold` function, or `bold` throws.
 */
function bold(theme: ThemeLike | undefined, text: string): string {
  try {
    if (theme?.bold) {
      return theme.bold(text);
    }
  } catch {
    // A misbehaving theme must not break rendering; fall through to plain text.
  }
  return text;
}
75
+
76
/**
 * Collapses whitespace in `value` and shortens it to at most `maxLength`
 * UTF-16 code units, ending with "…" when anything was cut.
 *
 * @param value Any value; `null`/`undefined` become the empty string, everything
 *   else is converted with `String()`.
 * @param maxLength Maximum length of the result, ellipsis included.
 * @returns The normalized text, never longer than `maxLength`. When `maxLength`
 *   is below 1 (or NaN) and the text does not fit, the empty string is returned
 *   instead of a lone ellipsis.
 */
export function truncateText(value: unknown, maxLength: number): string {
  const text = String(value ?? "").replace(/\s+/g, " ").trim();
  if (text.length <= maxLength) return text;
  // No room even for the ellipsis: never return more than was asked for.
  if (!(maxLength >= 1)) return "";
  let head = text.slice(0, maxLength - 1);
  // If the cut landed inside a surrogate pair, drop the dangling high surrogate.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
  return `${head.trimEnd()}…`;
}
81
+
82
/**
 * Truncates `value` to `width` via `truncateText`, then right-pads it with
 * spaces so the result fills a column of `width` characters.
 */
export function padText(value: unknown, width: number): string {
  const clipped = truncateText(value, width);
  const missing = width - clipped.length;
  return missing > 0 ? clipped + " ".repeat(missing) : clipped;
}
86
+
87
/**
 * Returns the last whitespace-separated token of an author name.
 * A blank name yields "Unknown"; a single-token name is returned trimmed.
 */
function authorSurname(author: string): string {
  const name = author.trim();
  if (name === "") return "Unknown";
  const tokens = name.split(/\s+/);
  if (tokens.length <= 1) return name;
  return tokens[tokens.length - 1];
}
93
+
94
/**
 * Returns the surname (see `authorSurname`) of the paper's first listed author,
 * or "Unknown" when the paper has no authors.
 */
export function firstAuthor(paper: PaperRecord): string {
  const list = paper.authors;
  if (list == null || list.length === 0) return "Unknown";
  return authorSurname(list[0]);
}
99
+
100
/**
 * Summarizes the author list as "First→Last" surnames.
 * A single author yields just that surname; no authors yields "Unknown".
 */
export function authorRange(paper: PaperRecord): string {
  const list = paper.authors ?? [];
  switch (list.length) {
    case 0:
      return "Unknown";
    case 1:
      return authorSurname(list[0]);
    default: {
      const first = authorSurname(list[0]);
      const last = authorSurname(list[list.length - 1]);
      return `${first}→${last}`;
    }
  }
}
106
+
107
/**
 * Picks the display identifier for a paper, preferring DOI, then PMID, then
 * the Semantic Scholar id. Returns "—" when none of them is set.
 */
export function paperIdentifier(paper: PaperRecord): string {
  const candidates: Array<[string, string | undefined]> = [
    ["DOI", paper.doi],
    ["PMID", paper.pmid],
    ["S2", paper.s2_id],
  ];
  for (const [prefix, value] of candidates) {
    if (value) return `${prefix}:${value}`;
  }
  return "—";
}
113
+
114
/**
 * Builds the short provider tag for a paper from `sources` plus the
 * ";"-separated `source` field: "PM+S2", "PM" or "S2".
 * When neither known provider is present, the raw `source` is returned, or
 * "—" when `source` is null/undefined.
 */
export function sourceLabel(paper: PaperRecord): string {
  const rawEntries = [
    ...(paper.sources ?? []),
    ...(paper.source ? paper.source.split(";") : []),
  ];
  const seen = new Set<string>();
  for (const entry of rawEntries) {
    const name = entry.trim();
    if (name) seen.add(name);
  }
  const fromPubmed = seen.has("pubmed");
  const fromS2 = seen.has("semantic_scholar");
  if (fromPubmed) return fromS2 ? "PM+S2" : "PM";
  if (fromS2) return "S2";
  return paper.source ?? "—";
}
130
+
131
/**
 * Reduces a full paper record to the handful of fields the terminal renderer
 * needs: first-author surname, title, display identifier, provider tag, and
 * the optional year / journal / citation count.
 */
export function compactPaperForDisplay(paper: PaperRecord): CompactPaperForDisplay {
  const leadAuthor = firstAuthor(paper);
  const identifier = paperIdentifier(paper);
  const provider = sourceLabel(paper);
  const compact: CompactPaperForDisplay = {
    first_author: leadAuthor,
    title: paper.title,
    id: identifier,
    source: provider,
    year: paper.year,
    journal: paper.journal,
    citation_count: paper.citation_count,
  };
  return compact;
}
142
+
143
/** Converts every paper record to its compact display form, preserving order. */
export function compactPapersForDisplay(papers: PaperRecord[]): CompactPaperForDisplay[] {
  return papers.map((paper) => compactPaperForDisplay(paper));
}
146
+
147
/** Human-readable provider name: "PubMed" for pubmed, "Semantic Scholar" otherwise. */
function providerLabel(provider: "pubmed" | "semantic_scholar"): string {
  if (provider === "pubmed") {
    return "PubMed";
  }
  return "Semantic Scholar";
}
150
+
151
/** Theme color name used for a provider: "success" for pubmed, "accent" otherwise. */
function providerColor(provider: "pubmed" | "semantic_scholar"): string {
  if (provider === "pubmed") {
    return "success";
  }
  return "accent";
}
154
+
155
/**
 * Formats one streamed "found" row: a success-colored marker followed by the
 * author (10 columns), title (62 columns) and muted identifier (28 columns).
 */
export function formatFoundLine(
  paper: CompactPaperForDisplay,
  theme?: ThemeLike,
): string {
  const authorColumn = padText(paper.first_author, 10);
  const titleColumn = padText(paper.title, 62);
  const idColumn = padText(paper.id, 28);
  const marker = color(theme, "success", "✓ found:");
  const mutedId = color(theme, "muted", idColumn);
  return [` ${marker}`, authorColumn, titleColumn, mutedId].join(" ");
}
164
+
165
/**
 * Formats one row of the merged result list: a "+" bullet, the 1-based
 * position, the title truncated to 72 characters, and the provider tag in
 * parentheses (accent-colored when the tag mentions S2, success otherwise).
 *
 * @param index Zero-based position of the paper in the merged list.
 */
export function formatMergedLine(
  paper: CompactPaperForDisplay,
  index: number,
  theme?: ThemeLike,
): string {
  const shortTitle = truncateText(paper.title, 72);
  const tagColor = paper.source.includes("S2") ? "accent" : "success";
  const tag = color(theme, tagColor, `(${paper.source})`);
  const bullet = color(theme, "success", "+");
  const position = index + 1;
  return ` ${bullet} ${position}. ${shortTitle} ${tag}`;
}
174
+
175
/**
 * Renders a single progress event as zero or more terminal lines.
 *
 * - `start` / `query_start` / `query_error` / `dedupe`: one status line each.
 * - `query_results`: up to MAX_STREAMED_PAPERS_PER_QUERY "found" rows, plus a
 *   "more" line when `count` exceeds that cap and a "none found" line when
 *   `count` is 0.
 * - `complete` (the remaining phase): up to MAX_FINAL_MERGED_PAPERS merged
 *   rows, an optional "more" line, and a final "done" line.
 */
function renderEvent(
  event: LiteratureSearchDisplayEvent,
  theme?: ThemeLike,
): string[] {
  switch (event.phase) {
    case "start": {
      const dot = color(theme, "accent", "●");
      const title = color(theme, "toolTitle", "literature_search");
      return [`${dot} ${title} starting`];
    }
    case "query_start": {
      const arrow = color(theme, providerColor(event.provider), "→");
      const label = color(theme, providerColor(event.provider), providerLabel(event.provider));
      return [`${arrow} ${label} q${event.query_index}: ${event.query}`];
    }
    case "query_results": {
      const rows = event.papers
        .slice(0, MAX_STREAMED_PAPERS_PER_QUERY)
        .map((paper) => formatFoundLine(paper, theme));
      const overflow = event.count - Math.min(event.count, MAX_STREAMED_PAPERS_PER_QUERY);
      if (overflow > 0) {
        rows.push(` ${color(theme, "dim", "…")} ${overflow} more candidate papers`);
      }
      if (event.count === 0) {
        rows.push(` ${color(theme, "muted", "no candidate papers found")}`);
      }
      return rows;
    }
    case "query_error": {
      const marker = color(theme, "error", "! failed:");
      const reason = `${providerLabel(event.provider)} q${event.query_index}: ${truncateText(event.error, 96)}`;
      return [` ${marker} ${reason}`];
    }
    case "dedupe":
      return [`${color(theme, "warning", "→")} deduplicating by DOI / PMID / title-year`];
    default: {
      // Only the "complete" phase is left at this point.
      const rows = event.papers
        .slice(0, MAX_FINAL_MERGED_PAPERS)
        .map((paper, index) => formatMergedLine(paper, index, theme));
      const overflow = event.count - Math.min(event.count, MAX_FINAL_MERGED_PAPERS);
      if (overflow > 0) {
        rows.push(` ${color(theme, "dim", "…")} ${overflow} more merged papers`);
      }
      rows.push(`${color(theme, "success", "✓")} done: ${event.count} merged papers`);
      return rows;
    }
  }
}
212
+
213
/**
 * Renders a list of progress events as one newline-joined transcript.
 *
 * @returns The transcript, or "" when `events` is missing or empty.
 */
export function renderLiteratureEventTranscript(
  events: LiteratureSearchDisplayEvent[] | undefined,
  theme?: ThemeLike,
): string {
  if (!events?.length) return "";
  const lines: string[] = [];
  for (const event of events) {
    lines.push(...renderEvent(event, theme));
  }
  return lines.join("\n");
}
220
+
221
+ type RenderOptions = { expanded?: boolean; isPartial?: boolean };
222
+
223
+ type TextContentResult = { type: string; text?: string };
224
+
225
+ type ToolRenderResult<TDetails> = {
226
+ content?: TextContentResult[];
227
+ details?: TDetails;
228
+ };
229
+
230
+ type ProviderSearchSummary = {
231
+ searched?: boolean;
232
+ count?: number;
233
+ };
234
+
235
+ type LiteratureResultDetails = {
236
+ count?: number;
237
+ papers?: PaperRecord[];
238
+ providers?: {
239
+ pubmed?: ProviderSearchSummary;
240
+ semantic_scholar?: ProviderSearchSummary;
241
+ };
242
+ events?: LiteratureSearchDisplayEvent[];
243
+ };
244
+
245
+ type ProviderResultDetails = {
246
+ papers?: PaperRecord[];
247
+ query?: string;
248
+ params?: { query?: string };
249
+ };
250
+
251
/**
 * Renders the one-line collapsed summary of a literature search:
 * per-provider hit counts and the merged total.
 *
 * A provider that was not searched shows "PubMed: —" / "S2: skipped". A
 * provider that was searched but reported no `count` shows 0 rather than the
 * literal text "undefined". The merged total falls back from `details.count`
 * to `details.papers.length`, then to 0.
 */
function renderCollapsedLiteratureResult(details: LiteratureResultDetails, theme?: ThemeLike): string {
  const pubmed = details?.providers?.pubmed;
  const s2 = details?.providers?.semantic_scholar;
  // `count` is optional on the summary type, so default it before interpolating.
  const pubmedText = pubmed?.searched ? `PubMed: ${pubmed.count ?? 0}` : "PubMed: —";
  const s2Text = s2?.searched ? `S2: ${s2.count ?? 0}` : "S2: skipped";
  const count = details?.count ?? details?.papers?.length ?? 0;
  return `${color(theme, "success", "✓")} ${color(theme, "toolTitle", "literature_search")} ${color(theme, "success", pubmedText)} | ${color(theme, "accent", s2Text)} | merged: ${count}`;
}
259
+
260
/**
 * Renders the `literature_search` tool result for the terminal.
 *
 * - Partial (still running): the event transcript so far, or a
 *   "Searching literature..." placeholder when there are no events yet.
 * - Collapsed: the one-line provider/merged summary.
 * - Expanded: the full event transcript when events were recorded; otherwise
 *   a transcript rebuilt from `details.papers`.
 *
 * The rebuilt transcript lists at most MAX_FINAL_MERGED_PAPERS rows and, like
 * the streamed "complete" event, adds a "… N more merged papers" line when
 * papers were left out so the truncation is visible.
 */
export function renderLiteratureSearchResult(
  result: ToolRenderResult<LiteratureResultDetails>,
  options: RenderOptions,
  theme?: ThemeLike,
): Text {
  const details = result.details ?? {};
  const transcript = renderLiteratureEventTranscript(details.events, theme);
  if (options.isPartial) {
    return terminalText(transcript || color(theme, "warning", "Searching literature..."));
  }
  if (!options.expanded) {
    return terminalText(renderCollapsedLiteratureResult(details, theme));
  }
  if (transcript) return terminalText(transcript);

  // No recorded events: rebuild an equivalent transcript from the final paper list.
  const papers = compactPapersForDisplay(details.papers ?? []);
  const shown = papers.slice(0, MAX_FINAL_MERGED_PAPERS);
  const hidden = papers.length - shown.length;
  const lines = [
    `${color(theme, "accent", "●")} ${color(theme, "toolTitle", "literature_search")} result`,
    renderCollapsedLiteratureResult(details, theme),
    `${color(theme, "warning", "→")} deduplicating by DOI / PMID / title-year`,
    ...shown.map((paper, index) => formatMergedLine(paper, index, theme)),
  ];
  if (hidden > 0) lines.push(` ${color(theme, "dim", "…")} ${hidden} more merged papers`);
  lines.push(`${color(theme, "success", "✓")} done: ${papers.length} merged papers`);
  return terminalText(lines.join("\n"));
}
285
+
286
/**
 * Renders the result of a single-provider search tool.
 *
 * - Partial: the first content block's text when it is a text block,
 *   otherwise a "Searching <provider>..." placeholder, in the warning color.
 * - Collapsed: one line with the tool name and paper count.
 * - Expanded: the query line, up to MAX_STREAMED_PAPERS_PER_QUERY "found"
 *   rows, an optional "more" line, and a final "done" line.
 *
 * The query is taken from `details.query`, then `details.params.query`.
 */
export function renderProviderSearchResult(
  provider: "pubmed" | "semantic_scholar",
  result: ToolRenderResult<ProviderResultDetails>,
  options: RenderOptions,
  theme?: ThemeLike,
): Text {
  const providerName = providerLabel(provider);
  const details = result.details ?? {};
  const papers = compactPapersForDisplay(details.papers ?? []);
  const query = details.query ?? details.params?.query ?? "";

  if (options.isPartial) {
    const firstBlock = result.content?.[0];
    const message = firstBlock?.type === "text" ? firstBlock.text ?? "" : `Searching ${providerName}...`;
    return terminalText(color(theme, "warning", message));
  }

  if (!options.expanded) {
    const check = color(theme, "success", "✓");
    const toolName = color(theme, "toolTitle", provider === "pubmed" ? "pubmed_search" : "semantic_scholar_search");
    return terminalText(`${check} ${toolName} ${papers.length} papers`);
  }

  const arrow = color(theme, providerColor(provider), "→");
  const label = color(theme, providerColor(provider), providerName);
  const lines = [`${arrow} ${label} q1: ${query}`];
  for (const paper of papers.slice(0, MAX_STREAMED_PAPERS_PER_QUERY)) {
    lines.push(formatFoundLine(paper, theme));
  }
  const overflow = papers.length - Math.min(papers.length, MAX_STREAMED_PAPERS_PER_QUERY);
  if (overflow > 0) {
    lines.push(` ${color(theme, "dim", "…")} ${overflow} more candidate papers`);
  }
  lines.push(`${color(theme, "success", "✓")} done: ${papers.length} papers`);
  return terminalText(lines.join("\n"));
}
@@ -0,0 +1,199 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { Type, type Static } from "typebox";
3
+ import { renderProviderSearchResult } from "./rendering.ts";
4
+ import { emitProgress, textResult, type TextToolUpdate } from "./tool-output.ts";
5
+ import { fetchJson, formatPaperText, normalizeDoi } from "./shared.ts";
6
+ import type { FullTextRouteResult, PaperRecord } from "./types.ts";
7
+
8
+ export const SEMANTIC_SCHOLAR_PARAMS = Type.Object({
9
+ query: Type.String({ description: "Search query" }),
10
+ max_results: Type.Optional(
11
+ Type.Number({
12
+ description: "Maximum results to return (default 20, max 100)",
13
+ }),
14
+ ),
15
+ year_from: Type.Optional(
16
+ Type.Number({ description: "Minimum publication year" }),
17
+ ),
18
+ year_to: Type.Optional(
19
+ Type.Number({ description: "Maximum publication year" }),
20
+ ),
21
+ fields_of_study: Type.Optional(
22
+ Type.Array(Type.String({ description: "Field of study" })),
23
+ ),
24
+ min_citation_count: Type.Optional(
25
+ Type.Number({ description: "Minimum citation count" }),
26
+ ),
27
+ open_access_only: Type.Optional(
28
+ Type.Boolean({ description: "Only keep open access papers" }),
29
+ ),
30
+ });
31
+
32
+ export type SemanticScholarSearchParams = Static<typeof SEMANTIC_SCHOLAR_PARAMS>;
33
+
34
+ type SemanticScholarPaperResponse = {
35
+ paperId?: string;
36
+ title?: string;
37
+ abstract?: string | null;
38
+ year?: number | null;
39
+ citationCount?: number | null;
40
+ tldr?: { text?: string | null } | null;
41
+ externalIds?: Record<string, string | undefined>;
42
+ openAccessPdf?: { url?: string | null } | null;
43
+ fieldsOfStudy?: string[] | null;
44
+ authors?: Array<{ name?: string | null }> | null;
45
+ };
46
+
47
/**
 * Looks up an open-access PDF for `doi` through the Semantic Scholar paper
 * search endpoint.
 *
 * The normalized DOI is used as the search query; a hit counts only when its
 * own DOI equals the requested one (compared case-insensitively, since DOIs
 * are case-insensitive) and it carries an `openAccessPdf.url`.
 *
 * @param doi DOI in any form accepted by `normalizeDoi`.
 * @param signal Optional abort signal forwarded to the HTTP request.
 * @returns A `semantic_scholar_oa` result with `pdf_url`, or a `not_found`
 *   result. A blank DOI returns `not_found` without a network request;
 *   previously it could match any DOI-less search hit that had a PDF.
 * @throws Whatever `fetchJson` throws on HTTP or JSON errors.
 */
export async function trySemanticScholarOpenAccess(
  doi: string,
  signal?: AbortSignal,
): Promise<FullTextRouteResult> {
  const notFound: FullTextRouteResult = {
    source: "not_found",
    access_note: "No open-access PDF found via Semantic Scholar",
  };
  const wantedDoi = normalizeDoi(doi);
  if (!wantedDoi) return notFound;
  const wantedKey = wantedDoi.toLowerCase();

  const url = new URL("https://api.semanticscholar.org/graph/v1/paper/search");
  url.searchParams.set("query", wantedDoi);
  url.searchParams.set("limit", "5");
  url.searchParams.set(
    "fields",
    "title,openAccessPdf,externalIds,isOpenAccess",
  );
  // Send the optional API key, as searchSemanticScholar does.
  const apiKey = process.env.SEMANTIC_SCHOLAR_API_KEY?.trim();
  const data = await fetchJson<{
    data?: Array<{
      openAccessPdf?: { url?: string };
      externalIds?: Record<string, string>;
    }>;
  }>(url.toString(), signal, apiKey ? { "x-api-key": apiKey } : undefined);

  const match = (data.data ?? []).find(
    (item) =>
      Boolean(item.openAccessPdf?.url) &&
      normalizeDoi(item.externalIds?.DOI)?.toLowerCase() === wantedKey,
  );
  const pdfUrl = match?.openAccessPdf?.url;
  if (!pdfUrl) return notFound;
  return {
    source: "semantic_scholar_oa",
    pdf_url: pdfUrl,
    access_note: "Open-access PDF found via Semantic Scholar openAccessPdf",
  };
}
82
+
83
+ export type SemanticScholarSearchResult = {
84
+ count: number;
85
+ papers: PaperRecord[];
86
+ query?: string;
87
+ };
88
+
89
/**
 * Runs a Semantic Scholar relevance search and maps the hits to `PaperRecord`s.
 *
 * The request carries the query, a result limit (default 20, clamped to
 * 1..100) and, when `year_from` and/or `year_to` is given, a `year` range
 * ("from-to", "from-" or "-to"). The remaining filters (fields of study,
 * minimum citations, open access, year bounds) are applied client-side, so
 * `count` may be lower than the limit.
 *
 * @param params Search parameters (see SEMANTIC_SCHOLAR_PARAMS).
 * @param signal Optional abort signal forwarded to the HTTP request.
 * @param onUpdate Optional progress callback; receives one "Searching…" message.
 * @returns The filtered papers, their count, and the original query.
 * @throws Whatever `fetchJson` throws on HTTP or JSON errors.
 */
export async function searchSemanticScholar(
  params: SemanticScholarSearchParams,
  signal?: AbortSignal,
  onUpdate?: TextToolUpdate,
): Promise<SemanticScholarSearchResult> {
  // A non-finite max_results (NaN/Infinity) would otherwise be sent as "limit=NaN".
  const requested = Math.floor(params.max_results ?? 20);
  const maxResults = Number.isFinite(requested)
    ? Math.min(100, Math.max(1, requested))
    : 20;
  // Copy optional filters into locals so they stay narrowed inside the callbacks below.
  const yearFrom = params.year_from;
  const yearTo = params.year_to;
  const minCitations = params.min_citation_count;

  const url = new URL(
    "https://api.semanticscholar.org/graph/v1/paper/search",
  );
  url.searchParams.set("query", params.query);
  url.searchParams.set("limit", String(maxResults));
  url.searchParams.set(
    "fields",
    [
      "paperId",
      "title",
      "abstract",
      "year",
      "citationCount",
      "tldr",
      "externalIds",
      "openAccessPdf",
      "fieldsOfStudy",
      "isOpenAccess",
      "authors",
    ].join(","),
  );
  // Send the range whenever either bound is set; previously year_to alone was
  // never sent, so the server could spend the whole limit on out-of-range papers.
  const fromPart = yearFrom ? String(yearFrom) : "";
  const toPart = yearTo ? String(yearTo) : "";
  if (fromPart || toPart) url.searchParams.set("year", `${fromPart}-${toPart}`);

  emitProgress(onUpdate, `Searching Semantic Scholar for: ${params.query}`);
  const apiKey = process.env.SEMANTIC_SCHOLAR_API_KEY?.trim();
  const response = await fetchJson<{ data?: SemanticScholarPaperResponse[] }>(
    url.toString(),
    signal,
    apiKey ? { "x-api-key": apiKey } : undefined,
  );

  // Keep each record paired with its fields of study, so the filter below does
  // not depend on index alignment with the raw response.
  let rows = (response.data ?? []).map((item) => {
    const paper: PaperRecord = {
      s2_id: item.paperId,
      title: item.title ?? "Untitled",
      abstract: item.abstract ?? undefined,
      year: item.year ?? undefined,
      citation_count: item.citationCount ?? undefined,
      tldr: item.tldr?.text ?? undefined,
      open_access_pdf: item.openAccessPdf?.url ?? undefined,
      external_ids: item.externalIds ?? undefined,
      doi: normalizeDoi(item.externalIds?.DOI),
      pmid: item.externalIds?.PubMed ?? item.externalIds?.PMID,
      authors: Array.isArray(item.authors)
        ? item.authors
            .map((author) => author.name)
            .filter((name): name is string => Boolean(name))
        : [],
      source: "semantic_scholar",
    };
    return { paper, fields: item.fieldsOfStudy ?? [] };
  });

  if (params.fields_of_study?.length) {
    const wanted = new Set(
      params.fields_of_study.map((item: string) => item.toLowerCase()),
    );
    rows = rows.filter((row) =>
      row.fields.some((field) => wanted.has(field.toLowerCase())),
    );
  }

  let papers = rows.map((row) => row.paper);
  if (minCitations !== undefined)
    papers = papers.filter(
      (paper) => (paper.citation_count ?? 0) >= minCitations,
    );
  if (params.open_access_only)
    papers = papers.filter((paper) => !!paper.open_access_pdf);
  // Papers with an unknown year fail both bounds and are dropped when a bound is set.
  if (yearFrom !== undefined)
    papers = papers.filter((paper) => (paper.year ?? 0) >= yearFrom);
  if (yearTo !== undefined)
    papers = papers.filter((paper) => (paper.year ?? 9999) <= yearTo);
  return { count: papers.length, papers, query: params.query };
}
170
+
171
/**
 * Builds the `semantic_scholar_search` tool definition: metadata, parameter
 * schema, an `execute` handler that runs the search and returns the papers as
 * JSON text plus structured details, and a `renderResult` handler that
 * delegates to the shared provider renderer.
 */
export function createSemanticScholarSearchTool() {
  const tool = {
    name: "semantic_scholar_search",
    label: "Semantic Scholar Search",
    description:
      "Search Semantic Scholar for relevance-ranked papers, citation counts, and open-access metadata.",
    parameters: SEMANTIC_SCHOLAR_PARAMS,
    async execute(
      _toolCallId: string,
      params: SemanticScholarSearchParams,
      signal?: AbortSignal,
      onUpdate?: TextToolUpdate,
    ) {
      const searchResult = await searchSemanticScholar(params, signal, onUpdate);
      const body = formatPaperText(searchResult.papers);
      return textResult(body, searchResult);
    },
    renderResult(
      result: Parameters<typeof renderProviderSearchResult>[1],
      options: Parameters<typeof renderProviderSearchResult>[2],
      theme: Parameters<typeof renderProviderSearchResult>[3],
    ) {
      return renderProviderSearchResult("semantic_scholar", result, options, theme);
    },
  };
  return tool;
}
196
+
197
/** Registers the Semantic Scholar search tool with the given extension API. */
export function registerSemanticScholarSearchTool(pi: ExtensionAPI): void {
  const tool = createSemanticScholarSearchTool();
  pi.registerTool(tool);
}
package/src/shared.ts ADDED
@@ -0,0 +1,103 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import type { PaperRecord } from "./types.ts";
4
+
5
+ export const USER_AGENT = "research-skills-literature-tools/0.1 (+https://github.com/fbraza/research-skills)";
6
+
7
/**
 * Returns the distinct items of `items` in first-seen order, skipping
 * `undefined`, `null` and the empty string.
 */
export function unique<T>(items: T[]): T[] {
  const seen = new Set<T>();
  for (const item of items) {
    if (item === undefined || item === null || item === "") continue;
    seen.add(item);
  }
  return Array.from(seen);
}
10
+
11
/**
 * Resolves after `ms` milliseconds, or rejects early when `signal` aborts.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal. If it is already aborted, the promise
 *   rejects immediately and no timer or listener is created.
 * @returns A promise that resolves with no value once the delay has elapsed.
 *   On abort it rejects with `Error("Request aborted")`.
 */
export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise((resolve, reject) => {
    if (signal?.aborted) {
      reject(new Error("Request aborted"));
      return;
    }
    const onAbort = () => {
      clearTimeout(timeout);
      reject(new Error("Request aborted"));
    };
    const timeout = setTimeout(() => {
      // Detach from the signal so a long-lived signal does not accumulate
      // one stale listener per completed sleep.
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
23
+
24
/**
 * Converts an HTML/XML fragment to plain text: unwraps CDATA sections, strips
 * tags, decodes character entities, and collapses whitespace.
 *
 * Tags are stripped before entities are decoded. The previous order decoded
 * `&lt;` / `&gt;` first, so escaped comparison operators such as
 * "p &lt; 0.05 and q &gt; 0.1" were mistaken for a tag and deleted. As a
 * consequence, escaped markup (`&lt;i&gt;`) is now kept as literal text.
 *
 * Supported entities: `&lt; &gt; &amp; &quot; &apos;` plus decimal (`&#39;`)
 * and hexadecimal (`&#x27;`) references. Each entity is decoded exactly once
 * (`&amp;lt;` becomes `&lt;`); unknown or invalid entities are left untouched.
 */
export function htmlDecode(text: string): string {
  const named = new Map<string, string>([
    ["lt", "<"],
    ["gt", ">"],
    ["amp", "&"],
    ["quot", '"'],
    ["apos", "'"],
  ]);
  const decodeEntity = (entity: string, body: string): string => {
    if (body.charAt(0) !== "#") return named.get(body) ?? entity;
    const isHex = body.charAt(1) === "x" || body.charAt(1) === "X";
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
    // Reject NUL, out-of-range values and lone surrogates rather than emitting garbage.
    const valid =
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
    return valid ? String.fromCodePoint(codePoint) : entity;
  };
  return text
    .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, "$1")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, decodeEntity)
    .replace(/\s+/g, " ")
    .trim();
}
36
+
37
/**
 * Reduces a DOI given as "doi:10.x/…", "https://doi.org/10.x/…" or
 * "http(s)://dx.doi.org/10.x/…" to the bare "10.x/…" form.
 * Case is preserved.
 *
 * @returns The bare DOI, or `undefined` when the input is missing or nothing
 *   is left after stripping prefixes and whitespace.
 */
export function normalizeDoi(raw?: string): string | undefined {
  if (!raw) return undefined;
  const withoutLabel = raw.trim().replace(/^doi:\s*/i, "");
  const bare = withoutLabel.replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "").trim();
  return bare === "" ? undefined : bare;
}
45
+
46
/** Decodes an XML fragment to plain text; delegates directly to `htmlDecode`. */
export function xmlDecode(text: string): string {
  const decoded = htmlDecode(text);
  return decoded;
}
49
+
50
/**
 * Returns the decoded first capture group of every match of `regex` in `text`.
 *
 * The scan runs on a fresh copy of the regex with the global flag forced on.
 * A regex without `g` therefore no longer makes `matchAll` throw a TypeError,
 * a stale `lastIndex` on the caller's regex cannot skip the start of the
 * text, and the caller's regex is never mutated.
 *
 * @returns One `xmlDecode`d string per match whose group 1 is non-empty.
 */
export function pickAll(regex: RegExp, text: string): string[] {
  const flags = regex.flags.includes("g") ? regex.flags : `${regex.flags}g`;
  const scanner = new RegExp(regex.source, flags);
  const matches: string[] = [];
  for (const match of text.matchAll(scanner)) {
    if (match[1]) matches.push(xmlDecode(match[1]));
  }
  return matches;
}
57
+
58
/**
 * Returns the decoded first capture group of the first match of `regex` in
 * `text`, or `undefined` when there is no match or group 1 is empty.
 *
 * `RegExp.prototype.exec` is stateful for global/sticky regexes: it starts at
 * `lastIndex` and advances it. `lastIndex` is reset before and after the
 * search, so a regex object shared between calls always scans from the start
 * of `text` and is left in a clean state.
 */
export function pickOne(regex: RegExp, text: string): string | undefined {
  regex.lastIndex = 0;
  const match = regex.exec(text);
  regex.lastIndex = 0;
  return match?.[1] ? xmlDecode(match[1]) : undefined;
}
62
+
63
/**
 * Fetches `url` with GET semantics and returns the response body as text.
 *
 * The request sends the package user agent and a broad `accept` header;
 * entries in `headers` override those defaults. Redirects are followed.
 *
 * @throws Error "<status> <statusText> for <url>" when the response is not OK,
 *   and whatever `fetch` throws (including on abort).
 */
export async function fetchText(url: string, signal?: AbortSignal, headers?: Record<string, string>): Promise<string> {
  const requestHeaders = {
    "user-agent": USER_AGENT,
    accept: "application/json, text/xml, application/xml, text/html;q=0.9, */*;q=0.8",
    ...headers,
  };
  const response = await fetch(url, { headers: requestHeaders, signal, redirect: "follow" });
  if (response.ok) {
    return await response.text();
  }
  throw new Error(`${response.status} ${response.statusText} for ${url}`);
}
76
+
77
/**
 * Fetches `url` via `fetchText` and parses the body as JSON.
 *
 * The parsed value is returned as `T` without runtime validation.
 *
 * @throws Whatever `fetchText` throws, or a SyntaxError when the body is not
 *   valid JSON.
 */
export async function fetchJson<T>(url: string, signal?: AbortSignal, headers?: Record<string, string>): Promise<T> {
  const body = await fetchText(url, signal, headers);
  const parsed = JSON.parse(body) as T;
  return parsed;
}
81
+
82
/** Serializes the papers as pretty-printed JSON with a 2-space indent. */
export function formatPaperText(papers: PaperRecord[]): string {
  const indent = 2;
  return JSON.stringify(papers, null, indent);
}
85
+
86
/**
 * Turns an arbitrary string into a safe file-name stem: every run of
 * characters outside `[A-Za-z0-9._-]` becomes one "_", and leading/trailing
 * underscores are removed. Falls back to "paper" when nothing is left.
 */
export function sanitizeFilename(value: string): string {
  const collapsed = value.replace(/[^a-z0-9._-]+/gi, "_");
  const trimmed = collapsed.replace(/^_+|_+$/g, "");
  return trimmed.length > 0 ? trimmed : "paper";
}
89
+
90
/**
 * Downloads `pdfUrl` and writes the body to
 * `<outputDir>/<sanitized preferredId>.pdf`, creating `outputDir` if needed.
 *
 * The response body is written as-is; it is not checked to be a PDF.
 *
 * @returns The absolute path of the written file.
 * @throws Error "Failed to download PDF (<status>)" when the response is not
 *   OK, plus any error from `fetch`, `mkdir` or `writeFile`.
 */
export async function savePdf(pdfUrl: string, outputDir: string, preferredId: string, signal?: AbortSignal): Promise<string> {
  await mkdir(outputDir, { recursive: true });
  const response = await fetch(pdfUrl, {
    method: "GET",
    signal,
    headers: { "user-agent": USER_AGENT, accept: "application/pdf,*/*" },
    redirect: "follow",
  });
  if (!response.ok) {
    throw new Error(`Failed to download PDF (${response.status})`);
  }
  const payload = await response.arrayBuffer();
  const bytes = Buffer.from(payload);
  const fileName = `${sanitizeFilename(preferredId)}.pdf`;
  const target = path.resolve(outputDir, fileName);
  await writeFile(target, bytes);
  return target;
}
@@ -0,0 +1,47 @@
1
+ export type TextToolBlock = {
2
+ type: "text";
3
+ text: string;
4
+ };
5
+
6
+ export type TextToolPayload<TDetails extends Record<string, unknown> = Record<string, unknown>> = {
7
+ content: TextToolBlock[];
8
+ details: TDetails;
9
+ isError?: boolean;
10
+ };
11
+
12
+ export type TextToolUpdate<TDetails extends Record<string, unknown> = Record<string, unknown>> = (
13
+ update: TextToolPayload<TDetails>,
14
+ ) => void;
15
+
16
/** Wraps a string in a `{ type: "text" }` content block. */
export function textBlock(text: string): TextToolBlock {
  const block: TextToolBlock = { type: "text", text };
  return block;
}
19
+
20
/**
 * Builds a successful tool payload with a single text block.
 *
 * @param details Structured details to attach; an empty object is used when
 *   omitted (or null).
 */
export function textResult<TDetails extends Record<string, unknown> = Record<string, unknown>>(
  text: string,
  details?: TDetails,
): TextToolPayload<TDetails> {
  const content = [textBlock(text)];
  const attached = (details ?? {}) as TDetails;
  return { content, details: attached };
}
29
+
30
/**
 * Builds a failed tool payload: a single text block plus `isError: true`.
 *
 * @param details Structured details to attach; an empty object is used when
 *   omitted (or null).
 */
export function errorResult<TDetails extends Record<string, unknown> = Record<string, unknown>>(
  text: string,
  details?: TDetails,
): TextToolPayload<TDetails> {
  const content = [textBlock(text)];
  const attached = (details ?? {}) as TDetails;
  return { content, details: attached, isError: true };
}
40
+
41
/**
 * Sends a progress message to the optional update callback.
 * Does nothing, and builds no payload, when `onUpdate` is not provided.
 */
export function emitProgress<TDetails extends Record<string, unknown> = Record<string, unknown>>(
  onUpdate: TextToolUpdate<TDetails> | undefined,
  text: string,
  details?: TDetails,
): void {
  if (onUpdate == null) return;
  onUpdate(textResult(text, details));
}
package/src/types.ts ADDED
@@ -0,0 +1,30 @@
1
+ export type PaperRecord = {
2
+ pmid?: string;
3
+ doi?: string;
4
+ s2_id?: string;
5
+ title: string;
6
+ abstract?: string;
7
+ authors?: string[];
8
+ journal?: string;
9
+ year?: number;
10
+ publication_types?: string[];
11
+ mesh_terms?: string[];
12
+ citation_count?: number;
13
+ tldr?: string;
14
+ open_access_pdf?: string;
15
+ external_ids?: Record<string, string>;
16
+ source?: string;
17
+ sources?: string[];
18
+ date?: string;
19
+ category?: string;
20
+ version?: string;
21
+ license?: string;
22
+ pdf_url?: string;
23
+ };
24
+
25
+ export type FullTextRouteResult = {
26
+ source: string;
27
+ pdf_url?: string;
28
+ access_note: string;
29
+ is_preprint?: boolean;
30
+ };