@fbraza/pi-cite 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -12
- package/package.json +3 -4
- package/skills/literature/SKILL.md +21 -40
- package/skills/literature/references/preclinical-extraction-guide.md +1 -1
- package/skills/literature/scripts/generate_table.py +1 -3
- package/skills/literature/scripts/synthesis.py +4 -3
- package/src/index.ts +0 -4
- package/src/literature-search.ts +2 -110
- package/src/rendering.ts +13 -23
- package/src/shared.ts +0 -21
- package/src/types.ts +0 -13
- package/skills/literature/references/full-text-access-guide.md +0 -34
- package/skills/literature/references/scihub_routine.md +0 -40
- package/skills/literature/references/semanticscholar_routine.md +0 -50
- package/skills/literature/scripts/scihub_pdf_resolver.py +0 -289
- package/src/fulltext.ts +0 -524
- package/src/semantic-scholar.ts +0 -199
package/src/fulltext.ts
DELETED
|
@@ -1,524 +0,0 @@
|
|
|
1
|
-
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
-
import { Type, type Static } from "typebox";
|
|
3
|
-
import { addNcbiApiKeyParam, lookupPubmedIdentifiers } from "./pubmed.ts";
|
|
4
|
-
import {
|
|
5
|
-
fetchText,
|
|
6
|
-
normalizeDoi,
|
|
7
|
-
savePdf,
|
|
8
|
-
unique,
|
|
9
|
-
USER_AGENT,
|
|
10
|
-
} from "./shared.ts";
|
|
11
|
-
import { emitProgress, errorResult, textResult, type TextToolUpdate } from "./tool-output.ts";
|
|
12
|
-
import { trySemanticScholarOpenAccess } from "./semantic-scholar.ts";
|
|
13
|
-
import type { FullTextRouteResult } from "./types.ts";
|
|
14
|
-
|
|
15
|
-
/** Builds an optional string field schema carrying the given description. */
const optionalStringParam = (description: string) =>
  Type.Optional(Type.String({ description }));

/**
 * Parameter schema for the `fetch_fulltext` tool.
 *
 * Every field is optional at the schema level; the tool's `execute` handler
 * is responsible for rejecting calls that supply neither `pmid` nor `doi`.
 */
export const FETCH_FULLTEXT_PARAMS = Type.Object({
  pmid: optionalStringParam("PubMed ID"),
  doi: optionalStringParam("Digital Object Identifier"),
  output_dir: optionalStringParam("Directory where the PDF should be saved"),
});

/** Static TypeScript view of {@link FETCH_FULLTEXT_PARAMS}. */
export type FetchFulltextParams = Static<typeof FETCH_FULLTEXT_PARAMS>;
|
|
24
|
-
|
|
25
|
-
/** Mirror origins (no trailing slash), listed in the order they are tried. */
const SCIHUB_MIRRORS = ["https://sci-hub.st", "https://sci-hub.ru", "https://sci-hub.se"];
|
|
30
|
-
/**
 * Regexes that pull likely PDF URLs out of raw HTML.
 *
 * Each pattern is global + case-insensitive and exposes the URL as capture
 * group 1. They are listed from most specific (explicit metadata) to most
 * permissive (any quoted string that looks like a PDF link).
 */
const PDF_PATTERNS = [
  // <meta name="citation_pdf_url" content="...">
  /<meta[^>]+name=["']citation_pdf_url["'][^>]+content=["']([^"']+)["']/gi,
  // <meta property="og:pdf" content="...">
  /<meta[^>]+property=["']og:pdf["'][^>]+content=["']([^"']+)["']/gi,
  // <meta name="dc.identifier" content="...pdf...">
  /<meta[^>]+name=["']dc\.identifier["'][^>]+content=["']([^"']*\.pdf[^"']*)["']/gi,
  // Embedded viewers: <iframe src>, <embed src>, <object data>
  /<(?:iframe|embed|object)[^>]+(?:src|data)=["']([^"']+)["']/gi,
  // Anchors whose data-track-action mentions "pdf"
  /<a[^>]+data-track-action=["'][^"']*pdf[^"']*["'][^>]+href=["']([^"']+)["']/gi,
  // Anchors whose aria-label mentions "pdf"
  /<a[^>]+aria-label=["'][^"']*pdf[^"']*["'][^>]+href=["']([^"']+)["']/gi,
  // Any quoted absolute or protocol-relative URL ending in .pdf (optional query)
  /["']((?:https?:)?\/\/[^"']+?\.pdf(?:\?[^"']*)?)["']/gi,
  // Any quoted absolute or protocol-relative URL containing a /pdf path segment
  /["']((?:https?:)?\/\/[^"']+?\/pdf(?:\/|\?|$)[^"']*)["']/gi,
  // Any anchor whose href ends in .pdf (optional query)
  /<a[^>]+href=["']([^"']+\.pdf(?:\?[^"']*)?)["']/gi,
];
|
|
41
|
-
|
|
42
|
-
/**
 * Regexes that find links to open-access landing pages or PDFs.
 *
 * Each pattern is global + case-insensitive and exposes the link target as
 * capture group 1.
 */
const OA_LINK_PATTERNS = [
  // Anchors whose visible text mentions "pdf", "download pdf", "view pdf" or "full text"
  /<a[^>]+href=["']([^"']+)["'][^>]*>[^<]*(?:download\s+pdf|pdf|full\s+text|view\s+pdf)[^<]*<\/a>/gi,
  // Anchors whose class list mentions pdf / download / article-pdf
  /<a[^>]+class=["'][^"']*(?:pdf|download|article-pdf)[^"']*["'][^>]+href=["']([^"']+)["']/gi,
  // <link type="application/pdf" href="...">
  /<link[^>]+type=["']application\/pdf["'][^>]+href=["']([^"']+)["']/gi,
];
|
|
47
|
-
|
|
48
|
-
/**
 * `key=value` query flags appended to a URL when guessing PDF download
 * variants. Each entry contains exactly one `=` separating key from value.
 */
const KNOWN_PDF_QUERY_FLAGS = ["pdf=1", "download=true", "download=1", "downloadpdf=true", "is_pdf=true"];
|
|
55
|
-
|
|
56
|
-
/**
 * Decodes the ampersand character references that routinely appear inside
 * HTML attribute values (`&amp;`, `&#38;`, `&#x26;`).
 *
 * Without this step a link such as `href="/pdf?a=1&amp;b=2"` would be turned
 * into a URL whose second query parameter is literally named `amp;b`.
 */
function decodeHtmlAttributeUrl(raw: string): string {
  return raw.replace(/&(?:amp|#38|#x26);/gi, "&");
}

/**
 * Runs every pattern over `html`, resolves capture group 1 of each match
 * against `pageUrl`, and returns the resulting absolute URLs de-duplicated
 * via `unique`.
 *
 * Protocol-relative matches (`//host/path`) are forced to `https:`. Matches
 * that cannot be parsed as a URL are skipped.
 */
function collectAbsoluteUrls(
  html: string,
  pageUrl: string,
  patterns: readonly RegExp[],
): string[] {
  const urls: string[] = [];
  for (const pattern of patterns) {
    for (const match of html.matchAll(pattern)) {
      const raw = match[1];
      if (!raw) continue;
      const decoded = decodeHtmlAttributeUrl(raw);
      try {
        urls.push(
          new URL(
            decoded.startsWith("//") ? `https:${decoded}` : decoded,
            pageUrl,
          ).toString(),
        );
      } catch {
        // ignore invalid candidate
      }
    }
  }
  return unique(urls);
}

/**
 * Extracts candidate PDF URLs from a page using `PDF_PATTERNS`.
 *
 * @param html    Raw HTML of the page.
 * @param pageUrl URL the HTML was fetched from; used as base for relative links.
 * @returns Absolute, de-duplicated candidate URLs in match order.
 */
function extractPdfCandidates(html: string, pageUrl: string): string[] {
  return collectAbsoluteUrls(html, pageUrl, PDF_PATTERNS);
}

/**
 * Extracts links to open-access landing pages / PDFs using `OA_LINK_PATTERNS`.
 *
 * @param html    Raw HTML of the page.
 * @param pageUrl URL the HTML was fetched from; used as base for relative links.
 * @returns Absolute, de-duplicated link targets in match order.
 */
function extractOpenAccessLinks(html: string, pageUrl: string): string[] {
  return collectAbsoluteUrls(html, pageUrl, OA_LINK_PATTERNS);
}
|
|
97
|
-
|
|
98
|
-
/**
 * Produces generic "maybe this is the PDF" variants of a URL.
 *
 * The input URL is always the first entry. For parseable URLs the function
 * additionally tries, in order:
 *   1. the same path with `.pdf` appended (query string preserved),
 *   2. the same URL with each of `KNOWN_PDF_QUERY_FLAGS` set,
 *   3. the path with the first `article` / `full` / `abstract` replaced by
 *      `pdf` (query string dropped).
 *
 * @param url URL to derive variants from; may be malformed.
 * @returns De-duplicated variants (via `unique`). A malformed URL yields
 *          just the input.
 */
function candidatePdfVariants(url: string): string[] {
  const variants = [url];
  try {
    const parsed = new URL(url);
    const { origin, pathname, search } = parsed;

    // Build same-origin URLs by concatenation rather than
    // `new URL(path, origin)`: a pathname that starts with "//" would be
    // parsed as a scheme-relative reference and silently change the host.
    const sameOrigin = (pathAndQuery: string): string =>
      new URL(`${origin}${pathAndQuery}`).toString();

    if (!pathname.toLowerCase().endsWith(".pdf")) {
      // Drop trailing slashes so "/articles/x/" becomes "/articles/x.pdf"
      // instead of "/articles/x/.pdf"; a bare "/" has nothing to append to.
      const withoutTrailingSlash = pathname.replace(/\/+$/, "");
      if (withoutTrailingSlash) {
        variants.push(sameOrigin(`${withoutTrailingSlash}.pdf${search}`));
      }
    }

    for (const flag of KNOWN_PDF_QUERY_FLAGS) {
      const [key, value = ""] = flag.split("=");
      if (!key) continue;
      const withFlag = new URL(parsed.toString());
      withFlag.searchParams.set(key, value);
      variants.push(withFlag.toString());
    }

    if (/article|full|abstract/i.test(pathname)) {
      variants.push(
        sameOrigin(pathname.replace(/(article|full|abstract)/i, "pdf")),
      );
    }
  } catch {
    // ignore malformed URLs; whatever was collected so far is still returned
  }
  return unique(variants);
}
|
|
129
|
-
|
|
130
|
-
/**
 * Produces publisher-specific PDF URL guesses for a handful of known hosts
 * (nature.com, cell.com, sciencedirect.com, wiley.com, tandfonline.com).
 *
 * Only rewrites that actually change the path are emitted, so the input URL
 * itself is never returned from here.
 *
 * @param url Landing-page or candidate URL; may be malformed.
 * @returns De-duplicated guesses (via `unique`); empty for unknown hosts or
 *          malformed URLs.
 */
function publisherSpecificPdfVariants(url: string): string[] {
  const variants: string[] = [];
  try {
    const parsed = new URL(url);
    const host = parsed.hostname.toLowerCase();
    const { origin, pathname } = parsed;

    // Match the registrable domain itself or any subdomain of it. A plain
    // suffix test would also accept look-alikes such as "notnature.com".
    const onDomain = (domain: string): boolean =>
      host === domain || host.endsWith(`.${domain}`);

    // Concatenate with the origin instead of using `new URL(path, origin)`
    // so a path beginning with "//" cannot be re-interpreted as a host.
    const pushPath = (pathAndQuery: string): void => {
      variants.push(new URL(`${origin}${pathAndQuery}`).toString());
    };

    // Apply a path rewrite and record it only when the pattern matched.
    const pushRewrite = (pattern: RegExp, replacement: string): void => {
      const rewritten = pathname.replace(pattern, replacement);
      if (rewritten !== pathname) pushPath(rewritten);
    };

    if (onDomain("nature.com")) {
      const base = pathname.replace(/\/+$/, "");
      if (base && !base.toLowerCase().endsWith(".pdf")) pushPath(`${base}.pdf`);
    }

    if (onDomain("cell.com") || onDomain("sciencedirect.com")) {
      pushRewrite(/\/fulltext$/i, "/pdf");
      pushRewrite(/\/fulltext$/i, "/pdf?download=true");
      pushRewrite(/\/article\//i, "/article/am/pii/");
    }

    // "wiley.com" also covers onlinelibrary.wiley.com.
    if (onDomain("wiley.com")) {
      pushRewrite(/\/full$/i, "/pdf");
      pushRewrite(/\/full$/i, "/pdfdirect");
      pushRewrite(/\/doi\//i, "/doi/pdf/");
      pushRewrite(/\/doi\//i, "/doi/epdf/");
    }

    if (onDomain("tandfonline.com")) {
      pushRewrite(/\/full$/i, "/pdf");
      pushRewrite(/\/full$/i, "/pdf?download=true");
    }
  } catch {
    // ignore malformed URLs
  }
  return unique(variants);
}
|
|
205
|
-
|
|
206
|
-
/**
 * Probes a URL and reports whether it appears to serve a PDF.
 *
 * A `HEAD` request is tried first; if it fails or does not look like a PDF,
 * a `GET` is tried. A response counts as a PDF when it is `ok` and either
 * its `content-type` mentions "pdf" or the URL itself contains ".pdf".
 *
 * The response body is never read: it is cancelled immediately so the
 * probe does not download the document or keep the connection busy.
 *
 * @param url    URL to probe.
 * @param signal Optional abort signal forwarded to `fetch`.
 * @returns `true` if either request looks like a PDF; `false` otherwise,
 *          including on network errors or abort (this function never throws).
 */
async function headOrGet(url: string, signal?: AbortSignal): Promise<boolean> {
  const urlLooksLikePdf = url.toLowerCase().includes(".pdf");

  for (const method of ["HEAD", "GET"] as const) {
    try {
      const response = await fetch(url, {
        method,
        signal,
        headers: { "user-agent": USER_AGENT, accept: "application/pdf,*/*" },
        redirect: "follow",
      });
      const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
      const looksLikePdf =
        response.ok && (contentType.includes("pdf") || urlLooksLikePdf);

      // Only the headers matter; release the body so nothing is left dangling.
      await response.body?.cancel().catch(() => undefined);

      if (looksLikePdf) return true;
    } catch {
      // request failed; fall through to the next method (or to `false`)
    }
  }
  return false;
}
|
|
239
|
-
|
|
240
|
-
/**
 * Tries to locate a reachable PDF URL starting from a publisher landing page.
 *
 * Search order:
 *   1. PDF candidates extracted from `html`, each expanded into generic and
 *      publisher-specific variants;
 *   2. variants of `pageUrl` itself;
 *   3. for every open-access link found in `html`: variants of that link,
 *      then candidates extracted from the linked page's own HTML.
 *
 * Each distinct URL is probed at most once; the same candidate frequently
 * shows up in several phases and every probe costs up to two HTTP requests.
 *
 * @param pageUrl Final URL of the landing page (base for relative links).
 * @param html    HTML of the landing page.
 * @param signal  Optional abort signal forwarded to network calls.
 * @returns The first URL that `headOrGet` accepts, or `undefined`.
 */
async function resolvePublisherPdfFromPage(
  pageUrl: string,
  html: string,
  signal?: AbortSignal,
): Promise<string | undefined> {
  const probed = new Set<string>();

  // Expands each seed into its variants and returns the first one that probes
  // as a PDF, skipping URLs already probed during this resolution.
  const firstReachable = async (
    seeds: readonly string[],
  ): Promise<string | undefined> => {
    for (const seed of seeds) {
      const expanded = [
        ...candidatePdfVariants(seed),
        ...publisherSpecificPdfVariants(seed),
      ];
      for (const candidate of expanded) {
        if (probed.has(candidate)) continue;
        probed.add(candidate);
        if (await headOrGet(candidate, signal)) return candidate;
      }
    }
    return undefined;
  };

  const fromMarkup = await firstReachable(extractPdfCandidates(html, pageUrl));
  if (fromMarkup) return fromMarkup;

  const fromPageUrl = await firstReachable([pageUrl]);
  if (fromPageUrl) return fromPageUrl;

  for (const landingPage of extractOpenAccessLinks(html, pageUrl)) {
    const fromLink = await firstReachable([landingPage]);
    if (fromLink) return fromLink;

    try {
      const nestedHtml = await fetchText(landingPage, signal, {
        accept: "text/html,application/xhtml+xml,*/*",
      });
      const fromNested = await firstReachable(
        extractPdfCandidates(nestedHtml, landingPage),
      );
      if (fromNested) return fromNested;
    } catch {
      // ignore nested landing page failures
    }
  }

  return undefined;
}
|
|
285
|
-
|
|
286
|
-
/**
 * Full-text route 1: PubMed Central.
 *
 * Asks NCBI ELink for the PMC record linked to `pmid`, then scrapes the PMC
 * article page for a `.pdf` href. When the page exposes no such link, the
 * conventional `/pdf` suffix of the article URL is returned instead.
 *
 * @param pmid   PubMed ID to look up.
 * @param signal Optional abort signal forwarded to `fetchText`.
 * @returns A `pmc` result with `pdf_url`, or `not_found` when ELink reports
 *          no `pubmed_pmc` link. Errors from `fetchText` are not caught here.
 */
async function tryPubmedCentral(
  pmid: string,
  signal?: AbortSignal,
): Promise<FullTextRouteResult> {
  const elinkUrl = new URL(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi",
  );
  const query = { dbfrom: "pubmed", db: "pmc", id: pmid };
  for (const [name, value] of Object.entries(query)) {
    elinkUrl.searchParams.set(name, value);
  }
  addNcbiApiKeyParam(elinkUrl);

  const elinkXml = await fetchText(elinkUrl.toString(), signal);
  const pmcNumber = elinkXml.match(
    /<LinkSetDb>[\s\S]*?<LinkName>pubmed_pmc<\/LinkName>[\s\S]*?<Id>(\d+)<\/Id>[\s\S]*?<\/LinkSetDb>/i,
  )?.[1];

  if (!pmcNumber) {
    return {
      source: "not_found",
      access_note: "No PMC full text linked from PubMed",
      is_preprint: false,
    };
  }

  const pmcId = `PMC${pmcNumber}`;
  const articleUrl = `https://pmc.ncbi.nlm.nih.gov/articles/${pmcId}/`;
  const articleHtml = await fetchText(articleUrl, signal);

  // First href on the page that ends in ".pdf" (optionally "?pdf=render").
  const href = /href=["']([^"']+\.pdf(?:\?pdf=render)?)["']/i.exec(articleHtml)?.[1];
  let pdfUrl: string;
  if (href) {
    pdfUrl = new URL(href, articleUrl).toString();
  } else {
    pdfUrl = `https://pmc.ncbi.nlm.nih.gov/articles/${pmcId}/pdf`;
  }

  return {
    source: "pmc",
    pdf_url: pdfUrl,
    access_note: `Open access via PubMed Central (${pmcId})`,
    is_preprint: false,
  };
}
|
|
325
|
-
|
|
326
|
-
/**
 * Full-text route 2: the publisher's own landing page.
 *
 * Follows the `doi.org` redirect for the DOI, then hands the landing page to
 * `resolvePublisherPdfFromPage` to look for a reachable PDF.
 *
 * @param doi    DOI to resolve.
 * @param signal Optional abort signal forwarded to network calls.
 * @returns A `publisher_oa` result with `pdf_url`, or `not_found` when the
 *          landing page is unavailable or exposes no PDF. Errors thrown by
 *          `fetch` itself are not caught here.
 */
async function tryPublisherOpenAccess(
  doi: string,
  signal?: AbortSignal,
): Promise<FullTextRouteResult> {
  const notFound = (access_note: string): FullTextRouteResult => ({
    source: "not_found",
    access_note,
    is_preprint: false,
  });

  const doiUrl = `https://doi.org/${encodeURIComponent(doi)}`;
  const requestHeaders = {
    "user-agent": USER_AGENT,
    accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  };
  const landing = await fetch(doiUrl, {
    method: "GET",
    signal,
    headers: requestHeaders,
    redirect: "follow",
  });

  if (!landing.ok) {
    return notFound(`DOI landing page unavailable (${landing.status})`);
  }

  const landingHtml = await landing.text();
  // `response.url` is the post-redirect URL; fall back to the DOI URL if empty.
  const landingUrl = landing.url || doiUrl;
  const resolved = await resolvePublisherPdfFromPage(landingUrl, landingHtml, signal);

  if (!resolved) {
    return notFound("No direct PDF found on publisher/open-access landing page");
  }
  return {
    source: "publisher_oa",
    pdf_url: resolved,
    access_note:
      "PDF found on publisher/open-access landing page or linked OA page",
    is_preprint: false,
  };
}
|
|
365
|
-
|
|
366
|
-
/**
 * Last-resort route: walks `SCIHUB_MIRRORS` in order, fetches the mirror page
 * for the DOI and probes every PDF candidate found on it.
 *
 * @param doi    DOI to look up.
 * @param signal Optional abort signal forwarded to network calls.
 * @returns A `scihub` result for the first candidate `headOrGet` accepts, or
 *          `not_found` when no mirror yields one. A failing mirror is skipped.
 */
async function trySciHub(
  doi: string,
  signal?: AbortSignal,
): Promise<FullTextRouteResult> {
  const encodedDoi = encodeURIComponent(doi);

  for (const mirror of SCIHUB_MIRRORS) {
    const pageUrl = `${mirror}/${encodedDoi}`;
    let candidates: string[];
    try {
      const pageHtml = await fetchText(pageUrl, signal, {
        accept: "text/html,*/*",
      });
      candidates = extractPdfCandidates(pageHtml, pageUrl);
    } catch {
      // try next mirror
      continue;
    }

    for (const pdfUrl of candidates) {
      const reachable = await headOrGet(pdfUrl, signal);
      if (!reachable) continue;
      return {
        source: "scihub",
        pdf_url: pdfUrl,
        access_note: `PDF resolved through Sci-Hub mirror ${mirror}`,
        is_preprint: false,
      };
    }
  }

  return {
    source: "not_found",
    access_note: "Sci-Hub did not yield a PDF",
    is_preprint: false,
  };
}
|
|
397
|
-
|
|
398
|
-
/**
 * Type guard: true when a route produced a usable PDF URL, i.e. its source
 * is anything other than `"not_found"` and `pdf_url` is a non-empty value.
 */
function hasResolvedPdf(route: FullTextRouteResult): route is FullTextRouteResult & { pdf_url: string } {
  if (route.source === "not_found") {
    return false;
  }
  return !!route.pdf_url;
}
|
|
401
|
-
|
|
402
|
-
/**
 * Builds the tool payload for a successful route.
 *
 * The payload is a shallow copy of `route`. When `outputDir` is given, the
 * PDF is also saved through `savePdf` and the returned value is added as
 * `pdf_path`.
 *
 * @param route       Route result that carries a `pdf_url`.
 * @param outputDir   Directory to save into; nothing is saved when falsy.
 * @param preferredId Identifier forwarded to `savePdf`.
 * @param signal      Optional abort signal forwarded to `savePdf`.
 */
async function buildResolvedPdfResult(
  route: FullTextRouteResult & { pdf_url: string },
  outputDir: string | undefined,
  preferredId: string,
  signal?: AbortSignal,
): Promise<Record<string, unknown>> {
  if (!outputDir) {
    return { ...route };
  }
  const savedPath = await savePdf(route.pdf_url, outputDir, preferredId, signal);
  return { ...route, pdf_path: savedPath };
}
|
|
414
|
-
|
|
415
|
-
/**
 * Creates the `fetch_fulltext` tool definition.
 *
 * `execute` walks the retrieval routes in a fixed order (PubMed Central,
 * publisher open access, Semantic Scholar open access, Sci-Hub) and returns
 * the first route that resolves a PDF URL. If `output_dir` is supplied the
 * PDF is also saved there.
 *
 * A route that throws is recorded as a `not_found` attempt and the next
 * route is tried, so one flaky upstream does not abort the whole chain.
 * The exception is cancellation: when `signal` is aborted the error is
 * rethrown instead of being reported as "not found".
 */
export function createFetchFulltextTool() {
  return {
    name: "fetch_fulltext",
    label: "Fetch Full Text",
    description:
      "Retrieve a paper PDF using PMC, publisher OA, then Sci-Hub fallback.",
    parameters: FETCH_FULLTEXT_PARAMS,
    async execute(
      _toolCallId: string,
      params: FetchFulltextParams,
      signal?: AbortSignal,
      onUpdate?: TextToolUpdate,
    ) {
      // Validate after trimming/normalising so whitespace-only input is
      // rejected up front instead of producing an empty "not found" result.
      const pmid = params.pmid?.trim() || undefined;
      let doi = normalizeDoi(params.doi);
      if (!pmid && !doi) {
        return errorResult("Provide at least one of `pmid` or `doi`.");
      }

      const describeError = (err: unknown): string =>
        err instanceof Error ? err.message : String(err);
      const failedAttempt = (access_note: string): FullTextRouteResult => ({
        source: "not_found",
        access_note,
        is_preprint: false,
      });

      const attempts: FullTextRouteResult[] = [];

      if (!doi && pmid) {
        emitProgress(onUpdate, `Resolving DOI from PubMed for PMID ${pmid}...`);
        try {
          const identifiers = await lookupPubmedIdentifiers(pmid, signal);
          doi = identifiers.doi;
        } catch (err) {
          if (signal?.aborted) throw err;
          // The PMC route only needs the PMID, so carry on without a DOI.
          attempts.push(
            failedAttempt(`DOI lookup via PubMed failed: ${describeError(err)}`),
          );
        }
      }

      type Route = {
        label: string;
        progress: string;
        preferredId: string;
        run: () => Promise<FullTextRouteResult>;
      };
      const routes: Route[] = [];

      if (pmid) {
        routes.push({
          label: "PubMed Central",
          progress: `Checking PubMed Central for PMID ${pmid}...`,
          preferredId: pmid,
          run: () => tryPubmedCentral(pmid, signal),
        });
      }

      if (doi) {
        const resolvedDoi = doi;
        routes.push(
          {
            label: "Publisher open-access",
            progress: `Checking publisher open-access routes for DOI ${resolvedDoi}...`,
            preferredId: resolvedDoi,
            run: () => tryPublisherOpenAccess(resolvedDoi, signal),
          },
          {
            label: "Semantic Scholar",
            progress: `Checking Semantic Scholar open-access PDF metadata for DOI ${resolvedDoi}...`,
            preferredId: resolvedDoi,
            run: () => trySemanticScholarOpenAccess(resolvedDoi, signal),
          },
          {
            label: "Sci-Hub",
            progress: `Trying Sci-Hub fallback for DOI ${resolvedDoi}...`,
            preferredId: resolvedDoi,
            run: () => trySciHub(resolvedDoi, signal),
          },
        );
      }

      for (const route of routes) {
        emitProgress(onUpdate, route.progress);

        let outcome: FullTextRouteResult;
        try {
          outcome = await route.run();
        } catch (err) {
          if (signal?.aborted) throw err;
          outcome = failedAttempt(
            `${route.label} lookup failed: ${describeError(err)}`,
          );
        }
        attempts.push(outcome);

        if (hasResolvedPdf(outcome)) {
          const result = await buildResolvedPdfResult(
            outcome,
            params.output_dir,
            route.preferredId,
            signal,
          );
          return textResult(JSON.stringify(result, null, 2), result);
        }
      }

      const result = {
        source: "not_found",
        access_note:
          "No full-text PDF found via PMC, publisher OA, Semantic Scholar OA, or Sci-Hub",
        attempts,
      };
      return textResult(JSON.stringify(result, null, 2), result);
    },
  };
}
|
|
521
|
-
|
|
522
|
-
/**
 * Registers the `fetch_fulltext` tool with the host extension API.
 *
 * @param pi Extension API handle supplied by the host.
 */
export function registerFetchFulltextTool(pi: ExtensionAPI): void {
  const fetchFulltextTool = createFetchFulltextTool();
  pi.registerTool(fetchFulltextTool);
}
|