@fbraza/pi-cite 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -6
- package/package.json +7 -4
- package/skills/literature/SKILL.md +189 -0
- package/skills/literature/references/preclinical-extraction-guide.md +215 -0
- package/skills/literature/references/pubmed_api_reference.md +298 -0
- package/skills/literature/references/pubmed_common_queries.md +453 -0
- package/skills/literature/references/pubmed_routine.md +93 -0
- package/skills/literature/references/pubmed_search_syntax.md +436 -0
- package/skills/literature/scripts/export_all.py +53 -0
- package/skills/literature/scripts/extract_experiments.py +401 -0
- package/skills/literature/scripts/generate_table.py +94 -0
- package/skills/literature/scripts/synthesis.py +94 -0
- package/src/index.ts +0 -4
- package/src/literature-search.ts +2 -110
- package/src/rendering.ts +13 -23
- package/src/shared.ts +0 -21
- package/src/types.ts +0 -13
- package/src/fulltext.ts +0 -524
- package/src/semantic-scholar.ts +0 -199
package/src/literature-search.ts
CHANGED
|
@@ -7,7 +7,6 @@ import {
|
|
|
7
7
|
type LiteratureSearchDisplayEvent,
|
|
8
8
|
type LiteratureSearchDisplaySearch,
|
|
9
9
|
} from "./rendering.ts";
|
|
10
|
-
import { searchSemanticScholar } from "./semantic-scholar.ts";
|
|
11
10
|
import { formatPaperText, normalizeDoi, unique } from "./shared.ts";
|
|
12
11
|
import { emitProgress, textResult, type TextToolUpdate } from "./tool-output.ts";
|
|
13
12
|
import type { PaperRecord } from "./types.ts";
|
|
@@ -17,12 +16,6 @@ export const LITERATURE_SEARCH_PARAMS = Type.Object({
|
|
|
17
16
|
description:
|
|
18
17
|
"PubMed-ready query using PubMed syntax such as MeSH [mh], title/abstract [tiab], publication type [pt], substance [nm], and Boolean logic.",
|
|
19
18
|
}),
|
|
20
|
-
semantic_scholar_query: Type.Optional(
|
|
21
|
-
Type.String({
|
|
22
|
-
description:
|
|
23
|
-
"Optional natural-language Semantic Scholar query for supplementary search. If omitted and Semantic Scholar is configured, a simplified query is derived from pubmed_query.",
|
|
24
|
-
}),
|
|
25
|
-
),
|
|
26
19
|
max_results: Type.Optional(
|
|
27
20
|
Type.Number({ description: "Maximum results per provider (default 20)" }),
|
|
28
21
|
),
|
|
@@ -51,27 +44,11 @@ export type LiteratureSearchResult = {
|
|
|
51
44
|
papers: PaperRecord[];
|
|
52
45
|
providers: {
|
|
53
46
|
pubmed: ProviderExecution;
|
|
54
|
-
semantic_scholar: ProviderExecution;
|
|
55
47
|
};
|
|
56
48
|
searches: LiteratureSearchDisplaySearch[];
|
|
57
49
|
events: LiteratureSearchDisplayEvent[];
|
|
58
50
|
};
|
|
59
51
|
|
|
60
|
-
function firstYear(value?: string): number | undefined {
|
|
61
|
-
const match = value?.match(/^(\d{4})/);
|
|
62
|
-
return match?.[1] ? Number(match[1]) : undefined;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
export function simplifyPubmedQueryForSemanticScholar(query: string): string {
|
|
66
|
-
const simplified = query
|
|
67
|
-
.replace(/\[[^\]]+\]/g, " ")
|
|
68
|
-
.replace(/\b(?:AND|OR|NOT)\b/gi, " ")
|
|
69
|
-
.replace(/[()"']/g, " ")
|
|
70
|
-
.replace(/\s+/g, " ")
|
|
71
|
-
.trim();
|
|
72
|
-
return simplified || query.trim();
|
|
73
|
-
}
|
|
74
|
-
|
|
75
52
|
function sourceList(paper: PaperRecord): string[] {
|
|
76
53
|
return unique([
|
|
77
54
|
...(paper.sources ?? []),
|
|
@@ -92,7 +69,6 @@ function dedupeKeys(paper: PaperRecord): string[] {
|
|
|
92
69
|
const keys = [
|
|
93
70
|
doi ? `doi:${doi}` : undefined,
|
|
94
71
|
paper.pmid ? `pmid:${paper.pmid}` : undefined,
|
|
95
|
-
paper.s2_id ? `s2:${paper.s2_id}` : undefined,
|
|
96
72
|
];
|
|
97
73
|
const title = normalizedTitle(paper.title);
|
|
98
74
|
if (title && paper.year) keys.push(`title-year:${title}:${paper.year}`);
|
|
@@ -106,7 +82,6 @@ function mergePapers(existing: PaperRecord, incoming: PaperRecord): PaperRecord
|
|
|
106
82
|
...existing,
|
|
107
83
|
doi: normalizeDoi(existing.doi) ?? normalizeDoi(incoming.doi),
|
|
108
84
|
pmid: existing.pmid ?? incoming.pmid,
|
|
109
|
-
s2_id: existing.s2_id ?? incoming.s2_id,
|
|
110
85
|
title: existing.title !== "Untitled" ? existing.title : incoming.title,
|
|
111
86
|
abstract: existing.abstract ?? incoming.abstract,
|
|
112
87
|
authors: unique([...(existing.authors ?? []), ...(incoming.authors ?? [])]),
|
|
@@ -117,10 +92,6 @@ function mergePapers(existing: PaperRecord, incoming: PaperRecord): PaperRecord
|
|
|
117
92
|
...(incoming.publication_types ?? []),
|
|
118
93
|
]),
|
|
119
94
|
mesh_terms: unique([...(existing.mesh_terms ?? []), ...(incoming.mesh_terms ?? [])]),
|
|
120
|
-
citation_count: existing.citation_count ?? incoming.citation_count,
|
|
121
|
-
tldr: existing.tldr ?? incoming.tldr,
|
|
122
|
-
open_access_pdf: existing.open_access_pdf ?? incoming.open_access_pdf,
|
|
123
|
-
external_ids: { ...(incoming.external_ids ?? {}), ...(existing.external_ids ?? {}) },
|
|
124
95
|
source: sources.join(";"),
|
|
125
96
|
sources,
|
|
126
97
|
};
|
|
@@ -208,88 +179,10 @@ export async function searchLiterature(
|
|
|
208
179
|
});
|
|
209
180
|
emitEvent(`PubMed q1 found ${pubmed.count} candidate papers.`);
|
|
210
181
|
|
|
211
|
-
const semanticScholarApiKey = process.env.SEMANTIC_SCHOLAR_API_KEY?.trim();
|
|
212
|
-
let semanticScholar: ProviderExecution = {
|
|
213
|
-
searched: false,
|
|
214
|
-
reason: "SEMANTIC_SCHOLAR_API_KEY not configured",
|
|
215
|
-
};
|
|
216
|
-
let semanticScholarPapers: PaperRecord[] = [];
|
|
217
|
-
|
|
218
|
-
if (semanticScholarApiKey) {
|
|
219
|
-
const semanticScholarQuery =
|
|
220
|
-
params.semantic_scholar_query?.trim() ||
|
|
221
|
-
simplifyPubmedQueryForSemanticScholar(params.pubmed_query);
|
|
222
|
-
|
|
223
|
-
events.push({
|
|
224
|
-
phase: "query_start",
|
|
225
|
-
provider: "semantic_scholar",
|
|
226
|
-
query_index: 1,
|
|
227
|
-
query: semanticScholarQuery,
|
|
228
|
-
});
|
|
229
|
-
emitEvent(`Searching Semantic Scholar q1: ${semanticScholarQuery}`);
|
|
230
|
-
|
|
231
|
-
try {
|
|
232
|
-
const semanticScholarResult = await searchSemanticScholar(
|
|
233
|
-
{
|
|
234
|
-
query: semanticScholarQuery,
|
|
235
|
-
max_results: Math.min(100, maxResults),
|
|
236
|
-
year_from: firstYear(params.date_from),
|
|
237
|
-
year_to: firstYear(params.date_to),
|
|
238
|
-
},
|
|
239
|
-
signal,
|
|
240
|
-
undefined,
|
|
241
|
-
);
|
|
242
|
-
semanticScholarPapers = semanticScholarResult.papers;
|
|
243
|
-
const semanticScholarDisplayPapers = compactPapersForDisplay(
|
|
244
|
-
semanticScholarResult.papers,
|
|
245
|
-
);
|
|
246
|
-
searches.push({
|
|
247
|
-
provider: "semantic_scholar",
|
|
248
|
-
query_index: 1,
|
|
249
|
-
query: semanticScholarQuery,
|
|
250
|
-
count: semanticScholarResult.count,
|
|
251
|
-
papers: semanticScholarDisplayPapers,
|
|
252
|
-
});
|
|
253
|
-
events.push({
|
|
254
|
-
phase: "query_results",
|
|
255
|
-
provider: "semantic_scholar",
|
|
256
|
-
query_index: 1,
|
|
257
|
-
query: semanticScholarQuery,
|
|
258
|
-
count: semanticScholarResult.count,
|
|
259
|
-
papers: semanticScholarDisplayPapers,
|
|
260
|
-
});
|
|
261
|
-
emitEvent(
|
|
262
|
-
`Semantic Scholar q1 found ${semanticScholarResult.count} candidate papers.`,
|
|
263
|
-
);
|
|
264
|
-
semanticScholar = {
|
|
265
|
-
searched: true,
|
|
266
|
-
count: semanticScholarResult.count,
|
|
267
|
-
query: semanticScholarQuery,
|
|
268
|
-
};
|
|
269
|
-
} catch (err) {
|
|
270
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
271
|
-
events.push({
|
|
272
|
-
phase: "query_error",
|
|
273
|
-
provider: "semantic_scholar",
|
|
274
|
-
query_index: 1,
|
|
275
|
-
query: semanticScholarQuery,
|
|
276
|
-
error: message,
|
|
277
|
-
});
|
|
278
|
-
semanticScholar = {
|
|
279
|
-
searched: false,
|
|
280
|
-
reason: `Semantic Scholar search failed: ${message}`,
|
|
281
|
-
};
|
|
282
|
-
emitEvent(`Semantic Scholar q1 failed: ${message}`);
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
286
182
|
events.push({ phase: "dedupe" });
|
|
287
183
|
emitEvent("Deduplicating literature results...");
|
|
288
184
|
|
|
289
|
-
const papers = dedupeLiteraturePapers(
|
|
290
|
-
...pubmed.papers,
|
|
291
|
-
...semanticScholarPapers,
|
|
292
|
-
]);
|
|
185
|
+
const papers = dedupeLiteraturePapers(pubmed.papers);
|
|
293
186
|
events.push({
|
|
294
187
|
phase: "complete",
|
|
295
188
|
count: papers.length,
|
|
@@ -307,7 +200,6 @@ export async function searchLiterature(
|
|
|
307
200
|
query: pubmed.query ?? params.pubmed_query,
|
|
308
201
|
total: pubmed.total,
|
|
309
202
|
},
|
|
310
|
-
semantic_scholar: semanticScholar,
|
|
311
203
|
},
|
|
312
204
|
searches,
|
|
313
205
|
events,
|
|
@@ -319,7 +211,7 @@ export function createLiteratureSearchTool() {
|
|
|
319
211
|
name: "literature_search",
|
|
320
212
|
label: "Literature Search",
|
|
321
213
|
description:
|
|
322
|
-
"Run the literature workflow search
|
|
214
|
+
"Run the literature workflow search against PubMed using a PubMed-ready query (MeSH [mh], title/abstract [tiab], publication type [pt], substance [nm], and Boolean logic).",
|
|
323
215
|
parameters: LITERATURE_SEARCH_PARAMS,
|
|
324
216
|
async execute(
|
|
325
217
|
_toolCallId: string,
|
package/src/rendering.ts
CHANGED
|
@@ -16,20 +16,19 @@ export type CompactPaperForDisplay = {
|
|
|
16
16
|
source: string;
|
|
17
17
|
year?: number;
|
|
18
18
|
journal?: string;
|
|
19
|
-
citation_count?: number;
|
|
20
19
|
};
|
|
21
20
|
|
|
22
21
|
export type LiteratureSearchDisplayEvent =
|
|
23
22
|
| { phase: "start" }
|
|
24
23
|
| {
|
|
25
24
|
phase: "query_start";
|
|
26
|
-
provider: "pubmed"
|
|
25
|
+
provider: "pubmed";
|
|
27
26
|
query_index: number;
|
|
28
27
|
query: string;
|
|
29
28
|
}
|
|
30
29
|
| {
|
|
31
30
|
phase: "query_results";
|
|
32
|
-
provider: "pubmed"
|
|
31
|
+
provider: "pubmed";
|
|
33
32
|
query_index: number;
|
|
34
33
|
query: string;
|
|
35
34
|
count: number;
|
|
@@ -37,7 +36,7 @@ export type LiteratureSearchDisplayEvent =
|
|
|
37
36
|
}
|
|
38
37
|
| {
|
|
39
38
|
phase: "query_error";
|
|
40
|
-
provider: "pubmed"
|
|
39
|
+
provider: "pubmed";
|
|
41
40
|
query_index: number;
|
|
42
41
|
query: string;
|
|
43
42
|
error: string;
|
|
@@ -46,7 +45,7 @@ export type LiteratureSearchDisplayEvent =
|
|
|
46
45
|
| { phase: "complete"; count: number; papers: CompactPaperForDisplay[] };
|
|
47
46
|
|
|
48
47
|
export type LiteratureSearchDisplaySearch = {
|
|
49
|
-
provider: "pubmed"
|
|
48
|
+
provider: "pubmed";
|
|
50
49
|
query_index: number;
|
|
51
50
|
query: string;
|
|
52
51
|
count: number;
|
|
@@ -107,7 +106,6 @@ export function authorRange(paper: PaperRecord): string {
|
|
|
107
106
|
export function paperIdentifier(paper: PaperRecord): string {
|
|
108
107
|
if (paper.doi) return `DOI:${paper.doi}`;
|
|
109
108
|
if (paper.pmid) return `PMID:${paper.pmid}`;
|
|
110
|
-
if (paper.s2_id) return `S2:${paper.s2_id}`;
|
|
111
109
|
return "—";
|
|
112
110
|
}
|
|
113
111
|
|
|
@@ -120,11 +118,7 @@ export function sourceLabel(paper: PaperRecord): string {
|
|
|
120
118
|
.map((source) => source.trim())
|
|
121
119
|
.filter(Boolean),
|
|
122
120
|
);
|
|
123
|
-
|
|
124
|
-
const hasS2 = sources.has("semantic_scholar");
|
|
125
|
-
if (hasPubmed && hasS2) return "PM+S2";
|
|
126
|
-
if (hasPubmed) return "PM";
|
|
127
|
-
if (hasS2) return "S2";
|
|
121
|
+
if (sources.has("pubmed")) return "PM";
|
|
128
122
|
return paper.source ?? "—";
|
|
129
123
|
}
|
|
130
124
|
|
|
@@ -136,7 +130,6 @@ export function compactPaperForDisplay(paper: PaperRecord): CompactPaperForDispl
|
|
|
136
130
|
source: sourceLabel(paper),
|
|
137
131
|
year: paper.year,
|
|
138
132
|
journal: paper.journal,
|
|
139
|
-
citation_count: paper.citation_count,
|
|
140
133
|
};
|
|
141
134
|
}
|
|
142
135
|
|
|
@@ -144,12 +137,12 @@ export function compactPapersForDisplay(papers: PaperRecord[]): CompactPaperForD
|
|
|
144
137
|
return papers.map(compactPaperForDisplay);
|
|
145
138
|
}
|
|
146
139
|
|
|
147
|
-
function providerLabel(provider: "pubmed"
|
|
148
|
-
return
|
|
140
|
+
function providerLabel(provider: "pubmed"): string {
|
|
141
|
+
return "PubMed";
|
|
149
142
|
}
|
|
150
143
|
|
|
151
|
-
function providerColor(provider: "pubmed"
|
|
152
|
-
return
|
|
144
|
+
function providerColor(provider: "pubmed"): string {
|
|
145
|
+
return "success";
|
|
153
146
|
}
|
|
154
147
|
|
|
155
148
|
export function formatFoundLine(
|
|
@@ -168,7 +161,7 @@ export function formatMergedLine(
|
|
|
168
161
|
theme?: ThemeLike,
|
|
169
162
|
): string {
|
|
170
163
|
const title = truncateText(paper.title, 72);
|
|
171
|
-
const source = color(theme,
|
|
164
|
+
const source = color(theme, "success", `(${paper.source})`);
|
|
172
165
|
return ` ${color(theme, "success", "+")} ${index + 1}. ${title} ${source}`;
|
|
173
166
|
}
|
|
174
167
|
|
|
@@ -237,7 +230,6 @@ type LiteratureResultDetails = {
|
|
|
237
230
|
papers?: PaperRecord[];
|
|
238
231
|
providers?: {
|
|
239
232
|
pubmed?: ProviderSearchSummary;
|
|
240
|
-
semantic_scholar?: ProviderSearchSummary;
|
|
241
233
|
};
|
|
242
234
|
events?: LiteratureSearchDisplayEvent[];
|
|
243
235
|
};
|
|
@@ -250,11 +242,9 @@ type ProviderResultDetails = {
|
|
|
250
242
|
|
|
251
243
|
function renderCollapsedLiteratureResult(details: LiteratureResultDetails, theme?: ThemeLike): string {
|
|
252
244
|
const pubmed = details?.providers?.pubmed;
|
|
253
|
-
const s2 = details?.providers?.semantic_scholar;
|
|
254
245
|
const pubmedText = pubmed?.searched ? `PubMed: ${pubmed.count}` : "PubMed: —";
|
|
255
|
-
const s2Text = s2?.searched ? `S2: ${s2.count}` : "S2: skipped";
|
|
256
246
|
const count = details?.count ?? details?.papers?.length ?? 0;
|
|
257
|
-
return `${color(theme, "success", "✓")} ${color(theme, "toolTitle", "literature_search")} ${color(theme, "success", pubmedText)} |
|
|
247
|
+
return `${color(theme, "success", "✓")} ${color(theme, "toolTitle", "literature_search")} ${color(theme, "success", pubmedText)} | merged: ${count}`;
|
|
258
248
|
}
|
|
259
249
|
|
|
260
250
|
export function renderLiteratureSearchResult(
|
|
@@ -284,7 +274,7 @@ export function renderLiteratureSearchResult(
|
|
|
284
274
|
}
|
|
285
275
|
|
|
286
276
|
export function renderProviderSearchResult(
|
|
287
|
-
provider: "pubmed"
|
|
277
|
+
provider: "pubmed",
|
|
288
278
|
result: ToolRenderResult<ProviderResultDetails>,
|
|
289
279
|
options: RenderOptions,
|
|
290
280
|
theme?: ThemeLike,
|
|
@@ -298,7 +288,7 @@ export function renderProviderSearchResult(
|
|
|
298
288
|
return terminalText(color(theme, "warning", text));
|
|
299
289
|
}
|
|
300
290
|
if (!options.expanded) {
|
|
301
|
-
return terminalText(`${color(theme, "success", "✓")} ${color(theme, "toolTitle",
|
|
291
|
+
return terminalText(`${color(theme, "success", "✓")} ${color(theme, "toolTitle", "pubmed_search")} ${papers.length} papers`);
|
|
302
292
|
}
|
|
303
293
|
const lines = [
|
|
304
294
|
`${color(theme, providerColor(provider), "→")} ${color(theme, providerColor(provider), providerName)} q1: ${query}`,
|
package/src/shared.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
-
import path from "node:path";
|
|
3
1
|
import type { PaperRecord } from "./types.ts";
|
|
4
2
|
|
|
5
3
|
export const USER_AGENT = "research-skills-literature-tools/0.1 (+https://github.com/fbraza/research-skills)";
|
|
@@ -82,22 +80,3 @@ export async function fetchJson<T>(url: string, signal?: AbortSignal, headers?:
|
|
|
82
80
|
export function formatPaperText(papers: PaperRecord[]): string {
|
|
83
81
|
return JSON.stringify(papers, null, 2);
|
|
84
82
|
}
|
|
85
|
-
|
|
86
|
-
export function sanitizeFilename(value: string): string {
|
|
87
|
-
return value.replace(/[^a-z0-9._-]+/gi, "_").replace(/^_+|_+$/g, "") || "paper";
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
export async function savePdf(pdfUrl: string, outputDir: string, preferredId: string, signal?: AbortSignal): Promise<string> {
|
|
91
|
-
await mkdir(outputDir, { recursive: true });
|
|
92
|
-
const response = await fetch(pdfUrl, {
|
|
93
|
-
method: "GET",
|
|
94
|
-
signal,
|
|
95
|
-
headers: { "user-agent": USER_AGENT, accept: "application/pdf,*/*" },
|
|
96
|
-
redirect: "follow",
|
|
97
|
-
});
|
|
98
|
-
if (!response.ok) throw new Error(`Failed to download PDF (${response.status})`);
|
|
99
|
-
const bytes = Buffer.from(await response.arrayBuffer());
|
|
100
|
-
const filePath = path.resolve(outputDir, `${sanitizeFilename(preferredId)}.pdf`);
|
|
101
|
-
await writeFile(filePath, bytes);
|
|
102
|
-
return filePath;
|
|
103
|
-
}
|
package/src/types.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
export type PaperRecord = {
|
|
2
2
|
pmid?: string;
|
|
3
3
|
doi?: string;
|
|
4
|
-
s2_id?: string;
|
|
5
4
|
title: string;
|
|
6
5
|
abstract?: string;
|
|
7
6
|
authors?: string[];
|
|
@@ -9,22 +8,10 @@ export type PaperRecord = {
|
|
|
9
8
|
year?: number;
|
|
10
9
|
publication_types?: string[];
|
|
11
10
|
mesh_terms?: string[];
|
|
12
|
-
citation_count?: number;
|
|
13
|
-
tldr?: string;
|
|
14
|
-
open_access_pdf?: string;
|
|
15
|
-
external_ids?: Record<string, string>;
|
|
16
11
|
source?: string;
|
|
17
12
|
sources?: string[];
|
|
18
13
|
date?: string;
|
|
19
14
|
category?: string;
|
|
20
15
|
version?: string;
|
|
21
16
|
license?: string;
|
|
22
|
-
pdf_url?: string;
|
|
23
|
-
};
|
|
24
|
-
|
|
25
|
-
export type FullTextRouteResult = {
|
|
26
|
-
source: string;
|
|
27
|
-
pdf_url?: string;
|
|
28
|
-
access_note: string;
|
|
29
|
-
is_preprint?: boolean;
|
|
30
17
|
};
|