@fbraza/pi-cite 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/rendering.ts CHANGED
@@ -2,7 +2,7 @@ import { Text } from "@earendil-works/pi-tui";
2
2
  import type { PaperRecord } from "./types.ts";
3
3
 
4
4
  export const MAX_STREAMED_PAPERS_PER_QUERY = 5;
5
- export const MAX_FINAL_MERGED_PAPERS = 20;
5
+ export const MAX_EXPANDED_PAPER_PREVIEW = 5;
6
6
 
7
7
  type ThemeLike = {
8
8
  fg?: (color: string, text: string) => string;
@@ -16,37 +16,35 @@ export type CompactPaperForDisplay = {
16
16
  source: string;
17
17
  year?: number;
18
18
  journal?: string;
19
- citation_count?: number;
20
19
  };
21
20
 
22
21
  export type LiteratureSearchDisplayEvent =
23
22
  | { phase: "start" }
24
23
  | {
25
24
  phase: "query_start";
26
- provider: "pubmed" | "semantic_scholar";
25
+ provider: "pubmed";
27
26
  query_index: number;
28
27
  query: string;
29
28
  }
30
29
  | {
31
30
  phase: "query_results";
32
- provider: "pubmed" | "semantic_scholar";
31
+ provider: "pubmed";
33
32
  query_index: number;
34
33
  query: string;
35
34
  count: number;
36
- papers: CompactPaperForDisplay[];
37
35
  }
38
36
  | {
39
37
  phase: "query_error";
40
- provider: "pubmed" | "semantic_scholar";
38
+ provider: "pubmed";
41
39
  query_index: number;
42
40
  query: string;
43
41
  error: string;
44
42
  }
45
43
  | { phase: "dedupe" }
46
- | { phase: "complete"; count: number; papers: CompactPaperForDisplay[] };
44
+ | { phase: "complete"; count: number };
47
45
 
48
46
  export type LiteratureSearchDisplaySearch = {
49
- provider: "pubmed" | "semantic_scholar";
47
+ provider: "pubmed";
50
48
  query_index: number;
51
49
  query: string;
52
50
  count: number;
@@ -107,7 +105,6 @@ export function authorRange(paper: PaperRecord): string {
107
105
  export function paperIdentifier(paper: PaperRecord): string {
108
106
  if (paper.doi) return `DOI:${paper.doi}`;
109
107
  if (paper.pmid) return `PMID:${paper.pmid}`;
110
- if (paper.s2_id) return `S2:${paper.s2_id}`;
111
108
  return "—";
112
109
  }
113
110
 
@@ -120,12 +117,8 @@ export function sourceLabel(paper: PaperRecord): string {
120
117
  .map((source) => source.trim())
121
118
  .filter(Boolean),
122
119
  );
123
- const hasPubmed = sources.has("pubmed");
124
- const hasS2 = sources.has("semantic_scholar");
125
- if (hasPubmed && hasS2) return "PM+S2";
126
- if (hasPubmed) return "PM";
127
- if (hasS2) return "S2";
128
- return paper.source ?? "—";
120
+ if (sources.has("pubmed")) return "PM";
121
+ return "";
129
122
  }
130
123
 
131
124
  export function compactPaperForDisplay(paper: PaperRecord): CompactPaperForDisplay {
@@ -136,7 +129,6 @@ export function compactPaperForDisplay(paper: PaperRecord): CompactPaperForDispl
136
129
  source: sourceLabel(paper),
137
130
  year: paper.year,
138
131
  journal: paper.journal,
139
- citation_count: paper.citation_count,
140
132
  };
141
133
  }
142
134
 
@@ -144,12 +136,16 @@ export function compactPapersForDisplay(papers: PaperRecord[]): CompactPaperForD
144
136
  return papers.map(compactPaperForDisplay);
145
137
  }
146
138
 
147
- function providerLabel(provider: "pubmed" | "semantic_scholar"): string {
148
- return provider === "pubmed" ? "PubMed" : "Semantic Scholar";
139
+ function providerLabel(provider: "pubmed"): string {
140
+ return "PubMed";
141
+ }
142
+
143
+ function providerColor(provider: "pubmed"): string {
144
+ return "success";
149
145
  }
150
146
 
151
- function providerColor(provider: "pubmed" | "semantic_scholar"): string {
152
- return provider === "pubmed" ? "success" : "accent";
147
+ function pluralize(count: number, singular: string, plural = `${singular}s`): string {
148
+ return count === 1 ? singular : plural;
153
149
  }
154
150
 
155
151
  export function formatFoundLine(
@@ -162,60 +158,14 @@ export function formatFoundLine(
162
158
  return ` ${color(theme, "success", "✓ found:")} ${author} ${title} ${color(theme, "muted", id)}`;
163
159
  }
164
160
 
165
- export function formatMergedLine(
161
+ export function formatPaperPreviewLine(
166
162
  paper: CompactPaperForDisplay,
167
163
  index: number,
168
164
  theme?: ThemeLike,
169
165
  ): string {
170
- const title = truncateText(paper.title, 72);
171
- const source = color(theme, paper.source.includes("S2") ? "accent" : "success", `(${paper.source})`);
172
- return ` ${color(theme, "success", "+")} ${index + 1}. ${title} ${source}`;
173
- }
174
-
175
- function renderEvent(
176
- event: LiteratureSearchDisplayEvent,
177
- theme?: ThemeLike,
178
- ): string[] {
179
- if (event.phase === "start") {
180
- return [`${color(theme, "accent", "●")} ${color(theme, "toolTitle", "literature_search")} starting`];
181
- }
182
- if (event.phase === "query_start") {
183
- return [
184
- `${color(theme, providerColor(event.provider), "→")} ${color(theme, providerColor(event.provider), providerLabel(event.provider))} q${event.query_index}: ${event.query}`,
185
- ];
186
- }
187
- if (event.phase === "query_results") {
188
- const lines = event.papers
189
- .slice(0, MAX_STREAMED_PAPERS_PER_QUERY)
190
- .map((paper) => formatFoundLine(paper, theme));
191
- const hidden = event.count - Math.min(event.count, MAX_STREAMED_PAPERS_PER_QUERY);
192
- if (hidden > 0) lines.push(` ${color(theme, "dim", "…")} ${hidden} more candidate papers`);
193
- if (event.count === 0) lines.push(` ${color(theme, "muted", "no candidate papers found")}`);
194
- return lines;
195
- }
196
- if (event.phase === "query_error") {
197
- return [
198
- ` ${color(theme, "error", "! failed:")} ${providerLabel(event.provider)} q${event.query_index}: ${truncateText(event.error, 96)}`,
199
- ];
200
- }
201
- if (event.phase === "dedupe") {
202
- return [`${color(theme, "warning", "→")} deduplicating by DOI / PMID / title-year`];
203
- }
204
- const lines = event.papers
205
- .slice(0, MAX_FINAL_MERGED_PAPERS)
206
- .map((paper, index) => formatMergedLine(paper, index, theme));
207
- const hidden = event.count - Math.min(event.count, MAX_FINAL_MERGED_PAPERS);
208
- if (hidden > 0) lines.push(` ${color(theme, "dim", "…")} ${hidden} more merged papers`);
209
- lines.push(`${color(theme, "success", "✓")} done: ${event.count} merged papers`);
210
- return lines;
211
- }
212
-
213
- export function renderLiteratureEventTranscript(
214
- events: LiteratureSearchDisplayEvent[] | undefined,
215
- theme?: ThemeLike,
216
- ): string {
217
- if (!events?.length) return "";
218
- return events.flatMap((event) => renderEvent(event, theme)).join("\n");
166
+ const year = paper.year ? ` ${paper.year}` : "";
167
+ const title = truncateText(paper.title, 88);
168
+ return ` ${color(theme, "success", `${index + 1}.`)} ${paper.first_author}${year} ${title}`;
219
169
  }
220
170
 
221
171
  type RenderOptions = { expanded?: boolean; isPartial?: boolean };
@@ -230,6 +180,7 @@ type ToolRenderResult<TDetails> = {
230
180
  type ProviderSearchSummary = {
231
181
  searched?: boolean;
232
182
  count?: number;
183
+ query?: string;
233
184
  };
234
185
 
235
186
  type LiteratureResultDetails = {
@@ -237,7 +188,6 @@ type LiteratureResultDetails = {
237
188
  papers?: PaperRecord[];
238
189
  providers?: {
239
190
  pubmed?: ProviderSearchSummary;
240
- semantic_scholar?: ProviderSearchSummary;
241
191
  };
242
192
  events?: LiteratureSearchDisplayEvent[];
243
193
  };
@@ -249,12 +199,40 @@ type ProviderResultDetails = {
249
199
  };
250
200
 
251
201
  function renderCollapsedLiteratureResult(details: LiteratureResultDetails, theme?: ThemeLike): string {
252
- const pubmed = details?.providers?.pubmed;
253
- const s2 = details?.providers?.semantic_scholar;
254
- const pubmedText = pubmed?.searched ? `PubMed: ${pubmed.count}` : "PubMed: —";
255
- const s2Text = s2?.searched ? `S2: ${s2.count}` : "S2: skipped";
256
- const count = details?.count ?? details?.papers?.length ?? 0;
257
- return `${color(theme, "success", "✓")} ${color(theme, "toolTitle", "literature_search")} ${color(theme, "success", pubmedText)} | ${color(theme, "accent", s2Text)} | merged: ${count}`;
202
+ const count = details.count ?? details.papers?.length ?? details.providers?.pubmed?.count;
203
+ const prefix = `${color(theme, "success", "✓")} ${color(theme, "toolTitle", "literature_search")}`;
204
+ if (count === undefined) return `${prefix} PubMed papers`;
205
+ if (count === 0) return `${prefix} no PubMed papers found`;
206
+ return `${prefix} ${count} PubMed ${pluralize(count, "paper")}`;
207
+ }
208
+
209
+ function renderLiteratureStreamingStatus(details: LiteratureResultDetails, theme?: ThemeLike): string {
210
+ const event = details.events?.at(-1);
211
+ const prefix = `${color(theme, "accent", "●")} ${color(theme, "toolTitle", "literature_search")}`;
212
+ if (!event || event.phase === "start" || event.phase === "query_start" || event.phase === "dedupe") {
213
+ return `${prefix} searching PubMed…`;
214
+ }
215
+ if (event.phase === "query_error") {
216
+ return `${color(theme, "error", "!")} ${color(theme, "toolTitle", "literature_search")} PubMed failed: ${truncateText(event.error, 96)}`;
217
+ }
218
+ const count = event.count;
219
+ if (count === 0) return `${prefix} no PubMed papers found`;
220
+ return `${prefix} found ${count} PubMed ${pluralize(count, "paper")}`;
221
+ }
222
+
223
+ function renderExpandedLiteratureResult(details: LiteratureResultDetails, theme?: ThemeLike): string {
224
+ const papers = compactPapersForDisplay(details.papers ?? []);
225
+ const lines = [renderCollapsedLiteratureResult(details, theme)];
226
+ const query = details.providers?.pubmed?.query;
227
+ if (query) lines.push(`${color(theme, "muted", "query:")} ${truncateText(query, 96)}`);
228
+ lines.push(
229
+ ...papers
230
+ .slice(0, MAX_EXPANDED_PAPER_PREVIEW)
231
+ .map((paper, index) => formatPaperPreviewLine(paper, index, theme)),
232
+ );
233
+ const hidden = papers.length - Math.min(papers.length, MAX_EXPANDED_PAPER_PREVIEW);
234
+ if (hidden > 0) lines.push(` ${color(theme, "dim", "…")} ${hidden} more ${pluralize(hidden, "paper")} in tool result`);
235
+ return lines.join("\n");
258
236
  }
259
237
 
260
238
  export function renderLiteratureSearchResult(
@@ -263,28 +241,17 @@ export function renderLiteratureSearchResult(
263
241
  theme?: ThemeLike,
264
242
  ): Text {
265
243
  const details = result.details ?? {};
266
- const transcript = renderLiteratureEventTranscript(details.events, theme);
267
244
  if (options.isPartial) {
268
- return terminalText(transcript || color(theme, "warning", "Searching literature..."));
245
+ return terminalText(renderLiteratureStreamingStatus(details, theme));
269
246
  }
270
247
  if (!options.expanded) {
271
248
  return terminalText(renderCollapsedLiteratureResult(details, theme));
272
249
  }
273
- if (transcript) return terminalText(transcript);
274
-
275
- const papers = compactPapersForDisplay(details.papers ?? []);
276
- const lines = [
277
- `${color(theme, "accent", "●")} ${color(theme, "toolTitle", "literature_search")} result`,
278
- renderCollapsedLiteratureResult(details, theme),
279
- `${color(theme, "warning", "→")} deduplicating by DOI / PMID / title-year`,
280
- ...papers.slice(0, MAX_FINAL_MERGED_PAPERS).map((paper, index) => formatMergedLine(paper, index, theme)),
281
- `${color(theme, "success", "✓")} done: ${papers.length} merged papers`,
282
- ];
283
- return terminalText(lines.join("\n"));
250
+ return terminalText(renderExpandedLiteratureResult(details, theme));
284
251
  }
285
252
 
286
253
  export function renderProviderSearchResult(
287
- provider: "pubmed" | "semantic_scholar",
254
+ provider: "pubmed",
288
255
  result: ToolRenderResult<ProviderResultDetails>,
289
256
  options: RenderOptions,
290
257
  theme?: ThemeLike,
@@ -298,7 +265,7 @@ export function renderProviderSearchResult(
298
265
  return terminalText(color(theme, "warning", text));
299
266
  }
300
267
  if (!options.expanded) {
301
- return terminalText(`${color(theme, "success", "✓")} ${color(theme, "toolTitle", provider === "pubmed" ? "pubmed_search" : "semantic_scholar_search")} ${papers.length} papers`);
268
+ return terminalText(`${color(theme, "success", "✓")} ${color(theme, "toolTitle", "pubmed_search")} ${papers.length} papers`);
302
269
  }
303
270
  const lines = [
304
271
  `${color(theme, providerColor(provider), "→")} ${color(theme, providerColor(provider), providerName)} q1: ${query}`,
package/src/shared.ts CHANGED
@@ -1,5 +1,3 @@
1
- import { mkdir, writeFile } from "node:fs/promises";
2
- import path from "node:path";
3
1
  import type { PaperRecord } from "./types.ts";
4
2
 
5
3
  export const USER_AGENT = "research-skills-literature-tools/0.1 (+https://github.com/fbraza/research-skills)";
@@ -82,22 +80,3 @@ export async function fetchJson<T>(url: string, signal?: AbortSignal, headers?:
82
80
  export function formatPaperText(papers: PaperRecord[]): string {
83
81
  return JSON.stringify(papers, null, 2);
84
82
  }
85
-
86
- export function sanitizeFilename(value: string): string {
87
- return value.replace(/[^a-z0-9._-]+/gi, "_").replace(/^_+|_+$/g, "") || "paper";
88
- }
89
-
90
- export async function savePdf(pdfUrl: string, outputDir: string, preferredId: string, signal?: AbortSignal): Promise<string> {
91
- await mkdir(outputDir, { recursive: true });
92
- const response = await fetch(pdfUrl, {
93
- method: "GET",
94
- signal,
95
- headers: { "user-agent": USER_AGENT, accept: "application/pdf,*/*" },
96
- redirect: "follow",
97
- });
98
- if (!response.ok) throw new Error(`Failed to download PDF (${response.status})`);
99
- const bytes = Buffer.from(await response.arrayBuffer());
100
- const filePath = path.resolve(outputDir, `${sanitizeFilename(preferredId)}.pdf`);
101
- await writeFile(filePath, bytes);
102
- return filePath;
103
- }
package/src/types.ts CHANGED
@@ -1,7 +1,6 @@
1
1
  export type PaperRecord = {
2
2
  pmid?: string;
3
3
  doi?: string;
4
- s2_id?: string;
5
4
  title: string;
6
5
  abstract?: string;
7
6
  authors?: string[];
@@ -9,22 +8,10 @@ export type PaperRecord = {
9
8
  year?: number;
10
9
  publication_types?: string[];
11
10
  mesh_terms?: string[];
12
- citation_count?: number;
13
- tldr?: string;
14
- open_access_pdf?: string;
15
- external_ids?: Record<string, string>;
16
11
  source?: string;
17
12
  sources?: string[];
18
13
  date?: string;
19
14
  category?: string;
20
15
  version?: string;
21
16
  license?: string;
22
- pdf_url?: string;
23
- };
24
-
25
- export type FullTextRouteResult = {
26
- source: string;
27
- pdf_url?: string;
28
- access_note: string;
29
- is_preprint?: boolean;
30
17
  };
@@ -1,34 +0,0 @@
1
- # Full-Text Access Guide
2
-
3
- **Workflow:** literature
4
- **Purpose:** Retrieve PDFs for prioritised papers using a consistent fallback chain.
5
-
6
- ## Access order
7
-
8
- 1. **PubMed Central (PMC)**
9
- - Preferred for PubMed-indexed papers with open full text.
10
- - Use PubMed/PMC linking first when a PMID is available.
11
-
12
- 2. **Publisher open-access page**
13
- - Resolve DOI at `https://doi.org/<doi>`.
14
- - Look for `citation_pdf_url`, explicit PDF links, or embedded PDF viewers.
15
-
16
- 3. **Sci-Hub fallback**
17
- - Use only as the final fallback after OA routes are exhausted.
18
- - Record that Sci-Hub was used.
19
-
20
- ## Per-paper logging
21
-
22
- For each paper, record:
23
- - PMID
24
- - DOI
25
- - source used: `pmc`, `publisher_oa`, `scihub`, or `not_found`
26
- - direct PDF URL if found
27
- - local saved path if downloaded
28
- - access note
29
-
30
- ## Notes
31
-
32
- - PMC and publisher OA should always be attempted before Sci-Hub.
33
- - If no DOI is known but PMID exists, try resolving identifiers from PubMed metadata first.
34
- - If no PDF is found, keep the paper in the synthesis and note `not_found`.
@@ -1,40 +0,0 @@
1
- # Sci-Hub PDF Resolver — Routine Quick-Reference
2
-
3
- Resolves DOIs to direct PDF URLs via Sci-Hub mirrors. **Always check institutional access and open-access sources first** (PubMed Central, publisher OA). Use Sci-Hub only as a last resort.
4
-
5
- **Script:** `scripts/scihub_pdf_resolver.py` — zero-dependency Python script.
6
-
7
- ## CLI Usage
8
-
9
- ```bash
10
- python scripts/scihub_pdf_resolver.py "10.1038/s41586-024-07000-0"
11
- ```
12
-
13
- ## Output Codes
14
-
15
- | Output | Meaning |
16
- |---|---|
17
- | Prints a URL | Direct PDF link, ready to download |
18
- | `NOT_FOUND` | Sci-Hub does not have this paper. Check for `OA_LINK <url>` for open-access alternatives. |
19
- | `MIRROR_ERROR` | Sci-Hub mirrors could not be reached reliably |
20
- | `INVALID_INPUT` | The DOI is malformed |
21
-
22
- ## Exit Codes
23
-
24
- `0` = found, `1` = not found, `2` = mirror error, `3` = invalid input.
25
-
26
- ## Python API
27
-
28
- ```python
29
- from scripts.scihub_pdf_resolver import resolve_pdf
30
-
31
- status, url = resolve_pdf("10.1038/s41586-024-07000-0")
32
- if status == "FOUND":
33
- print(f"PDF available at: {url}")
34
- elif status == "NOT_FOUND" and url:
35
- print(f"Open-access link: {url}")
36
- ```
37
-
38
- ## Mirror Configuration
39
-
40
- Set `SCIHUB_MIRRORS` environment variable (comma-separated URLs) to override defaults. Defaults: `sci-hub.st`, `sci-hub.ru`, `sci-hub.se`.
@@ -1,50 +0,0 @@
1
- # Semantic Scholar — Routine Quick-Reference
2
-
3
- Quick-start for paper search, paper lookup, and author search. For citation network analysis or bulk queries → consult the full API documentation.
4
-
5
- ## Paper Search
6
-
7
- ```
8
- GET https://api.semanticscholar.org/graph/v1/paper/search
9
- ```
10
-
11
- **Parameters:**
12
- | Parameter | Value |
13
- |---|---|
14
- | `query` | Search terms |
15
- | `limit` | Max results (default 10, max 100) |
16
- | `offset` | Pagination offset |
17
- | `fields` | Comma-separated fields to return |
18
- | `year` | `<YYYY>` or `<YYYY-YYYY>` range |
19
- | `fieldsOfStudy` | Field of study filter |
20
-
21
- **Useful fields:** `paperId`, `title`, `abstract`, `year`, `referenceCount`, `citationCount`, `authors`, `journal`, `publicationTypes`, `tldr`, `openAccessPdf`, `externalIds`
22
-
23
- **Example:**
24
- ```
25
- https://api.semanticscholar.org/graph/v1/paper/search?query=CRISPR+off-target&limit=20&fields=title,abstract,year,citationCount,openAccessPdf
26
- ```
27
-
28
- ## Paper Details (by ID)
29
-
30
- ```
31
- GET https://api.semanticscholar.org/graph/v1/paper/{paper_id}
32
- ```
33
-
34
- `paper_id` accepts: S2 ID, DOI (`DOI:10.xxx`), PMID (`PMID:12345`), ArXiv ID.
35
-
36
- **Example:**
37
- ```
38
- https://api.semanticscholar.org/graph/v1/paper/DOI:10.1038/s41586-024-07000-0?fields=title,abstract,year,citationCount,references,citations
39
- ```
40
-
41
- ## Author Search
42
-
43
- ```
44
- GET https://api.semanticscholar.org/graph/v1/author/search?query=<name>
45
- GET https://api.semanticscholar.org/graph/v1/author/{author_id}/papers
46
- ```
47
-
48
- ## Rate Limits
49
-
50
- 100 requests / 5 min (unauthenticated). Higher limits available with an API key.