@zenalexa/unicli 0.221.0 → 0.221.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +12 -12
- package/README.md +13 -11
- package/README.zh-CN.md +13 -11
- package/dist/adapters/acl-anthology/papers.d.ts +16 -0
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -0
- package/dist/adapters/acl-anthology/papers.js +135 -0
- package/dist/adapters/acl-anthology/papers.js.map +1 -0
- package/dist/adapters/arxiv/papers.js +2 -0
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +5 -0
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/crossref/works.d.ts +42 -0
- package/dist/adapters/crossref/works.d.ts.map +1 -0
- package/dist/adapters/crossref/works.js +157 -0
- package/dist/adapters/crossref/works.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +17 -0
- package/dist/adapters/cvf/papers.d.ts.map +1 -0
- package/dist/adapters/cvf/papers.js +124 -0
- package/dist/adapters/cvf/papers.js.map +1 -0
- package/dist/adapters/dblp/publications.js +4 -0
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/cite.js +1 -0
- package/dist/adapters/google-scholar/cite.js.map +1 -1
- package/dist/adapters/google-scholar/profile.js +5 -0
- package/dist/adapters/google-scholar/profile.js.map +1 -1
- package/dist/adapters/google-scholar/search.js +5 -0
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.js +1 -0
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/neurips/proceedings.d.ts +17 -0
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -0
- package/dist/adapters/neurips/proceedings.js +112 -0
- package/dist/adapters/neurips/proceedings.js.map +1 -0
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +32 -0
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.js +5 -0
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +35 -0
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -0
- package/dist/adapters/pmlr/proceedings.js +139 -0
- package/dist/adapters/pmlr/proceedings.js.map +1 -0
- package/dist/adapters/pubmed/articles.js +5 -0
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/semantic-scholar/papers.d.ts +36 -0
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -0
- package/dist/adapters/semantic-scholar/papers.js +214 -0
- package/dist/adapters/semantic-scholar/papers.js.map +1 -0
- package/dist/adapters/unpaywall/works.d.ts +33 -0
- package/dist/adapters/unpaywall/works.d.ts.map +1 -0
- package/dist/adapters/unpaywall/works.js +101 -0
- package/dist/adapters/unpaywall/works.js.map +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +15 -1
- package/dist/cli.js.map +1 -1
- package/dist/commands/do.d.ts +30 -0
- package/dist/commands/do.d.ts.map +1 -0
- package/dist/commands/do.js +248 -0
- package/dist/commands/do.js.map +1 -0
- package/dist/commands/extract.d.ts +34 -0
- package/dist/commands/extract.d.ts.map +1 -0
- package/dist/commands/extract.js +316 -0
- package/dist/commands/extract.js.map +1 -0
- package/dist/commands/scholar.d.ts +33 -0
- package/dist/commands/scholar.d.ts.map +1 -0
- package/dist/commands/scholar.js +494 -0
- package/dist/commands/scholar.js.map +1 -0
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +2 -5
- package/dist/commands/search.js.map +1 -1
- package/dist/discovery/aliases.d.ts +2 -2
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +182 -11
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/intents.d.ts +10 -0
- package/dist/discovery/intents.d.ts.map +1 -0
- package/dist/discovery/intents.js +255 -0
- package/dist/discovery/intents.js.map +1 -0
- package/dist/discovery/search.d.ts +4 -1
- package/dist/discovery/search.d.ts.map +1 -1
- package/dist/discovery/search.js +28 -140
- package/dist/discovery/search.js.map +1 -1
- package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
- package/dist/fast-path/handlers/discovery.js +17 -3
- package/dist/fast-path/handlers/discovery.js.map +1 -1
- package/dist/manifest-compact.txt +13 -11
- package/dist/manifest-search.json +1 -1
- package/dist/manifest.json +462 -68
- package/dist/mcp/handler.d.ts.map +1 -1
- package/dist/mcp/handler.js +14 -2
- package/dist/mcp/handler.js.map +1 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +11 -3
- package/dist/mcp/tools.js.map +1 -1
- package/dist/registry.d.ts +1 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +5 -0
- package/dist/registry.js.map +1 -1
- package/dist/types/scholarly.d.ts +49 -0
- package/dist/types/scholarly.d.ts.map +1 -0
- package/dist/types/scholarly.js +16 -0
- package/dist/types/scholarly.js.map +1 -0
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.ts +157 -0
- package/src/adapters/arxiv/download.yaml +1 -1
- package/src/adapters/arxiv/paper.yaml +1 -1
- package/src/adapters/arxiv/papers.ts +2 -0
- package/src/adapters/arxiv/search.yaml +1 -1
- package/src/adapters/arxiv/trending.yaml +1 -1
- package/src/adapters/baidu-scholar/search.ts +5 -0
- package/src/adapters/crossref/works.ts +209 -0
- package/src/adapters/cvf/papers.ts +136 -0
- package/src/adapters/dblp/publications.ts +4 -0
- package/src/adapters/google-scholar/cite.ts +1 -0
- package/src/adapters/google-scholar/profile.ts +5 -0
- package/src/adapters/google-scholar/search.ts +5 -0
- package/src/adapters/hf/paper.test.ts +10 -0
- package/src/adapters/hf/paper.ts +1 -0
- package/src/adapters/hf/top.yaml +1 -1
- package/src/adapters/huggingface-papers/daily.yaml +1 -1
- package/src/adapters/huggingface-papers/search.yaml +1 -1
- package/src/adapters/neurips/proceedings.ts +126 -0
- package/src/adapters/openalex/works.ts +33 -0
- package/src/adapters/openreview/papers.ts +5 -0
- package/src/adapters/pmlr/proceedings.ts +167 -0
- package/src/adapters/pubmed/articles.ts +5 -0
- package/src/adapters/semantic-scholar/papers.ts +268 -0
- package/src/adapters/unpaywall/works.ts +138 -0
- package/src/adapters/zotero/search.yaml +1 -1
|
@@ -303,6 +303,7 @@ cli({
|
|
|
303
303
|
{ name: "limit", type: "int", default: 25, description: "Max results" },
|
|
304
304
|
],
|
|
305
305
|
columns: ["rank", "id", "title", "authors", "venue", "pdate", "url"],
|
|
306
|
+
capabilities: ["http.fetch", "scholar.search", "scholar.review"],
|
|
306
307
|
func: async (_page, kwargs) => {
|
|
307
308
|
const query = String(kwargs.query ?? "").trim();
|
|
308
309
|
if (!query) throw new Error("openreview search query cannot be empty.");
|
|
@@ -363,6 +364,7 @@ cli({
|
|
|
363
364
|
"pdf",
|
|
364
365
|
"url",
|
|
365
366
|
],
|
|
367
|
+
capabilities: ["http.fetch", "scholar.get", "scholar.pdf", "scholar.review"],
|
|
366
368
|
func: async (_page, kwargs) => {
|
|
367
369
|
const id = requireForumId(kwargs.id);
|
|
368
370
|
const notes = notesFromEnvelope(
|
|
@@ -409,6 +411,7 @@ cli({
|
|
|
409
411
|
{ name: "limit", type: "int", default: 50, description: "Max submissions" },
|
|
410
412
|
],
|
|
411
413
|
columns: ["rank", "id", "title", "authors", "venue", "pdate", "url"],
|
|
414
|
+
capabilities: ["http.fetch", "scholar.author", "scholar.search"],
|
|
412
415
|
func: async (_page, kwargs) => {
|
|
413
416
|
const profile = requireProfileId(kwargs.profile);
|
|
414
417
|
const limit = requireOpenReviewLimit(kwargs.limit, 50, 1000);
|
|
@@ -473,6 +476,7 @@ cli({
|
|
|
473
476
|
"pdf",
|
|
474
477
|
"url",
|
|
475
478
|
],
|
|
479
|
+
capabilities: ["http.fetch", "scholar.venue", "scholar.search"],
|
|
476
480
|
func: async (_page, kwargs) => {
|
|
477
481
|
const venue = String(kwargs.venue ?? "").trim();
|
|
478
482
|
if (!venue) throw new Error("openreview venue cannot be empty.");
|
|
@@ -531,6 +535,7 @@ cli({
|
|
|
531
535
|
},
|
|
532
536
|
],
|
|
533
537
|
columns: ["type", "author", "rating", "confidence", "text"],
|
|
538
|
+
capabilities: ["http.fetch", "scholar.review"],
|
|
534
539
|
func: async (_page, kwargs) => {
|
|
535
540
|
const forum = requireForumId(kwargs.forum, "forum");
|
|
536
541
|
const maxLength = coerceOpenReviewInt(
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::pmlr::proceedings
|
|
3
|
+
* @does Registers Proceedings of Machine Learning Research volume search using official citeproc.yaml metadata.
|
|
4
|
+
* @needs proceedings.mlr.press citeproc.yaml files, js-yaml, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.search, scholar.get, scholar.pdf, and scholar.venue
|
|
6
|
+
* @breaks Missing volume metadata or citeproc drift surfaces as explicit adapter errors.
|
|
7
|
+
* @invariants Volume is explicit; rows are filtered locally from official YAML metadata, not scraped from rendered cards.
|
|
8
|
+
* @side-effects HTTPS egress to proceedings.mlr.press only
|
|
9
|
+
* @perf O(N) over one proceedings volume
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import yaml from "js-yaml";
|
|
17
|
+
import { cli, Strategy } from "../../registry.js";
|
|
18
|
+
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
19
|
+
|
|
20
|
+
/**
 * One record from a PMLR volume's `citeproc.yaml`.
 *
 * Scalars are typed `unknown` because the YAML is parsed without schema
 * validation; readers narrow each field before using it.
 */
interface PmlrEntry {
  /** Paper identifier; `mapPmlrEntry` rejects entries where it is missing. */
  id?: unknown;
  title?: unknown;
  abstract?: unknown;
  /** Author list; given and family names are joined with a space. */
  author?: { given?: unknown; family?: unknown }[];
  /** Proceedings title, surfaced as the record's venue. */
  "container-title"?: unknown;
  /** Volume number, used to build the `pmlr:<volume>` type tag. */
  volume?: unknown;
  /** Issue date; the leading `date-parts` value supplies the year. */
  issued?: { "date-parts"?: unknown[] };
  /** Landing page URL. */
  URL?: unknown;
  /** Direct PDF URL. */
  PDF?: unknown;
}
|
|
31
|
+
|
|
32
|
+
/** Returns the trimmed text when `value` is a string; any other type yields "". */
function str(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
|
|
35
|
+
|
|
36
|
+
/**
 * Flattens citeproc author objects into display names ("Given Family").
 *
 * @param value - The entry's `author` list as parsed from YAML.
 * @returns Non-empty names in input order, or `undefined` when the input is
 *   not an array or yields no usable name.
 */
function authors(value: PmlrEntry["author"]): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  const names: string[] = [];
  for (const person of value as unknown[]) {
    // The YAML is not schema-validated, so a list item may be null or a bare
    // scalar; skip those instead of dereferencing them.
    if (typeof person !== "object" || person === null) continue;
    const { given, family } = person as { given?: unknown; family?: unknown };
    const name = [given, family]
      .map((part) => str(part))
      .filter(Boolean)
      .join(" ");
    if (name) names.push(name);
  }
  return names.length > 0 ? names : undefined;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Extracts the publication year from an entry's `issued.date-parts`.
 *
 * Accepts both the flat list (`[2024, 7, 21]`) and the nested list-of-lists
 * form that CSL-JSON specifies (`[[2024, 7, 21]]`), so either layout yields
 * the same year.
 *
 * @param entry - Parsed citeproc entry.
 * @returns The year as a finite number, or `undefined` when it is absent or
 *   not numeric.
 */
function issuedYear(entry: PmlrEntry): number | undefined {
  const parts: unknown = entry.issued?.["date-parts"];
  if (!Array.isArray(parts)) return undefined;
  const head: unknown = parts[0];
  // Nested form: the first date is itself an array whose first item is the year.
  const year: unknown = Array.isArray(head) ? head[0] : head;
  return typeof year === "number" && Number.isFinite(year) ? year : undefined;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Parses the text of a PMLR `citeproc.yaml` file into entry objects.
 *
 * @param text - Raw YAML document.
 * @returns The top-level list's object items; an empty array when the
 *   document's root is not a list.
 * @throws Whatever `yaml.load` throws for malformed YAML.
 */
export function parsePmlrCiteproc(text: string): PmlrEntry[] {
  const parsed: unknown = yaml.load(text);
  if (!Array.isArray(parsed)) return [];
  // The document is external data: keep only mapping items so later property
  // reads cannot hit null or a bare scalar.
  return parsed.filter(
    (item): item is PmlrEntry =>
      typeof item === "object" && item !== null && !Array.isArray(item),
  );
}
|
|
57
|
+
|
|
58
|
+
/**
 * Converts one citeproc entry into a `ScholarlyWorkRecord`.
 *
 * @param entry - Parsed citeproc entry.
 * @param source - Adapter name stored in `source_adapter`.
 * @returns The mapped record, stamped with the current time in `retrieved_at`.
 * @throws Error when the entry has no string `id`.
 */
export function mapPmlrEntry(
  entry: PmlrEntry,
  source: string,
): ScholarlyWorkRecord {
  const paperId = str(entry.id);
  if (paperId === "") throw new Error("PMLR entry did not include id.");

  const abstractText = str(entry.abstract);
  const venueName = str(entry["container-title"]);
  const pdfLink = str(entry.PDF);
  const landingLink = str(entry.URL);
  // Tag the record with its volume when the entry carries one.
  const kind = entry.volume ? `pmlr:${String(entry.volume)}` : "pmlr";

  const record: ScholarlyWorkRecord = {
    id: paperId,
    title: str(entry.title),
    abstract: abstractText || undefined,
    authors: authors(entry.author),
    year: issuedYear(entry),
    venue: venueName || undefined,
    type: kind,
    pdf_url: pdfLink || undefined,
    source_adapter: source,
    source_url: landingLink || undefined,
    retrieved_at: new Date().toISOString(),
  };
  return record;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Normalizes a volume argument to its bare digits.
 *
 * @param value - Volume as given by the caller, with or without a leading "v".
 * @returns The digit string (for example "235").
 * @throws Error when nothing but digits should remain and something else does.
 */
function requireVolume(value: unknown): string {
  const digits = String(value ?? "").trim().replace(/^v/i, "");
  if (/^\d+$/.test(digits)) return digits;
  throw new Error(`pmlr volume "${String(value)}" is not valid.`);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Downloads and parses the citeproc metadata of one PMLR volume.
 *
 * @param volume - Bare volume digits (no leading "v").
 * @returns Entries parsed by `parsePmlrCiteproc`.
 * @throws Error on HTTP 404 and on any other non-OK status.
 */
async function fetchVolume(volume: string): Promise<PmlrEntry[]> {
  const url = `https://proceedings.mlr.press/v${volume}/assets/bib/citeproc.yaml`;
  const requestHeaders = {
    Accept: "application/x-yaml,text/yaml,text/plain",
    "User-Agent": "unicli-pmlr/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
  };
  const response = await fetch(url, { headers: requestHeaders });
  if (!response.ok) {
    // 404 gets its own wording; every other failure reports the status code.
    if (response.status === 404) {
      throw new Error(`PMLR volume v${volume} returned no metadata.`);
    }
    throw new Error(`PMLR volume v${volume} returned HTTP ${response.status}.`);
  }
  const body = await response.text();
  return parsePmlrCiteproc(body);
}
|
|
104
|
+
|
|
105
|
+
// Registers `pmlr search`: a case-insensitive substring search over the
// title, abstract, and authors of one proceedings volume.
cli({
  site: "pmlr",
  name: "search",
  description: "Search a PMLR proceedings volume (e.g. v235 for ICML 2024)",
  domain: "proceedings.mlr.press",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "query", type: "str", required: true, positional: true },
    { name: "volume", type: "str", default: "235" },
    { name: "limit", type: "int", default: 20 },
  ],
  columns: ["id", "title", "authors", "year", "venue", "pdf_url", "source_url"],
  capabilities: [
    "http.fetch",
    "scholar.search",
    "scholar.venue",
    "scholar.pdf",
  ],
  func: async (_page, kwargs) => {
    const query = String(kwargs.query ?? "")
      .trim()
      .toLowerCase();
    if (!query) throw new Error("pmlr search query cannot be empty.");
    const volume = requireVolume(kwargs.volume);
    // A non-numeric limit would become NaN, make slice() return nothing, and
    // surface as a misleading "no papers matched" error; reject it up front.
    const requested = Number(kwargs.limit ?? 20);
    if (Number.isNaN(requested)) {
      throw new Error(
        `pmlr limit "${String(kwargs.limit)}" is not a valid number.`,
      );
    }
    // Clamp to the 1..200 window.
    const limit = Math.min(Math.max(Math.trunc(requested), 1), 200);
    const rows = (await fetchVolume(volume))
      .map((entry) => mapPmlrEntry(entry, "pmlr"))
      .filter((row) =>
        `${row.title} ${row.abstract ?? ""} ${row.authors?.join(" ") ?? ""}`
          .toLowerCase()
          .includes(query),
      )
      .slice(0, limit);
    if (rows.length === 0)
      throw new Error(`No PMLR v${volume} papers matched "${query}".`);
    return rows;
  },
});
|
|
143
|
+
|
|
144
|
+
// Registers `pmlr paper`: looks up a single paper by its exact id within one
// proceedings volume.
cli({
  site: "pmlr",
  name: "paper",
  description: "Fetch a PMLR paper by id inside a proceedings volume",
  domain: "proceedings.mlr.press",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "id", type: "str", required: true, positional: true },
    { name: "volume", type: "str", default: "235" },
  ],
  columns: ["id", "title", "authors", "year", "venue", "pdf_url", "source_url"],
  capabilities: ["http.fetch", "scholar.get", "scholar.pdf"],
  func: async (_page, kwargs) => {
    // `ref` is accepted as an alternative spelling of `id`.
    const wanted = String(kwargs.id ?? kwargs.ref ?? "").trim();
    if (wanted === "") throw new Error("pmlr paper id is required.");

    const volume = requireVolume(kwargs.volume);
    const entries = await fetchVolume(volume);
    const records = entries.map((entry) => mapPmlrEntry(entry, "pmlr"));
    const match = records.find((record) => record.id === wanted);
    if (match === undefined) {
      throw new Error(`No PMLR v${volume} paper found with id "${wanted}".`);
    }
    return [match];
  },
});
|
|
@@ -304,6 +304,7 @@ cli({
|
|
|
304
304
|
{ name: "limit", type: "int", default: 20, description: "Max results" },
|
|
305
305
|
],
|
|
306
306
|
columns: SUMMARY_COLUMNS,
|
|
307
|
+
capabilities: ["http.fetch", "scholar.search"],
|
|
307
308
|
func: async (_page, kwargs) => {
|
|
308
309
|
const query = requirePubMedText(kwargs.query, "query");
|
|
309
310
|
const limit = requirePubMedLimit(kwargs.limit);
|
|
@@ -345,6 +346,7 @@ cli({
|
|
|
345
346
|
},
|
|
346
347
|
],
|
|
347
348
|
columns: ["field", "value"],
|
|
349
|
+
capabilities: ["http.fetch", "scholar.get"],
|
|
348
350
|
func: async (_page, kwargs) => {
|
|
349
351
|
const pmid = requirePmid(kwargs.pmid);
|
|
350
352
|
const xml = String(
|
|
@@ -371,6 +373,7 @@ cli({
|
|
|
371
373
|
{ name: "limit", type: "int", default: 20, description: "Max results" },
|
|
372
374
|
],
|
|
373
375
|
columns: SUMMARY_COLUMNS,
|
|
376
|
+
capabilities: ["http.fetch", "scholar.author", "scholar.search"],
|
|
374
377
|
func: async (_page, kwargs) => {
|
|
375
378
|
const name = requirePubMedText(kwargs.name, "author");
|
|
376
379
|
const limit = requirePubMedLimit(kwargs.limit);
|
|
@@ -414,6 +417,7 @@ cli({
|
|
|
414
417
|
{ name: "limit", type: "int", default: 20, description: "Max results" },
|
|
415
418
|
],
|
|
416
419
|
columns: SUMMARY_COLUMNS,
|
|
420
|
+
capabilities: ["http.fetch", "scholar.citations", "scholar.references"],
|
|
417
421
|
func: async (_page, kwargs) => {
|
|
418
422
|
const pmid = requirePmid(kwargs.pmid);
|
|
419
423
|
const direction = requireChoice(
|
|
@@ -459,6 +463,7 @@ cli({
|
|
|
459
463
|
{ name: "limit", type: "int", default: 20, description: "Max results" },
|
|
460
464
|
],
|
|
461
465
|
columns: RELATED_COLUMNS,
|
|
466
|
+
capabilities: ["http.fetch", "scholar.search"],
|
|
462
467
|
func: async (_page, kwargs) => {
|
|
463
468
|
const pmid = requirePmid(kwargs.pmid);
|
|
464
469
|
const limit = requirePubMedLimit(kwargs.limit);
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::semantic-scholar::papers
|
|
3
|
+
* @does Registers Semantic Scholar Graph API paper search, detail, citations, references, and PDF discovery commands.
|
|
4
|
+
* @needs api.semanticscholar.org Graph v1, optional SEMANTIC_SCHOLAR_API_KEY, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.* capability tags
|
|
6
|
+
* @breaks Graph API rate limits or response-shape drift surface as explicit adapter errors; no cached fallback is used.
|
|
7
|
+
* @invariants Paper references are normalized to Semantic Scholar's accepted DOI:/ARXIV:/paperId formats; output maps to ScholarlyWorkRecord.
|
|
8
|
+
* @side-effects HTTPS egress to api.semanticscholar.org only
|
|
9
|
+
* @perf O(limit) JSON mapping per command
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { cli, Strategy } from "../../registry.js";
|
|
17
|
+
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
18
|
+
|
|
19
|
+
/** Root of the Semantic Scholar Graph API (v1). */
const API = "https://api.semanticscholar.org/graph/v1";

/** Comma-separated `fields` query value sent with every Graph API request in this file. */
const FIELDS =
  "paperId,title,abstract,year,authors,citationCount,referenceCount," +
  "venue,publicationVenue,url,openAccessPdf,externalIds";
|
|
34
|
+
|
|
35
|
+
/**
 * Shape of a paper object returned by the Semantic Scholar Graph API for the
 * requested `FIELDS`.
 *
 * Scalars are `unknown` because the JSON is not schema-validated; readers
 * narrow each value before use. Object-valued fields are typed as nullable
 * so readers keep using optional chaining on them.
 */
interface S2Paper {
  paperId?: unknown;
  title?: unknown;
  abstract?: unknown;
  year?: unknown;
  authors?: Array<{ name?: unknown }>;
  citationCount?: unknown;
  referenceCount?: unknown;
  venue?: unknown;
  /** Structured venue object; requested in `FIELDS` alongside the plain `venue` string. */
  publicationVenue?: { name?: unknown } | null;
  url?: unknown;
  openAccessPdf?: { url?: unknown } | null;
  /** External identifiers keyed by scheme; this file reads `DOI` and `ArXiv`. */
  externalIds?: Record<string, unknown> | null;
}
|
|
48
|
+
|
|
49
|
+
/** Returns the trimmed text when `value` is a string; any other type yields "". */
function str(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
|
|
52
|
+
|
|
53
|
+
/** Returns `value` when it is a finite number; anything else yields `undefined`. */
function num(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  return Number.isFinite(value) ? value : undefined;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Strips a leading `doi:` label and a `doi.org` / `dx.doi.org` URL prefix.
 *
 * @param value - Candidate DOI in bare, labelled, or URL form.
 * @returns The remaining text; "" when `value` is not a string. The result
 *   is not validated as a DOI.
 */
function bareDoi(value: unknown): string {
  return (
    str(value)
      // Swallow whitespace after the label so "doi: 10.x/y" does not keep a
      // leading space that later fails DOI pattern checks.
      .replace(/^doi:\s*/i, "")
      .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "")
  );
}
|
|
64
|
+
|
|
65
|
+
/**
 * Reduces an arXiv reference to its bare identifier.
 *
 * Removes, in order: a leading `arxiv:` label, an `arxiv.org/abs|pdf/` URL
 * prefix, a trailing `.pdf`, and a trailing version suffix such as `v2`.
 *
 * @param value - Candidate arXiv reference.
 * @returns The stripped identifier; "" when `value` is not a string.
 */
function bareArxiv(value: unknown): string {
  const strippers = [
    /^arxiv:/i,
    /^https?:\/\/arxiv\.org\/(?:abs|pdf)\//i,
    /\.pdf$/i,
    /v\d+$/i,
  ];
  let id = str(value);
  for (const pattern of strippers) {
    id = id.replace(pattern, "");
  }
  return id;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Normalizes a user-supplied paper reference into an id the Semantic Scholar
 * Graph API accepts in its `/paper/{id}` path.
 *
 * Recognized inputs, checked in this order:
 * - DOI (bare, `doi:`-labelled, or doi.org URL) -> `DOI:<doi>`
 * - arXiv id (bare new-style, `arxiv:`-labelled, or arxiv.org URL) -> `ARXIV:<id>`
 * - 40-character hex paperId -> returned unchanged
 * - semanticscholar.org paper page URL ending in a paperId -> that paperId
 * - `CorpusId:`, `MAG:`, `ACL:`, `PMID:`, `PMCID:` prefixed ids -> prefix
 *   rewritten to the casing used in the API documentation
 *
 * @param value - Raw reference from the command arguments.
 * @returns The normalized reference.
 * @throws Error when the reference is empty or matches none of the forms.
 */
export function requireSemanticScholarPaperRef(value: unknown): string {
  const raw = String(value ?? "").trim();
  if (!raw) throw new Error("semantic-scholar paper reference is required.");

  const doi = bareDoi(raw);
  if (/^10\.\S+\/\S+/.test(doi)) return `DOI:${doi}`;

  if (
    /^(?:arxiv:|https?:\/\/arxiv\.org\/(?:abs|pdf)\/|\d{4}\.\d{4,5})/i.test(raw)
  ) {
    return `ARXIV:${bareArxiv(raw)}`;
  }

  if (/^[a-f0-9]{40}$/i.test(raw)) return raw;

  // Paper page URLs (the form this adapter emits as source_url) carry the
  // paperId as the final path segment, optionally after a title slug.
  const pageUrl =
    /^https?:\/\/(?:www\.)?semanticscholar\.org\/paper\/(?:[^/?#]+\/)?([a-f0-9]{40})(?=$|[/?#])/i.exec(
      raw,
    );
  if (pageUrl?.[1]) return pageUrl[1];

  // Other id schemes the Graph API documents; pass them through with the
  // documented prefix casing.
  const prefixed = /^(corpusid|mag|acl|pmid|pmcid):\s*(\S+)$/i.exec(raw);
  if (prefixed?.[1] && prefixed[2]) {
    const canonical: Record<string, string> = {
      corpusid: "CorpusId",
      mag: "MAG",
      acl: "ACL",
      pmid: "PMID",
      pmcid: "PMCID",
    };
    const prefix = canonical[prefixed[1].toLowerCase()];
    if (prefix) return `${prefix}:${prefixed[2]}`;
  }

  throw new Error(
    `semantic-scholar paper reference "${raw}" is not recognised.`,
  );
}
|
|
88
|
+
|
|
89
|
+
/**
 * Builds the request headers for Semantic Scholar calls.
 *
 * @returns `Accept` and `User-Agent`, plus `x-api-key` when the
 *   `SEMANTIC_SCHOLAR_API_KEY` environment variable is set to a non-blank value.
 */
function headers(): Record<string, string> {
  const apiKey = process.env.SEMANTIC_SCHOLAR_API_KEY?.trim();
  return {
    Accept: "application/json",
    "User-Agent":
      "unicli-semantic-scholar/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
    ...(apiKey ? { "x-api-key": apiKey } : {}),
  };
}
|
|
99
|
+
|
|
100
|
+
/**
 * Performs a GET against the Graph API and returns the decoded JSON body.
 *
 * @param path - Path and query string appended to `API`.
 * @param label - Human-readable request name used as the error message prefix.
 * @returns The parsed JSON payload.
 * @throws Error on HTTP 404, HTTP 429, any other non-OK status, or when the
 *   payload carries a truthy `error` or `message` field.
 */
async function fetchS2(path: string, label: string): Promise<unknown> {
  const response = await fetch(`${API}${path}`, { headers: headers() });

  switch (response.status) {
    case 404:
      throw new Error(`${label} returned no result.`);
    case 429:
      throw new Error(
        `${label} returned HTTP 429; set SEMANTIC_SCHOLAR_API_KEY or retry later.`,
      );
    default:
      break;
  }
  if (!response.ok) {
    throw new Error(`${label} returned HTTP ${response.status}.`);
  }

  const payload = (await response.json()) as {
    error?: unknown;
    message?: unknown;
  };
  // A successful status can still carry an API-level error description.
  if (!(payload.error || payload.message)) return payload;
  throw new Error(
    `${label} returned API error: ${String(payload.error ?? payload.message)}.`,
  );
}
|
|
121
|
+
|
|
122
|
+
/**
 * Converts a Semantic Scholar paper object into a `ScholarlyWorkRecord`.
 *
 * @param paper - Paper object from the Graph API.
 * @param source - Adapter name stored in `source_adapter`.
 * @returns The mapped record, stamped with the current time in `retrieved_at`.
 * @throws Error when the paper has no string `paperId`.
 */
export function mapSemanticScholarPaper(
  paper: S2Paper,
  source: string,
): ScholarlyWorkRecord {
  const id = str(paper.paperId);
  if (!id)
    throw new Error("Semantic Scholar returned a paper without paperId.");
  const doi = bareDoi(paper.externalIds?.DOI);
  const arxiv = bareArxiv(paper.externalIds?.ArXiv);

  // `publicationVenue` is part of the requested fields; use its name when the
  // plain `venue` string is empty. The field is narrowed locally so this
  // function does not rely on the interface declaring it.
  const { publicationVenue } = paper as S2Paper & {
    publicationVenue?: { name?: unknown } | null;
  };
  const venue = str(paper.venue) || str(publicationVenue?.name);

  // Tolerate null items in the author list rather than dereferencing them.
  const authorNames = Array.isArray(paper.authors)
    ? paper.authors
        .map((author: { name?: unknown } | null) => str(author?.name))
        .filter(Boolean)
    : undefined;

  return {
    id,
    title: str(paper.title),
    abstract: str(paper.abstract) || undefined,
    authors: authorNames,
    year: num(paper.year),
    venue: venue || undefined,
    doi: doi || undefined,
    arxiv_id: arxiv || undefined,
    semantic_scholar_id: id,
    cited_by_count: num(paper.citationCount),
    references_count: num(paper.referenceCount),
    pdf_url: str(paper.openAccessPdf?.url) || undefined,
    source_adapter: source,
    // Fall back to the canonical paper page when the API omits `url`.
    source_url: str(paper.url) || `https://www.semanticscholar.org/paper/${id}`,
    retrieved_at: new Date().toISOString(),
  };
}
|
|
151
|
+
|
|
152
|
+
/**
 * Maps a list of Semantic Scholar paper objects to records.
 *
 * @param papers - Expected to be an array of paper objects; any non-array
 *   value is treated as an empty list.
 * @param source - Adapter name stored on each record.
 * @returns One record per list item, in input order.
 */
function rows(
  papers: unknown,
  source = "semantic-scholar",
): ScholarlyWorkRecord[] {
  if (!Array.isArray(papers)) return [];
  const records: ScholarlyWorkRecord[] = [];
  papers.forEach((paper) => {
    records.push(mapSemanticScholarPaper(paper as S2Paper, source));
  });
  return records;
}
|
|
159
|
+
|
|
160
|
+
// Registers `semantic-scholar search`: keyword search over papers.
cli({
  site: "semantic-scholar",
  name: "search",
  description: "Search Semantic Scholar papers",
  domain: "api.semanticscholar.org",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "query", type: "str", required: true, positional: true },
    { name: "limit", type: "int", default: 20 },
  ],
  columns: [
    "id",
    "title",
    "authors",
    "year",
    "venue",
    "doi",
    "pdf_url",
    "source_url",
  ],
  capabilities: ["http.fetch", "scholar.search"],
  func: async (_page, kwargs) => {
    const query = String(kwargs.query ?? "").trim();
    if (!query)
      throw new Error("semantic-scholar search query cannot be empty.");
    // The limit is interpolated into the URL, so it must be a whole number:
    // reject NaN explicitly and truncate fractions before clamping to 1..100.
    const requested = Number(kwargs.limit ?? 20);
    if (Number.isNaN(requested)) {
      throw new Error(
        `semantic-scholar limit "${String(kwargs.limit)}" is not a valid number.`,
      );
    }
    const limit = Math.min(Math.max(Math.trunc(requested), 1), 100);
    const body = (await fetchS2(
      `/paper/search?query=${encodeURIComponent(query)}&limit=${limit}&fields=${encodeURIComponent(FIELDS)}`,
      "semantic-scholar search",
    )) as { data?: S2Paper[] };
    const out = rows(body.data);
    if (out.length === 0)
      throw new Error(`No Semantic Scholar papers matched "${query}".`);
    return out;
  },
});
|
|
196
|
+
|
|
197
|
+
// Registers `semantic-scholar paper`: fetches one paper's metadata.
cli({
  site: "semantic-scholar",
  name: "paper",
  description: "Fetch one Semantic Scholar paper by paperId, DOI, or arXiv id",
  domain: "api.semanticscholar.org",
  strategy: Strategy.PUBLIC,
  args: [{ name: "id", type: "str", required: true, positional: true }],
  columns: [
    "id",
    "title",
    "authors",
    "year",
    "venue",
    "doi",
    "pdf_url",
    "source_url",
  ],
  capabilities: ["http.fetch", "scholar.get", "scholar.pdf"],
  func: async (_page, kwargs) => {
    // The reference may arrive under any of these argument names.
    const rawRef = kwargs.id ?? kwargs.ref ?? kwargs.doi ?? kwargs.arxiv_id;
    const ref = requireSemanticScholarPaperRef(rawRef);
    const requestPath = `/paper/${encodeURIComponent(ref)}?fields=${encodeURIComponent(FIELDS)}`;
    const payload = await fetchS2(requestPath, `semantic-scholar paper ${ref}`);
    const record = mapSemanticScholarPaper(
      payload as S2Paper,
      "semantic-scholar",
    );
    return [record];
  },
});
|
|
226
|
+
|
|
227
|
+
// Registers `semantic-scholar citations` and `semantic-scholar references`.
// Both commands share one implementation; only the endpoint segment, the
// capability tag, and the response field holding the paper differ.
for (const [name, path, cap] of [
  ["citations", "citations", "scholar.citations"],
  ["references", "references", "scholar.references"],
] as const) {
  cli({
    site: "semantic-scholar",
    name,
    description: `List Semantic Scholar paper ${name}`,
    domain: "api.semanticscholar.org",
    strategy: Strategy.PUBLIC,
    args: [
      { name: "id", type: "str", required: true, positional: true },
      { name: "limit", type: "int", default: 20 },
    ],
    columns: [
      "id",
      "title",
      "authors",
      "year",
      "venue",
      "doi",
      "pdf_url",
      "source_url",
    ],
    capabilities: ["http.fetch", cap],
    func: async (_page, kwargs) => {
      const ref = requireSemanticScholarPaperRef(kwargs.id ?? kwargs.ref);
      // The limit is interpolated into the URL, so it must be a whole number:
      // reject NaN explicitly and truncate fractions before clamping to 1..100.
      const requested = Number(kwargs.limit ?? 20);
      if (Number.isNaN(requested)) {
        throw new Error(
          `semantic-scholar limit "${String(kwargs.limit)}" is not a valid number.`,
        );
      }
      const limit = Math.min(Math.max(Math.trunc(requested), 1), 100);
      const body = (await fetchS2(
        `/paper/${encodeURIComponent(ref)}/${path}?limit=${limit}&fields=${encodeURIComponent(FIELDS)}`,
        `semantic-scholar ${name} ${ref}`,
      )) as { data?: Array<{ citingPaper?: S2Paper; citedPaper?: S2Paper }> };
      const items = Array.isArray(body.data) ? body.data : [];
      const papers = items
        .map((item) =>
          name === "citations" ? item?.citingPaper : item?.citedPaper,
        )
        // Entries the API could not resolve come back without a paperId.
        // Drop them so one unresolved entry does not fail the whole listing.
        .filter((paper): paper is S2Paper => {
          const paperId: unknown = paper?.paperId;
          return typeof paperId === "string" && paperId.trim() !== "";
        });
      const out = rows(papers);
      if (out.length === 0)
        throw new Error(`No Semantic Scholar ${name} found for ${ref}.`);
      return out;
    },
  });
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::unpaywall::works
|
|
3
|
+
* @does Registers Unpaywall DOI open-access lookup for PDF availability.
|
|
4
|
+
* @needs api.unpaywall.org v2, UNPAYWALL_EMAIL or --email, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.pdf and scholar.get
|
|
6
|
+
* @breaks Missing email is an explicit invalid-input error; Unpaywall drift surfaces as adapter error, never as a fabricated PDF.
|
|
7
|
+
* @invariants Only DOI-shaped references are accepted; best_oa_location is preferred for PDF and landing URLs.
|
|
8
|
+
* @side-effects HTTPS egress to api.unpaywall.org only
|
|
9
|
+
* @perf O(1) per DOI
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { cli, Strategy } from "../../registry.js";
|
|
17
|
+
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
18
|
+
|
|
19
|
+
// Base URL of the Unpaywall REST API (v2); the encoded DOI is appended as a path segment.
const API = "https://api.unpaywall.org/v2";
|
|
20
|
+
|
|
21
|
+
/**
 * One open-access location from an Unpaywall response.
 *
 * Values are `unknown` because the JSON is not schema-validated.
 */
interface OaLocation {
  /** Landing page URL for this copy. */
  url_for_landing_page?: unknown;
  /** Direct PDF URL for this copy. */
  url_for_pdf?: unknown;
  // The three fields below are joined with ":" to form the record's `type`.
  host_type?: unknown;
  version?: unknown;
  license?: unknown;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Subset of an Unpaywall work object that this adapter reads.
 *
 * Scalars are `unknown` because the JSON is not schema-validated.
 */
interface UnpaywallWork {
  /** DOI of the work; validated again when the record is built. */
  doi?: unknown;
  title?: unknown;
  /** Only a literal `true` is treated as open access. */
  is_oa?: unknown;
  oa_status?: unknown;
  /** Preferred open-access copy; may be `null`. */
  best_oa_location?: OaLocation | null;
}
|
|
36
|
+
|
|
37
|
+
/** Returns the trimmed text when `value` is a string; any other type yields "". */
function str(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
|
|
40
|
+
|
|
41
|
+
/**
 * Strips a leading `doi:` label and a `doi.org` / `dx.doi.org` URL prefix.
 *
 * @param value - Candidate DOI in bare, labelled, or URL form.
 * @returns The remaining text; "" when `value` is not a string. The result
 *   is not validated as a DOI.
 */
function bareDoi(value: unknown): string {
  return (
    str(value)
      // Swallow whitespace after the label so "doi: 10.x/y" does not keep a
      // leading space that later fails DOI pattern checks.
      .replace(/^doi:\s*/i, "")
      .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "")
  );
}
|
|
46
|
+
|
|
47
|
+
/**
 * Validates and normalizes a DOI for an Unpaywall lookup.
 *
 * @param value - DOI in bare, `doi:`-labelled, or doi.org URL form.
 * @returns The bare DOI.
 * @throws Error when the stripped value does not look like `10.<x>/<y>`.
 */
export function requireUnpaywallDoi(value: unknown): string {
  const candidate = bareDoi(value);
  if (/^10\.\S+\/\S+/.test(candidate)) return candidate;
  throw new Error(`unpaywall DOI "${String(value ?? "")}" is not recognised.`);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Resolves the requester email for Unpaywall.
 *
 * The explicit argument wins; the `UNPAYWALL_EMAIL` environment variable is
 * used only when the argument is missing or blank.
 *
 * @param value - Email passed on the command line, if any.
 * @returns The chosen email address.
 * @throws Error when the chosen value does not have a `local@domain.tld` shape.
 */
function requireEmail(value: unknown): string {
  const fromArg = str(value);
  const candidate = fromArg || process.env.UNPAYWALL_EMAIL?.trim() || "";
  const looksLikeEmail = /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(candidate);
  if (looksLikeEmail) return candidate;
  throw new Error("unpaywall lookup requires --email or UNPAYWALL_EMAIL.");
}
|
|
64
|
+
|
|
65
|
+
/**
 * Converts an Unpaywall work object into a `ScholarlyWorkRecord`.
 *
 * @param work - Work object from the Unpaywall API.
 * @param source - Adapter name stored in `source_adapter`.
 * @returns The mapped record, stamped with the current time in `retrieved_at`.
 * @throws Error when `work.doi` is not DOI-shaped.
 */
export function mapUnpaywallWork(
  work: UnpaywallWork,
  source: string,
): ScholarlyWorkRecord {
  const doi = requireUnpaywallDoi(work.doi);
  const best: OaLocation = work.best_oa_location ?? {};

  // Landing and source URLs share one value: the best location's landing
  // page, or the doi.org resolver when none is reported.
  const landing = str(best.url_for_landing_page) || `https://doi.org/${doi}`;
  const pdf = str(best.url_for_pdf);
  const status = str(work.oa_status);
  const descriptor = [best.host_type, best.version, best.license]
    .map((part) => str(part))
    .filter(Boolean)
    .join(":");

  const record: ScholarlyWorkRecord = {
    id: doi,
    title: str(work.title),
    doi,
    is_open_access: work.is_oa === true,
    oa_status: status || undefined,
    pdf_url: pdf || undefined,
    landing_url: landing,
    type: descriptor || undefined,
    source_adapter: source,
    source_url: landing,
    retrieved_at: new Date().toISOString(),
  };
  return record;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Fetches the Unpaywall record for one DOI.
 *
 * @param doi - Bare DOI; URL-encoded into the request path.
 * @param email - Requester email, sent as the `email` query parameter.
 * @returns The decoded JSON body.
 * @throws Error on HTTP 404, 422, 429, or any other non-OK status.
 */
async function fetchUnpaywall(
  doi: string,
  email: string,
): Promise<UnpaywallWork> {
  const url = `${API}/${encodeURIComponent(doi)}?email=${encodeURIComponent(email)}`;
  const response = await fetch(url, {
    headers: {
      Accept: "application/json",
      "User-Agent":
        "unicli-unpaywall/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
    },
  });

  // Statuses that get a dedicated message.
  const knownFailures: Record<number, string> = {
    404: `Unpaywall returned no result for ${doi}.`,
    422: "Unpaywall rejected the email parameter.",
    429: "Unpaywall returned HTTP 429.",
  };
  const knownMessage = knownFailures[response.status];
  if (knownMessage !== undefined) throw new Error(knownMessage);
  if (!response.ok) {
    throw new Error(`Unpaywall returned HTTP ${response.status}.`);
  }
  return (await response.json()) as UnpaywallWork;
}
|
|
112
|
+
|
|
113
|
+
// Registers `unpaywall oa`: reports open-access availability for one DOI.
cli({
  site: "unpaywall",
  name: "oa",
  description: "Find open-access PDF availability for a DOI via Unpaywall",
  domain: "api.unpaywall.org",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "doi", type: "str", required: true, positional: true },
    { name: "email", type: "str", description: "Unpaywall requester email" },
  ],
  columns: [
    "id",
    "title",
    "doi",
    "is_open_access",
    "oa_status",
    "pdf_url",
    "source_url",
  ],
  capabilities: ["http.fetch", "scholar.get", "scholar.pdf"],
  func: async (_page, kwargs) => {
    // The DOI may arrive as `doi`, `id`, or `ref`.
    const rawDoi = kwargs.doi ?? kwargs.id ?? kwargs.ref;
    const doi = requireUnpaywallDoi(rawDoi);
    const email = requireEmail(kwargs.email);
    const work = await fetchUnpaywall(doi, email);
    return [mapUnpaywallWork(work, "unpaywall")];
  },
});
|
|
@@ -38,7 +38,7 @@ pipeline:
|
|
|
38
38
|
columns: [key, title, type, date, creators]
|
|
39
39
|
|
|
40
40
|
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
41
|
-
capabilities: ["http.fetch"]
|
|
41
|
+
capabilities: ["http.fetch", "scholar.search"]
|
|
42
42
|
minimum_capability: http.fetch
|
|
43
43
|
trust: public
|
|
44
44
|
confidentiality: public
|