@zenalexa/unicli 0.221.0 → 0.221.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +12 -12
- package/README.md +13 -11
- package/README.zh-CN.md +13 -11
- package/dist/adapters/acl-anthology/papers.d.ts +16 -0
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -0
- package/dist/adapters/acl-anthology/papers.js +135 -0
- package/dist/adapters/acl-anthology/papers.js.map +1 -0
- package/dist/adapters/arxiv/papers.js +2 -0
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +5 -0
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/crossref/works.d.ts +42 -0
- package/dist/adapters/crossref/works.d.ts.map +1 -0
- package/dist/adapters/crossref/works.js +157 -0
- package/dist/adapters/crossref/works.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +17 -0
- package/dist/adapters/cvf/papers.d.ts.map +1 -0
- package/dist/adapters/cvf/papers.js +124 -0
- package/dist/adapters/cvf/papers.js.map +1 -0
- package/dist/adapters/dblp/publications.js +4 -0
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/cite.js +1 -0
- package/dist/adapters/google-scholar/cite.js.map +1 -1
- package/dist/adapters/google-scholar/profile.js +5 -0
- package/dist/adapters/google-scholar/profile.js.map +1 -1
- package/dist/adapters/google-scholar/search.js +5 -0
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.js +1 -0
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/neurips/proceedings.d.ts +17 -0
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -0
- package/dist/adapters/neurips/proceedings.js +112 -0
- package/dist/adapters/neurips/proceedings.js.map +1 -0
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +32 -0
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.js +5 -0
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +35 -0
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -0
- package/dist/adapters/pmlr/proceedings.js +139 -0
- package/dist/adapters/pmlr/proceedings.js.map +1 -0
- package/dist/adapters/pubmed/articles.js +5 -0
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/semantic-scholar/papers.d.ts +36 -0
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -0
- package/dist/adapters/semantic-scholar/papers.js +214 -0
- package/dist/adapters/semantic-scholar/papers.js.map +1 -0
- package/dist/adapters/unpaywall/works.d.ts +33 -0
- package/dist/adapters/unpaywall/works.d.ts.map +1 -0
- package/dist/adapters/unpaywall/works.js +101 -0
- package/dist/adapters/unpaywall/works.js.map +1 -0
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +15 -1
- package/dist/cli.js.map +1 -1
- package/dist/commands/do.d.ts +30 -0
- package/dist/commands/do.d.ts.map +1 -0
- package/dist/commands/do.js +248 -0
- package/dist/commands/do.js.map +1 -0
- package/dist/commands/extract.d.ts +34 -0
- package/dist/commands/extract.d.ts.map +1 -0
- package/dist/commands/extract.js +316 -0
- package/dist/commands/extract.js.map +1 -0
- package/dist/commands/scholar.d.ts +33 -0
- package/dist/commands/scholar.d.ts.map +1 -0
- package/dist/commands/scholar.js +494 -0
- package/dist/commands/scholar.js.map +1 -0
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +2 -5
- package/dist/commands/search.js.map +1 -1
- package/dist/discovery/aliases.d.ts +2 -2
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +182 -11
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/intents.d.ts +10 -0
- package/dist/discovery/intents.d.ts.map +1 -0
- package/dist/discovery/intents.js +255 -0
- package/dist/discovery/intents.js.map +1 -0
- package/dist/discovery/search.d.ts +4 -1
- package/dist/discovery/search.d.ts.map +1 -1
- package/dist/discovery/search.js +28 -140
- package/dist/discovery/search.js.map +1 -1
- package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
- package/dist/fast-path/handlers/discovery.js +17 -3
- package/dist/fast-path/handlers/discovery.js.map +1 -1
- package/dist/manifest-compact.txt +13 -11
- package/dist/manifest-search.json +1 -1
- package/dist/manifest.json +462 -68
- package/dist/mcp/handler.d.ts.map +1 -1
- package/dist/mcp/handler.js +14 -2
- package/dist/mcp/handler.js.map +1 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +11 -3
- package/dist/mcp/tools.js.map +1 -1
- package/dist/registry.d.ts +1 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +5 -0
- package/dist/registry.js.map +1 -1
- package/dist/types/scholarly.d.ts +49 -0
- package/dist/types/scholarly.d.ts.map +1 -0
- package/dist/types/scholarly.js +16 -0
- package/dist/types/scholarly.js.map +1 -0
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.ts +157 -0
- package/src/adapters/arxiv/download.yaml +1 -1
- package/src/adapters/arxiv/paper.yaml +1 -1
- package/src/adapters/arxiv/papers.ts +2 -0
- package/src/adapters/arxiv/search.yaml +1 -1
- package/src/adapters/arxiv/trending.yaml +1 -1
- package/src/adapters/baidu-scholar/search.ts +5 -0
- package/src/adapters/crossref/works.ts +209 -0
- package/src/adapters/cvf/papers.ts +136 -0
- package/src/adapters/dblp/publications.ts +4 -0
- package/src/adapters/google-scholar/cite.ts +1 -0
- package/src/adapters/google-scholar/profile.ts +5 -0
- package/src/adapters/google-scholar/search.ts +5 -0
- package/src/adapters/hf/paper.test.ts +10 -0
- package/src/adapters/hf/paper.ts +1 -0
- package/src/adapters/hf/top.yaml +1 -1
- package/src/adapters/huggingface-papers/daily.yaml +1 -1
- package/src/adapters/huggingface-papers/search.yaml +1 -1
- package/src/adapters/neurips/proceedings.ts +126 -0
- package/src/adapters/openalex/works.ts +33 -0
- package/src/adapters/openreview/papers.ts +5 -0
- package/src/adapters/pmlr/proceedings.ts +167 -0
- package/src/adapters/pubmed/articles.ts +5 -0
- package/src/adapters/semantic-scholar/papers.ts +268 -0
- package/src/adapters/unpaywall/works.ts +138 -0
- package/src/adapters/zotero/search.yaml +1 -1
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::crossref::works
|
|
3
|
+
* @does Registers Crossref REST work search and DOI lookup commands for publisher metadata.
|
|
4
|
+
* @needs api.crossref.org REST API, optional CROSSREF_MAILTO, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.search and scholar.get
|
|
6
|
+
* @breaks Crossref response-shape drift or rate limiting surfaces as explicit adapter errors.
|
|
7
|
+
* @invariants DOI lookup accepts only DOI-shaped references; output maps to ScholarlyWorkRecord.
|
|
8
|
+
* @side-effects HTTPS egress to api.crossref.org only
|
|
9
|
+
* @perf O(limit) JSON mapping
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { cli, Strategy } from "../../registry.js";
|
|
17
|
+
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
18
|
+
|
|
19
|
+
const API = "https://api.crossref.org";
|
|
20
|
+
|
|
21
|
+
interface CrossrefPerson {
|
|
22
|
+
given?: unknown;
|
|
23
|
+
family?: unknown;
|
|
24
|
+
name?: unknown;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
interface CrossrefItem {
|
|
28
|
+
DOI?: unknown;
|
|
29
|
+
title?: unknown[];
|
|
30
|
+
subtitle?: unknown[];
|
|
31
|
+
author?: CrossrefPerson[];
|
|
32
|
+
"container-title"?: unknown[];
|
|
33
|
+
issued?: { "date-parts"?: unknown[][] };
|
|
34
|
+
published?: { "date-parts"?: unknown[][] };
|
|
35
|
+
"is-referenced-by-count"?: unknown;
|
|
36
|
+
reference?: unknown[];
|
|
37
|
+
URL?: unknown;
|
|
38
|
+
type?: unknown;
|
|
39
|
+
abstract?: unknown;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Returns the trimmed text when `value` is a string; every other type yields "". */
function str(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
|
|
45
|
+
|
|
46
|
+
/**
 * Normalises a value that may be a bare string or an array of strings:
 * arrays contribute their first element, anything else is used as-is.
 * The chosen candidate is passed through `str`, so non-strings become "".
 */
function arrFirst(value: unknown): string {
  const candidate = Array.isArray(value) ? value[0] : value;
  return str(candidate);
}
|
|
49
|
+
|
|
50
|
+
/** Passes finite numbers through; NaN, ±Infinity and non-numbers become undefined. */
function num(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  return Number.isFinite(value) ? value : undefined;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Picks the `[year, month?, day?]` tuple to use for an item.
 *
 * `issued` is preferred over `published`, but only when its first entry
 * actually carries a finite numeric year. A placeholder tuple such as
 * `[null]` is not nullish, so a plain `??` chain would stop at it and hide a
 * usable `published` date.
 *
 * When neither field has a numeric year, the first non-nullish tuple is
 * returned unchanged (same as a plain `??` chain), falling back to `[]`.
 */
function dateParts(item: CrossrefItem): unknown[] {
  const candidates = [
    item.issued?.["date-parts"]?.[0],
    item.published?.["date-parts"]?.[0],
  ];
  const dated = candidates.find(
    (parts) =>
      Array.isArray(parts) &&
      typeof parts[0] === "number" &&
      Number.isFinite(parts[0]),
  );
  return dated ?? candidates.find((parts) => parts != null) ?? [];
}
|
|
63
|
+
|
|
64
|
+
/** Extracts the publication year: the first date part, when it is a finite number. */
function year(item: CrossrefItem): number | undefined {
  const [first] = dateParts(item);
  if (typeof first !== "number" || !Number.isFinite(first)) return undefined;
  return first;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Formats the item's date parts as `YYYY-MM-DD`.
 *
 * Only numeric parts are considered. When none exist the result is
 * undefined; a missing month or day is rendered as `01`.
 */
function date(item: CrossrefItem): string | undefined {
  const numeric: number[] = [];
  for (const part of dateParts(item)) {
    if (typeof part === "number") numeric.push(part);
  }
  if (numeric.length === 0) return undefined;

  const [y, m = 1, d = 1] = numeric;
  const yyyy = String(y).padStart(4, "0");
  const mm = String(m).padStart(2, "0");
  const dd = String(d).padStart(2, "0");
  return `${yyyy}-${mm}-${dd}`;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Builds a list of display names from person records.
 *
 * Each person contributes `name` when it is a non-empty string, otherwise
 * the non-empty `given` / `family` parts joined by a space. People with no
 * usable name are dropped. Returns undefined when the input is not an array
 * or no names survive.
 */
function authors(value: CrossrefPerson[] | undefined): string[] | undefined {
  if (!Array.isArray(value)) return undefined;

  const names: string[] = [];
  for (const person of value) {
    let label = str(person.name);
    if (!label) {
      const pieces: string[] = [];
      for (const raw of [person.given, person.family]) {
        const piece = str(raw);
        if (piece) pieces.push(piece);
      }
      label = pieces.join(" ");
    }
    if (label) names.push(label);
  }
  return names.length > 0 ? names : undefined;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Strips a leading `doi:` label and then a leading `http(s)://doi.org/` or
 * `http(s)://dx.doi.org/` resolver prefix (both case-insensitive) from the
 * trimmed input. Non-string input yields "".
 */
function bareDoi(value: unknown): string {
  const withoutLabel = str(value).replace(/^doi:/i, "");
  return withoutLabel.replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "");
}
|
|
100
|
+
|
|
101
|
+
/**
 * Normalises `value` to a bare DOI and validates its shape.
 *
 * @returns the bare DOI (prefixes removed by `bareDoi`).
 * @throws Error when the result does not start with `10.<non-space>/<non-space>`.
 */
export function requireCrossrefDoi(value: unknown): string {
  const doi = bareDoi(value);
  const looksLikeDoi = /^10\.\S+\/\S+/.test(doi);
  if (looksLikeDoi) return doi;
  throw new Error(`crossref DOI "${String(value ?? "")}" is not recognised.`);
}
|
|
108
|
+
|
|
109
|
+
/**
 * Adds a `mailto` query parameter taken from the CROSSREF_MAILTO environment
 * variable. Nothing is added when the variable is unset or blank.
 */
function maybeMailto(params: URLSearchParams): void {
  const contact = (process.env.CROSSREF_MAILTO ?? "").trim();
  if (contact.length > 0) params.set("mailto", contact);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Performs a GET against the Crossref API and returns the parsed JSON body.
 *
 * @param path  path plus query string, appended to the `API` base URL.
 * @param label human-readable prefix used in error messages.
 * @throws Error for 404, 429 and any other non-OK status.
 */
async function fetchCrossref(path: string, label: string): Promise<unknown> {
  const headers = {
    Accept: "application/json",
    "User-Agent": "unicli-crossref/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
  };
  const response = await fetch(`${API}${path}`, { headers });

  switch (response.status) {
    case 404:
      throw new Error(`${label} returned no result.`);
    case 429:
      throw new Error(`${label} returned HTTP 429.`);
    default:
      break;
  }
  if (!response.ok) {
    throw new Error(`${label} returned HTTP ${response.status}.`);
  }
  return response.json();
}
|
|
128
|
+
|
|
129
|
+
/**
 * Converts one Crossref work item into a ScholarlyWorkRecord.
 *
 * @param item   raw work object from a Crossref response.
 * @param source value stored in `source_adapter`.
 * @throws Error (from `requireCrossrefDoi`) when the item has no DOI-shaped `DOI`.
 *
 * Optional fields are emitted as undefined rather than empty strings.
 * `source_url` prefers the item's `URL` and falls back to the doi.org link.
 */
export function mapCrossrefItem(
  item: CrossrefItem,
  source: string,
): ScholarlyWorkRecord {
  const doi = requireCrossrefDoi(item.DOI);

  // Markup in the abstract is replaced by spaces; collapse and trim the
  // result so tag boundaries do not leave padding or doubled spaces, and so
  // an abstract made only of tags becomes undefined instead of " ".
  const abstractText = str(item.abstract)
    .replace(/<[^>]+>/g, " ")
    .replace(/\s+/g, " ")
    .trim();

  return {
    id: doi,
    title: arrFirst(item.title),
    authors: authors(item.author),
    year: year(item),
    date: date(item),
    venue: arrFirst(item["container-title"]) || undefined,
    type: str(item.type) || undefined,
    abstract: abstractText || undefined,
    doi,
    cited_by_count: num(item["is-referenced-by-count"]),
    references_count: Array.isArray(item.reference)
      ? item.reference.length
      : undefined,
    source_adapter: source,
    source_url: str(item.URL) || `https://doi.org/${doi}`,
    retrieved_at: new Date().toISOString(),
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * `crossref search` — free-text query against the Crossref `/works` endpoint.
 * Every returned item is mapped with `mapCrossrefItem`; an empty result set is
 * reported as an error rather than an empty table.
 */
cli({
  site: "crossref",
  name: "search",
  description:
    "Search Crossref Works by title, author, DOI, or bibliographic text",
  domain: "api.crossref.org",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "query", type: "str", required: true, positional: true },
    { name: "limit", type: "int", default: 20 },
  ],
  columns: ["id", "title", "authors", "year", "venue", "doi", "source_url"],
  capabilities: ["http.fetch", "scholar.search"],
  func: async (_page, kwargs) => {
    const query = String(kwargs.query ?? "").trim();
    if (!query) throw new Error("crossref search query cannot be empty.");

    // `rows` must be an integer. A non-numeric limit would otherwise be sent
    // as `rows=NaN` (Math.min/Math.max propagate NaN), and a fractional one
    // verbatim, so truncate and fall back to the default before clamping
    // to 1..100.
    const requested = Math.trunc(Number(kwargs.limit ?? 20));
    const limit = Math.min(
      Math.max(Number.isNaN(requested) ? 20 : requested, 1),
      100,
    );

    const params = new URLSearchParams({ query, rows: String(limit) });
    maybeMailto(params);
    const body = (await fetchCrossref(
      `/works?${params.toString()}`,
      "crossref search",
    )) as {
      message?: { items?: CrossrefItem[] };
    };
    const rows = (body.message?.items ?? []).map((item) =>
      mapCrossrefItem(item, "crossref"),
    );
    if (rows.length === 0)
      throw new Error(`No Crossref works matched "${query}".`);
    return rows;
  },
});
|
|
187
|
+
|
|
188
|
+
/**
 * `crossref work` — fetches a single work by DOI.
 * The DOI may arrive as `doi`, `id` or `ref`; it is validated before any
 * request is made and URL-encoded into the `/works/{doi}` path.
 */
cli({
  site: "crossref",
  name: "work",
  description: "Fetch one Crossref Work by DOI",
  domain: "api.crossref.org",
  strategy: Strategy.PUBLIC,
  args: [{ name: "doi", type: "str", required: true, positional: true }],
  columns: ["id", "title", "authors", "year", "venue", "doi", "source_url"],
  capabilities: ["http.fetch", "scholar.get"],
  func: async (_page, kwargs) => {
    const doi = requireCrossrefDoi(kwargs.doi ?? kwargs.id ?? kwargs.ref);

    // The only optional query parameter is the contact address.
    const params = new URLSearchParams();
    maybeMailto(params);
    const queryString = params.size > 0 ? `?${params.toString()}` : "";
    const path = `/works/${encodeURIComponent(doi)}${queryString}`;

    const body = (await fetchCrossref(path, `crossref work ${doi}`)) as {
      message?: CrossrefItem;
    };
    const work = body.message;
    if (!work) throw new Error(`Crossref returned no work for ${doi}.`);
    return [mapCrossrefItem(work, "crossref")];
  },
});
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::cvf::papers
|
|
3
|
+
* @does Registers CVF OpenAccess conference paper search for CVPR/ICCV/ECCV-style proceedings pages.
|
|
4
|
+
* @needs openaccess.thecvf.com static proceedings HTML, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.search, scholar.pdf, and scholar.venue
|
|
6
|
+
* @breaks CVF markup drift surfaces as empty/parse errors rather than non-CVF fallbacks.
|
|
7
|
+
* @invariants Venue/year map to explicit CVF event pages; PDF URLs are absolutized against openaccess.thecvf.com.
|
|
8
|
+
* @side-effects HTTPS egress to openaccess.thecvf.com only
|
|
9
|
+
* @perf O(N) over one proceedings HTML page
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { cli, Strategy } from "../../registry.js";
|
|
17
|
+
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
18
|
+
|
|
19
|
+
const ORIGIN = "https://openaccess.thecvf.com";
|
|
20
|
+
|
|
21
|
+
/**
 * Decodes the basic HTML character references (`&lt;`, `&gt;`, `&quot;`,
 * apostrophe forms, `&amp;`), then collapses whitespace runs to a single
 * space and trims.
 *
 * `&amp;` is decoded last so that an escaped reference such as `&amp;lt;`
 * becomes the literal text `&lt;` instead of being decoded twice into `<`.
 */
function decode(value: string): string {
  return value
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|#x27|apos);/gi, "'")
    .replace(/&amp;/g, "&")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
31
|
+
|
|
32
|
+
/**
 * Makes a link absolute: http(s) URLs are returned untouched, anything else
 * is appended to ORIGIN with exactly one "/" between them.
 */
function absolute(path: string): string {
  if (/^https?:\/\//i.test(path)) return path;
  const separator = path.startsWith("/") ? "" : "/";
  return `${ORIGIN}${separator}${path}`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Builds the event identifier (for example `CVPR2024`) from a venue and year.
 *
 * The venue defaults to CVPR when nullish and is upper-cased; only CVPR,
 * ICCV, ECCV and WACV are accepted. The year must be exactly four digits.
 *
 * @throws Error for an unsupported venue or a malformed year.
 */
function eventId(venue: unknown, year: unknown): string {
  const venueCode = String(venue ?? "CVPR")
    .trim()
    .toUpperCase();
  const yearText = String(year ?? "").trim();

  const venueIsKnown = /^(CVPR|ICCV|ECCV|WACV)$/.test(venueCode);
  if (!venueIsKnown) {
    throw new Error(`unsupported CVF venue: ${venueCode}`);
  }
  const yearIsValid = /^\d{4}$/.test(yearText);
  if (!yearIsValid) {
    throw new Error(`cvf year "${yearText}" is not valid.`);
  }
  return `${venueCode}${yearText}`;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Parses a CVF proceedings listing into work records.
 *
 * Each `<dt class="ptitle">` entry supplies the detail-page link and title;
 * the markup that follows it (up to the next entry or end of input) is
 * scanned for a `pdf` link and for the comma-separated author text.
 *
 * @param html  proceedings page HTML.
 * @param event event identifier such as `CVPR2024`; the trailing four
 *              characters are used as the year and the rest as the venue.
 */
export function parseCvfRows(
  html: string,
  event = "CVPR2024",
): ScholarlyWorkRecord[] {
  const out: ScholarlyWorkRecord[] = [];
  const re =
    /<dt class="ptitle">[\s\S]*?<a href="([^"]+)">([\s\S]*?)<\/a><\/dt>([\s\S]*?)(?=<dt class="ptitle">|$)/g;
  let match: RegExpExecArray | null;
  while ((match = re.exec(html)) !== null) {
    const sourceUrl = absolute(match[1]);
    const title = decode(match[2].replace(/<[^>]+>/g, " "));
    const block = match[3];
    const pdf = block.match(/<a href="([^"]+\.pdf)">pdf<\/a>/i)?.[1] ?? "";

    // Drop bracketed link groups and <form> elements before stripping the
    // remaining tags, leaving the author text.
    const authorText = block
      .replace(/\[[\s\S]*?\]/g, " ")
      .replace(/<form[\s\S]*?<\/form>/g, " ")
      .replace(/<[^>]+>/g, " ");
    const authors = decode(authorText)
      .split(",")
      .map((author) => author.trim())
      .filter(Boolean);

    // split() always returns at least one element, so the last segment is
    // never undefined; use `||` so an empty segment (URL ending in "/")
    // falls back to the title instead of producing an empty id.
    const slug = sourceUrl.split("/").pop()?.replace(/\.html$/, "") || title;

    out.push({
      id: slug,
      title,
      authors: authors.length > 0 ? authors : undefined,
      year: Number(event.slice(-4)),
      venue: event.replace(/\d{4}$/, ""),
      pdf_url: pdf ? absolute(pdf) : undefined,
      source_adapter: "cvf",
      source_url: sourceUrl,
      retrieved_at: new Date().toISOString(),
    });
  }
  return out;
}
|
|
88
|
+
|
|
89
|
+
/**
 * `cvf search` — downloads one event's full listing (`?day=all`) and filters
 * it locally by a case-insensitive substring match on title plus authors.
 */
cli({
  site: "cvf",
  name: "search",
  description: "Search CVF OpenAccess proceedings (CVPR/ICCV/ECCV/WACV)",
  domain: "openaccess.thecvf.com",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "query", type: "str", required: true, positional: true },
    { name: "venue", type: "str", default: "CVPR" },
    { name: "year", type: "str", default: "2024" },
    { name: "limit", type: "int", default: 20 },
  ],
  columns: ["id", "title", "authors", "year", "venue", "pdf_url", "source_url"],
  capabilities: [
    "http.fetch",
    "scholar.search",
    "scholar.venue",
    "scholar.pdf",
  ],
  func: async (_page, kwargs) => {
    const query = String(kwargs.query ?? "")
      .trim()
      .toLowerCase();
    if (!query) throw new Error("cvf search query cannot be empty.");
    const event = eventId(kwargs.venue, kwargs.year);
    const response = await fetch(`${ORIGIN}/${event}?day=all`, {
      headers: {
        Accept: "*/*",
        "User-Agent": "unicli-cvf/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
      },
    });
    if (response.status === 404)
      throw new Error(`CVF ${event} returned no proceedings page.`);
    if (!response.ok)
      throw new Error(`CVF ${event} returned HTTP ${response.status}.`);

    // A non-numeric limit would become NaN, and slice(0, NaN) keeps nothing,
    // which would surface as a misleading "no papers matched" error. Fall
    // back to the default before clamping to 1..200.
    const requested = Math.trunc(Number(kwargs.limit ?? 20));
    const limit = Math.min(
      Math.max(Number.isNaN(requested) ? 20 : requested, 1),
      200,
    );

    const rows = parseCvfRows(await response.text(), event)
      .filter((row) =>
        `${row.title} ${row.authors?.join(" ") ?? ""}`
          .toLowerCase()
          .includes(query),
      )
      .slice(0, limit);
    if (rows.length === 0)
      throw new Error(`No CVF ${event} papers matched "${query}".`);
    return rows;
  },
});
|
|
@@ -332,6 +332,7 @@ cli({
|
|
|
332
332
|
"doi",
|
|
333
333
|
"url",
|
|
334
334
|
],
|
|
335
|
+
capabilities: ["http.fetch", "scholar.search"],
|
|
335
336
|
func: async (_page, kwargs) => {
|
|
336
337
|
const query = requireDblpQuery(kwargs.query);
|
|
337
338
|
const limit = requireDblpLimit(kwargs.limit, 20, 100);
|
|
@@ -375,6 +376,7 @@ cli({
|
|
|
375
376
|
"open_access_url",
|
|
376
377
|
"dblp_url",
|
|
377
378
|
],
|
|
379
|
+
capabilities: ["http.fetch", "scholar.get", "scholar.pdf"],
|
|
378
380
|
func: async (_page, kwargs) => {
|
|
379
381
|
const key = requireRecordKey(kwargs.key);
|
|
380
382
|
const xml = await fetchDblpXml(
|
|
@@ -406,6 +408,7 @@ cli({
|
|
|
406
408
|
{ name: "limit", type: "int", default: 20, description: "Max venues" },
|
|
407
409
|
],
|
|
408
410
|
columns: ["rank", "acronym", "venue", "type", "url"],
|
|
411
|
+
capabilities: ["http.fetch", "scholar.venue"],
|
|
409
412
|
func: async (_page, kwargs) => {
|
|
410
413
|
const query = requireDblpQuery(kwargs.query);
|
|
411
414
|
const limit = requireDblpLimit(kwargs.limit, 20, 100);
|
|
@@ -455,6 +458,7 @@ cli({
|
|
|
455
458
|
"pid",
|
|
456
459
|
"url",
|
|
457
460
|
],
|
|
461
|
+
capabilities: ["http.fetch", "scholar.author", "scholar.search"],
|
|
458
462
|
func: async (_page, kwargs) => {
|
|
459
463
|
const limit = requireDblpLimit(kwargs.limit, 20, 200);
|
|
460
464
|
let pid = kwargs.pid ? requirePid(kwargs.pid) : "";
|
|
@@ -27,6 +27,7 @@ cli({
|
|
|
27
27
|
{ name: "index", type: "int", default: 1 },
|
|
28
28
|
],
|
|
29
29
|
columns: ["title", "format", "citation"],
|
|
30
|
+
capabilities: ["mcp-browser.navigate", "mcp-browser.evaluate", "scholar.get"],
|
|
30
31
|
func: async (page, kwargs) => {
|
|
31
32
|
const p = page as IPage;
|
|
32
33
|
const query = str(kwargs.query).trim();
|
|
@@ -18,6 +18,11 @@ cli({
|
|
|
18
18
|
{ name: "limit", type: "int", default: 10 },
|
|
19
19
|
],
|
|
20
20
|
columns: ["rank", "kind", "title", "authors", "year", "cited", "url"],
|
|
21
|
+
capabilities: [
|
|
22
|
+
"mcp-browser.navigate",
|
|
23
|
+
"mcp-browser.evaluate",
|
|
24
|
+
"scholar.author",
|
|
25
|
+
],
|
|
21
26
|
func: async (page, kwargs) => {
|
|
22
27
|
const p = page as IPage;
|
|
23
28
|
const author = str(kwargs.author).trim();
|
|
@@ -14,6 +14,11 @@ cli({
|
|
|
14
14
|
{ name: "limit", type: "int", default: 10 },
|
|
15
15
|
],
|
|
16
16
|
columns: ["rank", "title", "authors", "source", "year", "cited", "url"],
|
|
17
|
+
capabilities: [
|
|
18
|
+
"mcp-browser.navigate",
|
|
19
|
+
"mcp-browser.evaluate",
|
|
20
|
+
"scholar.search",
|
|
21
|
+
],
|
|
17
22
|
func: async (page, kwargs) => {
|
|
18
23
|
const p = page as IPage;
|
|
19
24
|
const limit = intArg(kwargs.limit, 10, 20);
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { getAdapter } from "../../registry.js";
|
|
2
3
|
import { hfEndpoint, mapHfPaperRow, requireHfPaperId } from "./paper.js";
|
|
3
4
|
|
|
4
5
|
describe("hf agent-facing paper command", () => {
|
|
@@ -45,4 +46,13 @@ describe("hf agent-facing paper command", () => {
|
|
|
45
46
|
it("rejects empty HF paper payloads", () => {
|
|
46
47
|
expect(() => mapHfPaperRow({})).toThrow("no paper data");
|
|
47
48
|
});
|
|
49
|
+
|
|
50
|
+
it("advertises scholarly capabilities for meta-command discovery", () => {
|
|
51
|
+
expect(getAdapter("hf")?.commands.paper?.capabilities).toEqual([
|
|
52
|
+
"http.fetch",
|
|
53
|
+
"scholar.get",
|
|
54
|
+
"scholar.pdf",
|
|
55
|
+
"scholar.code",
|
|
56
|
+
]);
|
|
57
|
+
});
|
|
48
58
|
});
|
package/src/adapters/hf/paper.ts
CHANGED
|
@@ -131,6 +131,7 @@ cli({
|
|
|
131
131
|
"aiSummary",
|
|
132
132
|
"url",
|
|
133
133
|
],
|
|
134
|
+
capabilities: ["http.fetch", "scholar.get", "scholar.pdf", "scholar.code"],
|
|
134
135
|
func: async (_page, kwargs) => {
|
|
135
136
|
const id = requireHfPaperId(kwargs.id);
|
|
136
137
|
return [mapHfPaperRow(await fetchHfPaper(id), hfEndpoint())];
|
package/src/adapters/hf/top.yaml
CHANGED
|
@@ -32,7 +32,7 @@ pipeline:
|
|
|
32
32
|
columns: [rank, id, title, upvotes, authors]
|
|
33
33
|
|
|
34
34
|
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
35
|
-
capabilities: ["http.fetch"]
|
|
35
|
+
capabilities: ["http.fetch", "scholar.search", "scholar.code"]
|
|
36
36
|
minimum_capability: http.fetch
|
|
37
37
|
trust: public
|
|
38
38
|
confidentiality: public
|
|
@@ -21,7 +21,7 @@ pipeline:
|
|
|
21
21
|
columns: [title, authors, upvotes, url]
|
|
22
22
|
|
|
23
23
|
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
24
|
-
capabilities: ["http.fetch"]
|
|
24
|
+
capabilities: ["http.fetch", "scholar.search", "scholar.code"]
|
|
25
25
|
minimum_capability: http.fetch
|
|
26
26
|
trust: public
|
|
27
27
|
confidentiality: public
|
|
@@ -34,7 +34,7 @@ pipeline:
|
|
|
34
34
|
columns: [title, authors, upvotes, published, url]
|
|
35
35
|
|
|
36
36
|
# schema-v2 metadata — injected by `unicli migrate schema-v2`
|
|
37
|
-
capabilities: ["http.fetch"]
|
|
37
|
+
capabilities: ["http.fetch", "scholar.search", "scholar.code"]
|
|
38
38
|
minimum_capability: http.fetch
|
|
39
39
|
trust: public
|
|
40
40
|
confidentiality: public
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src::adapters::neurips::proceedings
|
|
3
|
+
* @does Registers NeurIPS proceedings search over the official yearly paper list.
|
|
4
|
+
* @needs proceedings.neurips.cc static HTML, src/registry.ts
|
|
5
|
+
* @feeds src/commands/scholar.ts via scholar.search, scholar.pdf, and scholar.venue
|
|
6
|
+
* @breaks NeurIPS markup drift surfaces as empty parse output; no unrelated source fallback is used.
|
|
7
|
+
* @invariants Year is explicit; paper URLs are absolutized against proceedings.neurips.cc.
|
|
8
|
+
* @side-effects HTTPS egress to proceedings.neurips.cc only
|
|
9
|
+
* @perf O(N) over one proceedings HTML page
|
|
10
|
+
* @concurrency safe
|
|
11
|
+
* @test tests/unit/adapters/scholar-sources.test.ts
|
|
12
|
+
* @stability experimental
|
|
13
|
+
* @since 2026-05-19
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { cli, Strategy } from "../../registry.js";
|
|
17
|
+
import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
|
|
18
|
+
|
|
19
|
+
const ORIGIN = "https://proceedings.neurips.cc";
|
|
20
|
+
|
|
21
|
+
/**
 * Decodes the basic HTML character references (`&lt;`, `&gt;`, `&quot;`,
 * apostrophe forms, `&amp;`), then collapses whitespace runs to a single
 * space and trims.
 *
 * `&amp;` is decoded last so that an escaped reference such as `&amp;lt;`
 * becomes the literal text `&lt;` instead of being decoded twice into `<`.
 */
function decode(value: string): string {
  return value
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&(?:#39|#x27|apos);/gi, "'")
    .replace(/&amp;/g, "&")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
31
|
+
|
|
32
|
+
/**
 * Makes a link absolute: http(s) URLs are returned untouched, anything else
 * is appended to ORIGIN with exactly one "/" between them.
 */
function absolute(path: string): string {
  if (/^https?:\/\//i.test(path)) return path;
  const separator = path.startsWith("/") ? "" : "/";
  return `${ORIGIN}${separator}${path}`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Validates a proceedings year.
 *
 * @returns the trimmed value when it is exactly four digits.
 * @throws Error otherwise (nullish input is treated as an empty string).
 */
function requireYear(value: unknown): string {
  const text = String(value ?? "").trim();
  const isFourDigits = /^\d{4}$/.test(text);
  if (isFourDigits) return text;
  throw new Error(`neurips year "${text}" is not valid.`);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Parses a NeurIPS yearly paper list into work records.
 *
 * Each `<div class="paper-content">` entry supplies the abstract-page link,
 * the title and a comma-separated author span. The PDF URL is derived from
 * the abstract URL by swapping `-Abstract-` for `-Paper-` and `.html` for
 * `.pdf`.
 *
 * @param html proceedings page HTML.
 * @param year proceedings year stored (as a number) on every record.
 */
export function parseNeuripsRows(
  html: string,
  year = "2024",
): ScholarlyWorkRecord[] {
  const out: ScholarlyWorkRecord[] = [];
  const re =
    /<div class="paper-content">[\s\S]*?<a title="paper title" href="([^"]+)">([\s\S]*?)<\/a>[\s\S]*?<span class="paper-authors">([\s\S]*?)<\/span>/g;
  let match: RegExpExecArray | null;
  while ((match = re.exec(html)) !== null) {
    const sourceUrl = absolute(match[1]);

    // split() always returns at least one element, so the last segment is
    // never undefined; use `||` so an empty segment (URL ending in "/")
    // falls back to the decoded title text instead of producing an empty id.
    const slug =
      sourceUrl.split("/").pop()?.replace(/\.html$/, "") || decode(match[2]);

    out.push({
      id: slug,
      title: decode(match[2].replace(/<[^>]+>/g, " ")),
      authors: decode(match[3])
        .split(",")
        .map((author) => author.trim())
        .filter(Boolean),
      year: Number(year),
      venue: "NeurIPS",
      pdf_url: sourceUrl
        .replace("-Abstract-", "-Paper-")
        .replace(/\.html$/, ".pdf"),
      source_adapter: "neurips",
      source_url: sourceUrl,
      retrieved_at: new Date().toISOString(),
    });
  }
  return out;
}
|
|
78
|
+
|
|
79
|
+
/**
 * `neurips search` — downloads one year's paper list and filters it locally
 * by a case-insensitive substring match on title plus authors.
 */
cli({
  site: "neurips",
  name: "search",
  description: "Search NeurIPS proceedings by year",
  domain: "proceedings.neurips.cc",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "query", type: "str", required: true, positional: true },
    { name: "year", type: "str", default: "2024" },
    { name: "limit", type: "int", default: 20 },
  ],
  columns: ["id", "title", "authors", "year", "venue", "pdf_url", "source_url"],
  capabilities: [
    "http.fetch",
    "scholar.search",
    "scholar.venue",
    "scholar.pdf",
  ],
  func: async (_page, kwargs) => {
    const query = String(kwargs.query ?? "")
      .trim()
      .toLowerCase();
    if (!query) throw new Error("neurips search query cannot be empty.");
    const year = requireYear(kwargs.year);
    const response = await fetch(`${ORIGIN}/paper_files/paper/${year}`, {
      headers: {
        Accept: "text/html",
        "User-Agent":
          "unicli-neurips/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
      },
    });
    if (response.status === 404)
      throw new Error(`NeurIPS ${year} returned no proceedings page.`);
    if (!response.ok)
      throw new Error(`NeurIPS ${year} returned HTTP ${response.status}.`);

    // A non-numeric limit would become NaN, and slice(0, NaN) keeps nothing,
    // which would surface as a misleading "no papers matched" error. Fall
    // back to the default before clamping to 1..200.
    const requested = Math.trunc(Number(kwargs.limit ?? 20));
    const limit = Math.min(
      Math.max(Number.isNaN(requested) ? 20 : requested, 1),
      200,
    );

    const rows = parseNeuripsRows(await response.text(), year)
      .filter((row) =>
        `${row.title} ${row.authors?.join(" ") ?? ""}`
          .toLowerCase()
          .includes(query),
      )
      .slice(0, limit);
    if (rows.length === 0)
      throw new Error(`No NeurIPS ${year} papers matched "${query}".`);
    return rows;
  },
});
|
|
@@ -164,6 +164,14 @@ function authors(work: OpenAlexWork): string {
|
|
|
164
164
|
: "";
|
|
165
165
|
}
|
|
166
166
|
|
|
167
|
+
/**
 * Collects the non-empty, trimmed author display names from a work's
 * `authorships`, in their original order. Returns [] when `authorships`
 * is not an array.
 */
function authorList(work: OpenAlexWork): string[] {
  if (!Array.isArray(work.authorships)) return [];

  const names: string[] = [];
  for (const entry of work.authorships) {
    const name = stringField(entry.author?.display_name).trim();
    if (name) names.push(name);
  }
  return names;
}
|
|
174
|
+
|
|
167
175
|
function venue(work: OpenAlexWork): string {
|
|
168
176
|
return stringField(work.primary_location?.source?.display_name).trim();
|
|
169
177
|
}
|
|
@@ -181,10 +189,17 @@ export function mapOpenAlexSearchRows(
|
|
|
181
189
|
year: numberField(work.publication_year),
|
|
182
190
|
citations: numberField(work.cited_by_count),
|
|
183
191
|
firstAuthor: firstAuthor(work),
|
|
192
|
+
authors: authorList(work),
|
|
184
193
|
venue: venue(work),
|
|
185
194
|
openAccess: Boolean(work.open_access?.is_oa),
|
|
195
|
+
is_open_access: Boolean(work.open_access?.is_oa),
|
|
186
196
|
type: stringField(work.type).trim(),
|
|
187
197
|
doi: bareDoi(work.doi),
|
|
198
|
+
pdf_url: stringField(work.open_access?.oa_url).trim(),
|
|
199
|
+
openalex_id: id,
|
|
200
|
+
source_adapter: "openalex",
|
|
201
|
+
source_url: id ? `https://openalex.org/${id}` : "",
|
|
202
|
+
retrieved_at: new Date().toISOString(),
|
|
188
203
|
url: id ? `https://openalex.org/${id}` : "",
|
|
189
204
|
};
|
|
190
205
|
});
|
|
@@ -203,15 +218,26 @@ export function mapOpenAlexWorkRow(
|
|
|
203
218
|
date: stringField(work.publication_date).trim(),
|
|
204
219
|
language: stringField(work.language).trim(),
|
|
205
220
|
authors: authors(work),
|
|
221
|
+
author_list: authorList(work),
|
|
206
222
|
venue: venue(work),
|
|
207
223
|
citations: numberField(work.cited_by_count),
|
|
224
|
+
cited_by_count: numberField(work.cited_by_count),
|
|
208
225
|
openAccess: Boolean(work.open_access?.is_oa),
|
|
226
|
+
is_open_access: Boolean(work.open_access?.is_oa),
|
|
209
227
|
openAccessUrl: stringField(work.open_access?.oa_url).trim(),
|
|
228
|
+
pdf_url: stringField(work.open_access?.oa_url).trim(),
|
|
210
229
|
referencedCount: Array.isArray(work.referenced_works)
|
|
211
230
|
? work.referenced_works.length
|
|
212
231
|
: null,
|
|
232
|
+
references_count: Array.isArray(work.referenced_works)
|
|
233
|
+
? work.referenced_works.length
|
|
234
|
+
: null,
|
|
213
235
|
doi: bareDoi(work.doi),
|
|
214
236
|
abstract: reconstructOpenAlexAbstract(work.abstract_inverted_index),
|
|
237
|
+
openalex_id: id,
|
|
238
|
+
source_adapter: "openalex",
|
|
239
|
+
source_url: `https://openalex.org/${id}`,
|
|
240
|
+
retrieved_at: new Date().toISOString(),
|
|
215
241
|
url: `https://openalex.org/${id}`,
|
|
216
242
|
};
|
|
217
243
|
}
|
|
@@ -259,6 +285,7 @@ cli({
|
|
|
259
285
|
"doi",
|
|
260
286
|
"url",
|
|
261
287
|
],
|
|
288
|
+
capabilities: ["http.fetch", "scholar.search"],
|
|
262
289
|
func: async (_page, kwargs) => {
|
|
263
290
|
const query = requireOpenAlexString(kwargs.query, "query");
|
|
264
291
|
const limit = requireOpenAlexLimit(kwargs.limit);
|
|
@@ -308,6 +335,12 @@ cli({
|
|
|
308
335
|
"abstract",
|
|
309
336
|
"url",
|
|
310
337
|
],
|
|
338
|
+
capabilities: [
|
|
339
|
+
"http.fetch",
|
|
340
|
+
"scholar.get",
|
|
341
|
+
"scholar.pdf",
|
|
342
|
+
"scholar.references",
|
|
343
|
+
],
|
|
311
344
|
func: async (_page, kwargs) => {
|
|
312
345
|
const ref = requireOpenAlexWorkRef(kwargs.id);
|
|
313
346
|
const work = (await fetchOpenAlex(
|