@zenalexa/unicli 0.225.2 → 0.225.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/dist/adapters/acl-anthology/papers.d.ts +16 -9
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
- package/dist/adapters/acl-anthology/papers.js +322 -58
- package/dist/adapters/acl-anthology/papers.js.map +1 -1
- package/dist/adapters/arxiv/papers.d.ts +22 -4
- package/dist/adapters/arxiv/papers.d.ts.map +1 -1
- package/dist/adapters/arxiv/papers.js +202 -4
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.d.ts +15 -1
- package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +72 -8
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/biorxiv/preprints.d.ts +9 -0
- package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/biorxiv/preprints.js +78 -0
- package/dist/adapters/biorxiv/preprints.js.map +1 -0
- package/dist/adapters/cnki/search.d.ts +82 -0
- package/dist/adapters/cnki/search.d.ts.map +1 -0
- package/dist/adapters/cnki/search.js +236 -0
- package/dist/adapters/cnki/search.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +12 -7
- package/dist/adapters/cvf/papers.d.ts.map +1 -1
- package/dist/adapters/cvf/papers.js +210 -27
- package/dist/adapters/cvf/papers.js.map +1 -1
- package/dist/adapters/dblp/publications.d.ts +12 -5
- package/dist/adapters/dblp/publications.d.ts.map +1 -1
- package/dist/adapters/dblp/publications.js +31 -8
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/search.d.ts +22 -1
- package/dist/adapters/google-scholar/search.d.ts.map +1 -1
- package/dist/adapters/google-scholar/search.js +129 -14
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.d.ts +12 -3
- package/dist/adapters/hf/paper.d.ts.map +1 -1
- package/dist/adapters/hf/paper.js +65 -5
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/medrxiv/preprints.d.ts +9 -0
- package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/medrxiv/preprints.js +78 -0
- package/dist/adapters/medrxiv/preprints.js.map +1 -0
- package/dist/adapters/neurips/proceedings.d.ts +8 -7
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
- package/dist/adapters/neurips/proceedings.js +209 -21
- package/dist/adapters/neurips/proceedings.js.map +1 -1
- package/dist/adapters/openalex/works.d.ts +21 -5
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +108 -8
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.d.ts +10 -4
- package/dist/adapters/openreview/papers.d.ts.map +1 -1
- package/dist/adapters/openreview/papers.js +351 -24
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +6 -6
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
- package/dist/adapters/pmlr/proceedings.js +92 -12
- package/dist/adapters/pmlr/proceedings.js.map +1 -1
- package/dist/adapters/pubmed/articles.d.ts +8 -4
- package/dist/adapters/pubmed/articles.d.ts.map +1 -1
- package/dist/adapters/pubmed/articles.js +272 -39
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/rxiv/preprints.d.ts +75 -0
- package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/rxiv/preprints.js +651 -0
- package/dist/adapters/rxiv/preprints.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.js +122 -0
- package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
- package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
- package/dist/adapters/semantic-scholar/papers.js +80 -6
- package/dist/adapters/semantic-scholar/papers.js.map +1 -1
- package/dist/adapters/unpaywall/works.d.ts +7 -7
- package/dist/adapters/unpaywall/works.d.ts.map +1 -1
- package/dist/adapters/unpaywall/works.js +104 -12
- package/dist/adapters/unpaywall/works.js.map +1 -1
- package/dist/adapters/wanfang/search.d.ts +14 -0
- package/dist/adapters/wanfang/search.d.ts.map +1 -1
- package/dist/adapters/wanfang/search.js +56 -7
- package/dist/adapters/wanfang/search.js.map +1 -1
- package/dist/browser/page.d.ts +2 -0
- package/dist/browser/page.d.ts.map +1 -1
- package/dist/browser/page.js +12 -0
- package/dist/browser/page.js.map +1 -1
- package/dist/commands/browser/actions.d.ts.map +1 -1
- package/dist/commands/browser/actions.js +59 -3
- package/dist/commands/browser/actions.js.map +1 -1
- package/dist/commands/scholar.d.ts +77 -5
- package/dist/commands/scholar.d.ts.map +1 -1
- package/dist/commands/scholar.js +2945 -83
- package/dist/commands/scholar.js.map +1 -1
- package/dist/core/command-contract.d.ts.map +1 -1
- package/dist/core/command-contract.js +5 -0
- package/dist/core/command-contract.js.map +1 -1
- package/dist/core/schema-v2.d.ts +1 -0
- package/dist/core/schema-v2.d.ts.map +1 -1
- package/dist/core/schema-v2.js +1 -0
- package/dist/core/schema-v2.js.map +1 -1
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +208 -0
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/core-catalog.d.ts +2 -0
- package/dist/discovery/core-catalog.d.ts.map +1 -1
- package/dist/discovery/core-catalog.js +487 -0
- package/dist/discovery/core-catalog.js.map +1 -1
- package/dist/discovery/intents.d.ts.map +1 -1
- package/dist/discovery/intents.js +273 -2
- package/dist/discovery/intents.js.map +1 -1
- package/dist/discovery/loader.d.ts.map +1 -1
- package/dist/discovery/loader.js +3 -0
- package/dist/discovery/loader.js.map +1 -1
- package/dist/engine/capability-policy.d.ts.map +1 -1
- package/dist/engine/capability-policy.js +30 -4
- package/dist/engine/capability-policy.js.map +1 -1
- package/dist/engine/kernel/stages.d.ts.map +1 -1
- package/dist/engine/kernel/stages.js +3 -0
- package/dist/engine/kernel/stages.js.map +1 -1
- package/dist/engine/operation-policy.d.ts +4 -1
- package/dist/engine/operation-policy.d.ts.map +1 -1
- package/dist/engine/operation-policy.js +23 -0
- package/dist/engine/operation-policy.js.map +1 -1
- package/dist/fast-path/manifest.d.ts +3 -0
- package/dist/fast-path/manifest.d.ts.map +1 -1
- package/dist/fast-path/manifest.js.map +1 -1
- package/dist/fast-path/policy.d.ts.map +1 -1
- package/dist/fast-path/policy.js +3 -0
- package/dist/fast-path/policy.js.map +1 -1
- package/dist/manifest-compact.txt +1 -1
- package/dist/manifest.json +6804 -1002
- package/dist/registry.d.ts +2 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +1 -0
- package/dist/registry.js.map +1 -1
- package/dist/types/scholarly.d.ts +19 -4
- package/dist/types/scholarly.d.ts.map +1 -1
- package/dist/types/scholarly.js +4 -4
- package/dist/types.d.ts +8 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.test.ts +111 -0
- package/src/adapters/acl-anthology/papers.ts +379 -71
- package/src/adapters/arxiv/papers.test.ts +46 -0
- package/src/adapters/arxiv/papers.ts +251 -4
- package/src/adapters/baidu-scholar/search.ts +74 -11
- package/src/adapters/biorxiv/preprints.ts +112 -0
- package/src/adapters/cnki/search.ts +357 -0
- package/src/adapters/cvf/papers.ts +260 -27
- package/src/adapters/dblp/publications.test.ts +9 -0
- package/src/adapters/dblp/publications.ts +31 -8
- package/src/adapters/google-scholar/search.ts +165 -17
- package/src/adapters/hf/paper.test.ts +23 -0
- package/src/adapters/hf/paper.ts +89 -5
- package/src/adapters/hf/top.yaml +34 -2
- package/src/adapters/huggingface-papers/daily.yaml +37 -3
- package/src/adapters/huggingface-papers/search.yaml +43 -9
- package/src/adapters/medrxiv/preprints.ts +112 -0
- package/src/adapters/neurips/proceedings.ts +266 -22
- package/src/adapters/openalex/works.test.ts +15 -4
- package/src/adapters/openalex/works.ts +136 -8
- package/src/adapters/openreview/papers.test.ts +31 -0
- package/src/adapters/openreview/papers.ts +407 -29
- package/src/adapters/pmlr/proceedings.ts +102 -12
- package/src/adapters/pubmed/articles.test.ts +88 -1
- package/src/adapters/pubmed/articles.ts +343 -44
- package/src/adapters/rxiv/preprints.test.ts +233 -0
- package/src/adapters/rxiv/preprints.ts +849 -0
- package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
- package/src/adapters/scholar-artifacts/pdf.ts +133 -0
- package/src/adapters/semantic-scholar/papers.ts +98 -6
- package/src/adapters/unpaywall/works.ts +141 -12
- package/src/adapters/wanfang/search.ts +57 -7
- package/src/adapters/cnki/search.yaml +0 -49
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @owner src/adapters/openreview/papers.ts
|
|
3
|
-
* @does Register agent-facing OpenReview search, paper, author, venue, and
|
|
4
|
-
* @needs Public api2.openreview.net notes API, forum/profile id validation, note content normalization.
|
|
5
|
-
* @feeds surface coverage ledger, scholarly review workflow, agent-readable paper/review rows.
|
|
6
|
-
* @breaks OpenReview API envelope drift, content.value parsing, or silent empty threads hide paper review state.
|
|
3
|
+
* @does Register agent-facing OpenReview search, paper, author, venue, reviews, PDF download, and PDF text extraction commands.
|
|
4
|
+
* @needs Public api2.openreview.net notes API, openreview.net PDF URLs, pdftotext for read, forum/profile id validation, note content normalization.
|
|
5
|
+
* @feeds surface coverage ledger, scholarly review workflow, agent-readable paper/review rows, local PDF/fulltext workflow.
|
|
6
|
+
* @breaks OpenReview API envelope drift, content.value parsing, PDF download failure, pdftotext absence, or silent empty threads hide paper review state.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
+
import { execFile } from "node:child_process";
|
|
10
|
+
import { join, resolve } from "node:path";
|
|
11
|
+
import { promisify } from "node:util";
|
|
12
|
+
|
|
13
|
+
import { httpDownload, sanitizeFilename } from "../../engine/download.js";
|
|
9
14
|
import { cli, Strategy } from "../../registry.js";
|
|
10
15
|
|
|
11
16
|
const OPENREVIEW_API = "https://api2.openreview.net";
|
|
12
17
|
const OPENREVIEW_BASE = "https://openreview.net";
|
|
13
18
|
const FORUM_ID_RE = /^[A-Za-z0-9_-]{6,20}$/;
|
|
14
19
|
const PROFILE_ID_RE = /^~(?=.*\p{L})[\p{L}\p{M}0-9._-]+\d+$/u;
|
|
20
|
+
const execFileAsync = promisify(execFile);
|
|
15
21
|
const REVIEW_SECTION_FIELDS = [
|
|
16
22
|
["summary", "Summary"],
|
|
17
23
|
["strengths", "Strengths"],
|
|
@@ -89,7 +95,9 @@ export function requireOpenReviewOffset(value: unknown, fallback = 0): number {
|
|
|
89
95
|
}
|
|
90
96
|
|
|
91
97
|
export function requireForumId(value: unknown, label = "id"): string {
|
|
92
|
-
const
|
|
98
|
+
const raw = String(value ?? "").trim();
|
|
99
|
+
const id =
|
|
100
|
+
raw.match(/^https?:\/\/openreview\.net\/forum\?id=([^&#]+)/i)?.[1] ?? raw;
|
|
93
101
|
if (!id) throw new Error(`openreview ${label} is required.`);
|
|
94
102
|
if (!FORUM_ID_RE.test(id)) {
|
|
95
103
|
throw new Error(
|
|
@@ -99,6 +107,44 @@ export function requireForumId(value: unknown, label = "id"): string {
|
|
|
99
107
|
return id;
|
|
100
108
|
}
|
|
101
109
|
|
|
110
|
+
export function requireOpenReviewPageRange(
|
|
111
|
+
firstPage: unknown,
|
|
112
|
+
lastPage: unknown,
|
|
113
|
+
): { firstPage: number; lastPage: number } {
|
|
114
|
+
const first = coerceOpenReviewInt(firstPage ?? 1);
|
|
115
|
+
const last = coerceOpenReviewInt(lastPage ?? 20);
|
|
116
|
+
if (!Number.isInteger(first) || first < 1) {
|
|
117
|
+
throw new Error("openreview first-page must be an integer >= 1.");
|
|
118
|
+
}
|
|
119
|
+
if (!Number.isInteger(last) || last < first) {
|
|
120
|
+
throw new Error("openreview last-page must be an integer >= first-page.");
|
|
121
|
+
}
|
|
122
|
+
return { firstPage: first, lastPage: last };
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
export function requireOpenReviewMaxChars(
|
|
126
|
+
value: unknown,
|
|
127
|
+
fallback = 40_000,
|
|
128
|
+
): number {
|
|
129
|
+
const raw =
|
|
130
|
+
value === undefined || value === null || value === "" ? fallback : value;
|
|
131
|
+
const n = typeof raw === "number" ? raw : Number(raw);
|
|
132
|
+
if (!Number.isInteger(n) || n < 1_000 || n > 1_000_000) {
|
|
133
|
+
throw new Error(
|
|
134
|
+
`openreview max-chars must be an integer in [1000, 1000000]. Got: ${String(value)}`,
|
|
135
|
+
);
|
|
136
|
+
}
|
|
137
|
+
return n;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
export function openReviewPdfFilename(id: string, title: unknown): string {
|
|
141
|
+
const slug = stringField(title)
|
|
142
|
+
.replace(/[^A-Za-z0-9._-]+/g, "-")
|
|
143
|
+
.replace(/^-+|-+$/g, "")
|
|
144
|
+
.slice(0, 80);
|
|
145
|
+
return sanitizeFilename(`${id}${slug ? `-${slug}` : ""}.pdf`);
|
|
146
|
+
}
|
|
147
|
+
|
|
102
148
|
export function requireProfileId(value: unknown): string {
|
|
103
149
|
const id = String(value ?? "").trim();
|
|
104
150
|
if (!id) throw new Error("openreview profile is required.");
|
|
@@ -154,8 +200,12 @@ export function mapOpenReviewNoteRow(
|
|
|
154
200
|
const keywordList = Array.isArray(keywords)
|
|
155
201
|
? keywords.map(stringField).filter(Boolean).join(", ")
|
|
156
202
|
: stringField(keywords);
|
|
203
|
+
const pdate = formatOpenReviewDate(note.pdate ?? note.cdate);
|
|
204
|
+
const pdfUrl = absoluteOpenReviewPdf(readContent(content, "pdf"));
|
|
205
|
+
const sourceUrl = id ? `${OPENREVIEW_BASE}/forum?id=${id}` : "";
|
|
157
206
|
return {
|
|
158
207
|
id,
|
|
208
|
+
openreview_id: id,
|
|
159
209
|
title: stringField(readContent(content, "title")).replace(/\s+/g, " "),
|
|
160
210
|
authors: authorList,
|
|
161
211
|
keywords: keywordList,
|
|
@@ -166,9 +216,15 @@ export function mapOpenReviewNoteRow(
|
|
|
166
216
|
/\s+/g,
|
|
167
217
|
" ",
|
|
168
218
|
),
|
|
169
|
-
pdate
|
|
170
|
-
|
|
171
|
-
|
|
219
|
+
pdate,
|
|
220
|
+
date: pdate,
|
|
221
|
+
pdf: pdfUrl,
|
|
222
|
+
pdf_url: pdfUrl,
|
|
223
|
+
url: sourceUrl,
|
|
224
|
+
source_url: sourceUrl,
|
|
225
|
+
landing_url: sourceUrl,
|
|
226
|
+
source_adapter: "openreview",
|
|
227
|
+
retrieved_at: new Date().toISOString(),
|
|
172
228
|
};
|
|
173
229
|
}
|
|
174
230
|
|
|
@@ -181,6 +237,11 @@ function invitationTail(note: OpenReviewNote): string {
|
|
|
181
237
|
return "";
|
|
182
238
|
}
|
|
183
239
|
|
|
240
|
+
function lastInvitation(note: OpenReviewNote): string {
|
|
241
|
+
const invitations = Array.isArray(note.invitations) ? note.invitations : [];
|
|
242
|
+
return invitations.length > 0 ? String(invitations.at(-1)) : "";
|
|
243
|
+
}
|
|
244
|
+
|
|
184
245
|
export function classifyReviewNote(
|
|
185
246
|
note: OpenReviewNote,
|
|
186
247
|
isRoot: boolean,
|
|
@@ -216,8 +277,24 @@ function joinReviewSections(content: OpenReviewContent | undefined): string {
|
|
|
216
277
|
return parts.join("\n\n");
|
|
217
278
|
}
|
|
218
279
|
|
|
219
|
-
function
|
|
220
|
-
|
|
280
|
+
function truncateText(
|
|
281
|
+
text: string,
|
|
282
|
+
maxLength: number,
|
|
283
|
+
): { text: string; truncated: boolean; originalChars: number } {
|
|
284
|
+
const originalChars = text.length;
|
|
285
|
+
if (originalChars <= maxLength) {
|
|
286
|
+
return { text, truncated: false, originalChars };
|
|
287
|
+
}
|
|
288
|
+
return {
|
|
289
|
+
text: `${text.slice(0, Math.max(0, maxLength - 3)).trimEnd()}...`,
|
|
290
|
+
truncated: true,
|
|
291
|
+
originalChars,
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
function openReviewNoteUrl(forum: string, noteId: string): string {
|
|
296
|
+
const forumUrl = `${OPENREVIEW_BASE}/forum?id=${forum}`;
|
|
297
|
+
return noteId && noteId !== forum ? `${forumUrl}&noteId=${noteId}` : forumUrl;
|
|
221
298
|
}
|
|
222
299
|
|
|
223
300
|
export function mapReviewThreadRows(
|
|
@@ -231,17 +308,26 @@ export function mapReviewThreadRows(
|
|
|
231
308
|
.sort((a, b) => (numberField(a.cdate) ?? 0) - (numberField(b.cdate) ?? 0));
|
|
232
309
|
return [root, ...sorted].map((note) => {
|
|
233
310
|
const isRoot = note.id === forum;
|
|
311
|
+
const noteId = stringField(note.id);
|
|
234
312
|
const rating = readContent(note.content, "rating");
|
|
235
313
|
const confidence = readContent(note.content, "confidence");
|
|
314
|
+
const text = truncateText(joinReviewSections(note.content), maxLength);
|
|
236
315
|
return {
|
|
316
|
+
forum,
|
|
317
|
+
note_id: noteId,
|
|
237
318
|
type: classifyReviewNote(note, isRoot),
|
|
238
319
|
author: authorFromSignatures(note.signatures),
|
|
320
|
+
invitation: lastInvitation(note),
|
|
321
|
+
created_at: formatOpenReviewDate(note.pdate ?? note.cdate),
|
|
322
|
+
source_url: openReviewNoteUrl(forum, noteId),
|
|
239
323
|
rating: rating === undefined || rating === null ? "" : String(rating),
|
|
240
324
|
confidence:
|
|
241
325
|
confidence === undefined || confidence === null
|
|
242
326
|
? ""
|
|
243
327
|
: String(confidence),
|
|
244
|
-
text:
|
|
328
|
+
text: text.text,
|
|
329
|
+
text_chars: text.originalChars,
|
|
330
|
+
text_truncated: text.truncated,
|
|
245
331
|
};
|
|
246
332
|
});
|
|
247
333
|
}
|
|
@@ -286,6 +372,88 @@ function notesFromEnvelope(json: NotesEnvelope): OpenReviewNote[] {
|
|
|
286
372
|
return Array.isArray(json.notes) ? json.notes : [];
|
|
287
373
|
}
|
|
288
374
|
|
|
375
|
+
async function fetchOpenReviewPaperRow(
|
|
376
|
+
id: string,
|
|
377
|
+
): Promise<Record<string, unknown>> {
|
|
378
|
+
const notes = notesFromEnvelope(
|
|
379
|
+
await fetchOpenReview(
|
|
380
|
+
`/notes?id=${encodeURIComponent(id)}`,
|
|
381
|
+
`openreview paper ${id}`,
|
|
382
|
+
),
|
|
383
|
+
);
|
|
384
|
+
if (notes.length === 0)
|
|
385
|
+
throw new Error(`No OpenReview paper found with id "${id}".`);
|
|
386
|
+
return mapOpenReviewNoteRow(notes[0]);
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
function hasPaperContent(note: OpenReviewNote): boolean {
|
|
390
|
+
const content = note.content ?? {};
|
|
391
|
+
return (
|
|
392
|
+
stringField(readContent(content, "title")).length > 0 ||
|
|
393
|
+
stringField(readContent(content, "abstract")).length > 0 ||
|
|
394
|
+
stringField(readContent(content, "pdf")).length > 0
|
|
395
|
+
);
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
async function paperRowsFromSearchNotes(
|
|
399
|
+
notes: OpenReviewNote[],
|
|
400
|
+
limit: number,
|
|
401
|
+
): Promise<Array<Record<string, unknown>>> {
|
|
402
|
+
const rows: Array<Record<string, unknown>> = [];
|
|
403
|
+
const seen = new Set<string>();
|
|
404
|
+
let firstHydrationError: Error | undefined;
|
|
405
|
+
|
|
406
|
+
for (const note of notes) {
|
|
407
|
+
const rawId = hasPaperContent(note) ? note.id : note.forum;
|
|
408
|
+
const idText = stringField(rawId);
|
|
409
|
+
if (!idText || seen.has(idText) || !FORUM_ID_RE.test(idText)) continue;
|
|
410
|
+
seen.add(idText);
|
|
411
|
+
try {
|
|
412
|
+
const row = hasPaperContent(note)
|
|
413
|
+
? mapOpenReviewNoteRow(note)
|
|
414
|
+
: await fetchOpenReviewPaperRow(idText);
|
|
415
|
+
if (stringField(row.title) || stringField(row.pdf_url)) rows.push(row);
|
|
416
|
+
} catch (error) {
|
|
417
|
+
if (!firstHydrationError) {
|
|
418
|
+
firstHydrationError =
|
|
419
|
+
error instanceof Error ? error : new Error(String(error));
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
if (rows.length >= limit) break;
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
if (rows.length === 0 && firstHydrationError) throw firstHydrationError;
|
|
426
|
+
return rows;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
async function downloadOpenReviewPdf(
|
|
430
|
+
row: Record<string, unknown>,
|
|
431
|
+
output: unknown,
|
|
432
|
+
): Promise<Record<string, unknown>> {
|
|
433
|
+
const id = requireForumId(row.id);
|
|
434
|
+
const pdfUrl = stringField(row.pdf_url);
|
|
435
|
+
if (!pdfUrl) {
|
|
436
|
+
throw new Error(`OpenReview paper "${id}" does not expose a PDF URL.`);
|
|
437
|
+
}
|
|
438
|
+
const outputDir = resolve(String(output ?? "./openreview-downloads"));
|
|
439
|
+
const path = join(outputDir, openReviewPdfFilename(id, row.title));
|
|
440
|
+
const download = await httpDownload(pdfUrl, path, {
|
|
441
|
+
"User-Agent":
|
|
442
|
+
"unicli-openreview/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
|
|
443
|
+
Accept: "application/pdf,*/*",
|
|
444
|
+
});
|
|
445
|
+
if (download.status === "failed") {
|
|
446
|
+
throw new Error(
|
|
447
|
+
`OpenReview PDF download failed for ${id}: ${download.error ?? "unknown error"}.`,
|
|
448
|
+
);
|
|
449
|
+
}
|
|
450
|
+
return {
|
|
451
|
+
...row,
|
|
452
|
+
path: download.path,
|
|
453
|
+
_download: download,
|
|
454
|
+
};
|
|
455
|
+
}
|
|
456
|
+
|
|
289
457
|
cli({
|
|
290
458
|
site: "openreview",
|
|
291
459
|
name: "search",
|
|
@@ -302,16 +470,26 @@ cli({
|
|
|
302
470
|
},
|
|
303
471
|
{ name: "limit", type: "int", default: 25, description: "Max results" },
|
|
304
472
|
],
|
|
305
|
-
columns: [
|
|
473
|
+
columns: [
|
|
474
|
+
"rank",
|
|
475
|
+
"id",
|
|
476
|
+
"title",
|
|
477
|
+
"authors",
|
|
478
|
+
"venue",
|
|
479
|
+
"pdate",
|
|
480
|
+
"pdf_url",
|
|
481
|
+
"source_url",
|
|
482
|
+
],
|
|
306
483
|
capabilities: ["http.fetch", "scholar.search", "scholar.review"],
|
|
307
484
|
func: async (_page, kwargs) => {
|
|
308
485
|
const query = String(kwargs.query ?? "").trim();
|
|
309
486
|
if (!query) throw new Error("openreview search query cannot be empty.");
|
|
310
487
|
const limit = requireOpenReviewLimit(kwargs.limit, 25, 50);
|
|
488
|
+
const searchLimit = Math.min(limit * 5, 50);
|
|
311
489
|
const params = new URLSearchParams({
|
|
312
490
|
term: query,
|
|
313
491
|
type: "terms",
|
|
314
|
-
limit: String(
|
|
492
|
+
limit: String(searchLimit),
|
|
315
493
|
});
|
|
316
494
|
const notes = notesFromEnvelope(
|
|
317
495
|
await fetchOpenReview(
|
|
@@ -321,8 +499,11 @@ cli({
|
|
|
321
499
|
);
|
|
322
500
|
if (notes.length === 0)
|
|
323
501
|
throw new Error(`No OpenReview papers found for "${query}".`);
|
|
324
|
-
|
|
325
|
-
|
|
502
|
+
const paperRows = await paperRowsFromSearchNotes(notes, limit);
|
|
503
|
+
if (paperRows.length === 0) {
|
|
504
|
+
throw new Error(`No OpenReview paper notes found for "${query}".`);
|
|
505
|
+
}
|
|
506
|
+
return paperRows.map((row, index) => {
|
|
326
507
|
return {
|
|
327
508
|
rank: index + 1,
|
|
328
509
|
id: row.id,
|
|
@@ -330,7 +511,10 @@ cli({
|
|
|
330
511
|
authors: row.authors,
|
|
331
512
|
venue: row.venue,
|
|
332
513
|
pdate: row.pdate,
|
|
333
|
-
|
|
514
|
+
pdf_url: row.pdf_url,
|
|
515
|
+
source_url: row.source_url,
|
|
516
|
+
source_adapter: row.source_adapter,
|
|
517
|
+
openreview_id: row.openreview_id,
|
|
334
518
|
};
|
|
335
519
|
});
|
|
336
520
|
},
|
|
@@ -362,23 +546,18 @@ cli({
|
|
|
362
546
|
"abstract",
|
|
363
547
|
"pdate",
|
|
364
548
|
"pdf",
|
|
549
|
+
"pdf_url",
|
|
365
550
|
"url",
|
|
551
|
+
"source_url",
|
|
366
552
|
],
|
|
367
553
|
capabilities: ["http.fetch", "scholar.get", "scholar.pdf", "scholar.review"],
|
|
368
554
|
func: async (_page, kwargs) => {
|
|
369
555
|
const id = requireForumId(kwargs.id);
|
|
370
|
-
const
|
|
371
|
-
await fetchOpenReview(
|
|
372
|
-
`/notes?id=${encodeURIComponent(id)}`,
|
|
373
|
-
`openreview paper ${id}`,
|
|
374
|
-
),
|
|
375
|
-
);
|
|
376
|
-
if (notes.length === 0)
|
|
377
|
-
throw new Error(`No OpenReview paper found with id "${id}".`);
|
|
378
|
-
const row = mapOpenReviewNoteRow(notes[0]);
|
|
556
|
+
const row = await fetchOpenReviewPaperRow(id);
|
|
379
557
|
return [
|
|
380
558
|
{
|
|
381
559
|
id: row.id,
|
|
560
|
+
openreview_id: row.openreview_id,
|
|
382
561
|
title: row.title,
|
|
383
562
|
authors: row.authors,
|
|
384
563
|
keywords: row.keywords,
|
|
@@ -387,8 +566,174 @@ cli({
|
|
|
387
566
|
primary_area: row.primary_area,
|
|
388
567
|
abstract: row.abstract,
|
|
389
568
|
pdate: row.pdate,
|
|
569
|
+
date: row.date,
|
|
390
570
|
pdf: row.pdf,
|
|
571
|
+
pdf_url: row.pdf_url,
|
|
391
572
|
url: row.url,
|
|
573
|
+
source_url: row.source_url,
|
|
574
|
+
source_adapter: row.source_adapter,
|
|
575
|
+
retrieved_at: row.retrieved_at,
|
|
576
|
+
},
|
|
577
|
+
];
|
|
578
|
+
},
|
|
579
|
+
});
|
|
580
|
+
|
|
581
|
+
cli({
|
|
582
|
+
site: "openreview",
|
|
583
|
+
name: "download",
|
|
584
|
+
description: "Download an OpenReview paper PDF by forum id",
|
|
585
|
+
domain: "openreview.net",
|
|
586
|
+
strategy: Strategy.PUBLIC,
|
|
587
|
+
args: [
|
|
588
|
+
{
|
|
589
|
+
name: "id",
|
|
590
|
+
type: "str",
|
|
591
|
+
required: true,
|
|
592
|
+
positional: true,
|
|
593
|
+
description: "OpenReview forum id or forum URL",
|
|
594
|
+
"x-unicli-kind": "id",
|
|
595
|
+
"x-unicli-accepts": ["url"],
|
|
596
|
+
},
|
|
597
|
+
{
|
|
598
|
+
name: "output",
|
|
599
|
+
type: "str",
|
|
600
|
+
default: "./openreview-downloads",
|
|
601
|
+
description: "Output directory",
|
|
602
|
+
"x-unicli-kind": "path",
|
|
603
|
+
},
|
|
604
|
+
],
|
|
605
|
+
columns: ["id", "title", "pdf_url", "path", "_download"],
|
|
606
|
+
capabilities: ["http.fetch", "http.download", "scholar.pdf"],
|
|
607
|
+
minimum_capability: "http.download",
|
|
608
|
+
func: async (_page, kwargs) => {
|
|
609
|
+
const id = requireForumId(kwargs.id);
|
|
610
|
+
const downloaded = await downloadOpenReviewPdf(
|
|
611
|
+
await fetchOpenReviewPaperRow(id),
|
|
612
|
+
kwargs.output,
|
|
613
|
+
);
|
|
614
|
+
return [
|
|
615
|
+
{
|
|
616
|
+
id: downloaded.id,
|
|
617
|
+
title: downloaded.title,
|
|
618
|
+
pdf_url: downloaded.pdf_url,
|
|
619
|
+
path: downloaded.path,
|
|
620
|
+
_download: downloaded._download,
|
|
621
|
+
source_adapter: downloaded.source_adapter,
|
|
622
|
+
source_url: downloaded.source_url,
|
|
623
|
+
openreview_id: downloaded.openreview_id,
|
|
624
|
+
},
|
|
625
|
+
];
|
|
626
|
+
},
|
|
627
|
+
});
|
|
628
|
+
|
|
629
|
+
cli({
|
|
630
|
+
site: "openreview",
|
|
631
|
+
name: "read",
|
|
632
|
+
description: "Download and extract text from an OpenReview paper PDF",
|
|
633
|
+
domain: "openreview.net",
|
|
634
|
+
strategy: Strategy.PUBLIC,
|
|
635
|
+
args: [
|
|
636
|
+
{
|
|
637
|
+
name: "id",
|
|
638
|
+
type: "str",
|
|
639
|
+
required: true,
|
|
640
|
+
positional: true,
|
|
641
|
+
description: "OpenReview forum id or forum URL",
|
|
642
|
+
"x-unicli-kind": "id",
|
|
643
|
+
"x-unicli-accepts": ["url"],
|
|
644
|
+
},
|
|
645
|
+
{
|
|
646
|
+
name: "output",
|
|
647
|
+
type: "str",
|
|
648
|
+
default: "./openreview-downloads",
|
|
649
|
+
description: "Output directory for the PDF used for extraction",
|
|
650
|
+
"x-unicli-kind": "path",
|
|
651
|
+
},
|
|
652
|
+
{
|
|
653
|
+
name: "first-page",
|
|
654
|
+
type: "int",
|
|
655
|
+
default: 1,
|
|
656
|
+
description: "First page to extract",
|
|
657
|
+
},
|
|
658
|
+
{
|
|
659
|
+
name: "last-page",
|
|
660
|
+
type: "int",
|
|
661
|
+
default: 20,
|
|
662
|
+
description: "Last page to extract",
|
|
663
|
+
},
|
|
664
|
+
{
|
|
665
|
+
name: "max-chars",
|
|
666
|
+
type: "int",
|
|
667
|
+
default: 40000,
|
|
668
|
+
description: "Maximum extracted text characters",
|
|
669
|
+
},
|
|
670
|
+
],
|
|
671
|
+
columns: [
|
|
672
|
+
"id",
|
|
673
|
+
"title",
|
|
674
|
+
"pdf_url",
|
|
675
|
+
"path",
|
|
676
|
+
"text",
|
|
677
|
+
"text_chars",
|
|
678
|
+
"text_truncated",
|
|
679
|
+
],
|
|
680
|
+
capabilities: [
|
|
681
|
+
"http.fetch",
|
|
682
|
+
"http.download",
|
|
683
|
+
"subprocess.exec",
|
|
684
|
+
"scholar.pdf",
|
|
685
|
+
"scholar.fulltext",
|
|
686
|
+
],
|
|
687
|
+
minimum_capability: "subprocess.exec",
|
|
688
|
+
func: async (_page, kwargs) => {
|
|
689
|
+
const id = requireForumId(kwargs.id);
|
|
690
|
+
const { firstPage, lastPage } = requireOpenReviewPageRange(
|
|
691
|
+
kwargs["first-page"] ?? kwargs.firstPage,
|
|
692
|
+
kwargs["last-page"] ?? kwargs.lastPage,
|
|
693
|
+
);
|
|
694
|
+
const maxChars = requireOpenReviewMaxChars(
|
|
695
|
+
kwargs["max-chars"] ?? kwargs.maxChars,
|
|
696
|
+
);
|
|
697
|
+
const downloaded = await downloadOpenReviewPdf(
|
|
698
|
+
await fetchOpenReviewPaperRow(id),
|
|
699
|
+
kwargs.output,
|
|
700
|
+
);
|
|
701
|
+
const path = stringField(downloaded.path);
|
|
702
|
+
if (!path) throw new Error(`OpenReview PDF download produced no path.`);
|
|
703
|
+
const { stdout } = await execFileAsync(
|
|
704
|
+
"pdftotext",
|
|
705
|
+
[
|
|
706
|
+
"-layout",
|
|
707
|
+
"-enc",
|
|
708
|
+
"UTF-8",
|
|
709
|
+
"-f",
|
|
710
|
+
String(firstPage),
|
|
711
|
+
"-l",
|
|
712
|
+
String(lastPage),
|
|
713
|
+
path,
|
|
714
|
+
"-",
|
|
715
|
+
],
|
|
716
|
+
{ timeout: 60000, maxBuffer: 10 * 1024 * 1024 },
|
|
717
|
+
);
|
|
718
|
+
const text = stdout.trim();
|
|
719
|
+
if (!text) {
|
|
720
|
+
throw new Error(
|
|
721
|
+
`pdftotext returned no text for OpenReview paper ${id} pages ${firstPage}-${lastPage}.`,
|
|
722
|
+
);
|
|
723
|
+
}
|
|
724
|
+
const truncated = truncateText(text, maxChars);
|
|
725
|
+
return [
|
|
726
|
+
{
|
|
727
|
+
id: downloaded.id,
|
|
728
|
+
title: downloaded.title,
|
|
729
|
+
pdf_url: downloaded.pdf_url,
|
|
730
|
+
path,
|
|
731
|
+
text: truncated.text,
|
|
732
|
+
text_chars: truncated.originalChars,
|
|
733
|
+
text_truncated: truncated.truncated,
|
|
734
|
+
source_adapter: downloaded.source_adapter,
|
|
735
|
+
source_url: downloaded.source_url,
|
|
736
|
+
openreview_id: downloaded.openreview_id,
|
|
392
737
|
},
|
|
393
738
|
];
|
|
394
739
|
},
|
|
@@ -410,7 +755,16 @@ cli({
|
|
|
410
755
|
},
|
|
411
756
|
{ name: "limit", type: "int", default: 50, description: "Max submissions" },
|
|
412
757
|
],
|
|
413
|
-
columns: [
|
|
758
|
+
columns: [
|
|
759
|
+
"rank",
|
|
760
|
+
"id",
|
|
761
|
+
"title",
|
|
762
|
+
"authors",
|
|
763
|
+
"venue",
|
|
764
|
+
"pdate",
|
|
765
|
+
"pdf_url",
|
|
766
|
+
"source_url",
|
|
767
|
+
],
|
|
414
768
|
capabilities: ["http.fetch", "scholar.author", "scholar.search"],
|
|
415
769
|
func: async (_page, kwargs) => {
|
|
416
770
|
const profile = requireProfileId(kwargs.profile);
|
|
@@ -437,7 +791,10 @@ cli({
|
|
|
437
791
|
authors: row.authors,
|
|
438
792
|
venue: row.venue,
|
|
439
793
|
pdate: row.pdate,
|
|
440
|
-
|
|
794
|
+
pdf_url: row.pdf_url,
|
|
795
|
+
source_url: row.source_url,
|
|
796
|
+
source_adapter: row.source_adapter,
|
|
797
|
+
openreview_id: row.openreview_id,
|
|
441
798
|
};
|
|
442
799
|
});
|
|
443
800
|
},
|
|
@@ -474,7 +831,9 @@ cli({
|
|
|
474
831
|
"primary_area",
|
|
475
832
|
"pdate",
|
|
476
833
|
"pdf",
|
|
834
|
+
"pdf_url",
|
|
477
835
|
"url",
|
|
836
|
+
"source_url",
|
|
478
837
|
],
|
|
479
838
|
capabilities: ["http.fetch", "scholar.venue", "scholar.search"],
|
|
480
839
|
func: async (_page, kwargs) => {
|
|
@@ -506,7 +865,11 @@ cli({
|
|
|
506
865
|
primary_area: row.primary_area,
|
|
507
866
|
pdate: row.pdate,
|
|
508
867
|
pdf: row.pdf,
|
|
868
|
+
pdf_url: row.pdf_url,
|
|
509
869
|
url: row.url,
|
|
870
|
+
source_url: row.source_url,
|
|
871
|
+
source_adapter: row.source_adapter,
|
|
872
|
+
openreview_id: row.openreview_id,
|
|
510
873
|
};
|
|
511
874
|
});
|
|
512
875
|
},
|
|
@@ -525,7 +888,9 @@ cli({
|
|
|
525
888
|
type: "str",
|
|
526
889
|
required: true,
|
|
527
890
|
positional: true,
|
|
528
|
-
description: "OpenReview forum id",
|
|
891
|
+
description: "OpenReview forum id or forum URL",
|
|
892
|
+
"x-unicli-kind": "id",
|
|
893
|
+
"x-unicli-accepts": ["url"],
|
|
529
894
|
},
|
|
530
895
|
{
|
|
531
896
|
name: "max-length",
|
|
@@ -534,7 +899,20 @@ cli({
|
|
|
534
899
|
description: "Per-row text truncation length",
|
|
535
900
|
},
|
|
536
901
|
],
|
|
537
|
-
columns: [
|
|
902
|
+
columns: [
|
|
903
|
+
"forum",
|
|
904
|
+
"note_id",
|
|
905
|
+
"type",
|
|
906
|
+
"author",
|
|
907
|
+
"invitation",
|
|
908
|
+
"created_at",
|
|
909
|
+
"source_url",
|
|
910
|
+
"rating",
|
|
911
|
+
"confidence",
|
|
912
|
+
"text",
|
|
913
|
+
"text_chars",
|
|
914
|
+
"text_truncated",
|
|
915
|
+
],
|
|
538
916
|
capabilities: ["http.fetch", "scholar.review"],
|
|
539
917
|
func: async (_page, kwargs) => {
|
|
540
918
|
const forum = requireForumId(kwargs.forum, "forum");
|