@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
@@ -1,17 +1,23 @@
1
1
  /**
2
2
  * @owner src/adapters/openreview/papers.ts
3
- * @does Register agent-facing OpenReview search, paper, author, venue, and reviews commands.
4
- * @needs Public api2.openreview.net notes API, forum/profile id validation, note content normalization.
5
- * @feeds surface coverage ledger, scholarly review workflow, agent-readable paper/review rows.
6
- * @breaks OpenReview API envelope drift, content.value parsing, or silent empty threads hide paper review state.
3
+ * @does Register agent-facing OpenReview search, paper, author, venue, reviews, PDF download, and PDF text extraction commands.
4
+ * @needs Public api2.openreview.net notes API, openreview.net PDF URLs, pdftotext for read, forum/profile id validation, note content normalization.
5
+ * @feeds surface coverage ledger, scholarly review workflow, agent-readable paper/review rows, local PDF/fulltext workflow.
6
+ * @breaks OpenReview API envelope drift, content.value parsing, PDF download failure, pdftotext absence, or silent empty threads hide paper review state.
7
7
  */
8
8
 
9
+ import { execFile } from "node:child_process";
10
+ import { join, resolve } from "node:path";
11
+ import { promisify } from "node:util";
12
+
13
+ import { httpDownload, sanitizeFilename } from "../../engine/download.js";
9
14
  import { cli, Strategy } from "../../registry.js";
10
15
 
11
16
  const OPENREVIEW_API = "https://api2.openreview.net";
12
17
  const OPENREVIEW_BASE = "https://openreview.net";
13
18
  const FORUM_ID_RE = /^[A-Za-z0-9_-]{6,20}$/;
14
19
  const PROFILE_ID_RE = /^~(?=.*\p{L})[\p{L}\p{M}0-9._-]+\d+$/u;
20
+ const execFileAsync = promisify(execFile);
15
21
  const REVIEW_SECTION_FIELDS = [
16
22
  ["summary", "Summary"],
17
23
  ["strengths", "Strengths"],
@@ -89,7 +95,9 @@ export function requireOpenReviewOffset(value: unknown, fallback = 0): number {
89
95
  }
90
96
 
91
97
  export function requireForumId(value: unknown, label = "id"): string {
92
- const id = String(value ?? "").trim();
98
+ const raw = String(value ?? "").trim();
99
+ const id =
100
+ raw.match(/^https?:\/\/openreview\.net\/forum\?id=([^&#]+)/i)?.[1] ?? raw;
93
101
  if (!id) throw new Error(`openreview ${label} is required.`);
94
102
  if (!FORUM_ID_RE.test(id)) {
95
103
  throw new Error(
@@ -99,6 +107,44 @@ export function requireForumId(value: unknown, label = "id"): string {
99
107
  return id;
100
108
  }
101
109
 
110
+ export function requireOpenReviewPageRange(
111
+ firstPage: unknown,
112
+ lastPage: unknown,
113
+ ): { firstPage: number; lastPage: number } {
114
+ const first = coerceOpenReviewInt(firstPage ?? 1);
115
+ const last = coerceOpenReviewInt(lastPage ?? 20);
116
+ if (!Number.isInteger(first) || first < 1) {
117
+ throw new Error("openreview first-page must be an integer >= 1.");
118
+ }
119
+ if (!Number.isInteger(last) || last < first) {
120
+ throw new Error("openreview last-page must be an integer >= first-page.");
121
+ }
122
+ return { firstPage: first, lastPage: last };
123
+ }
124
+
125
+ export function requireOpenReviewMaxChars(
126
+ value: unknown,
127
+ fallback = 40_000,
128
+ ): number {
129
+ const raw =
130
+ value === undefined || value === null || value === "" ? fallback : value;
131
+ const n = typeof raw === "number" ? raw : Number(raw);
132
+ if (!Number.isInteger(n) || n < 1_000 || n > 1_000_000) {
133
+ throw new Error(
134
+ `openreview max-chars must be an integer in [1000, 1000000]. Got: ${String(value)}`,
135
+ );
136
+ }
137
+ return n;
138
+ }
139
+
140
+ export function openReviewPdfFilename(id: string, title: unknown): string {
141
+ const slug = stringField(title)
142
+ .replace(/[^A-Za-z0-9._-]+/g, "-")
143
+ .replace(/^-+|-+$/g, "")
144
+ .slice(0, 80);
145
+ return sanitizeFilename(`${id}${slug ? `-${slug}` : ""}.pdf`);
146
+ }
147
+
102
148
  export function requireProfileId(value: unknown): string {
103
149
  const id = String(value ?? "").trim();
104
150
  if (!id) throw new Error("openreview profile is required.");
@@ -154,8 +200,12 @@ export function mapOpenReviewNoteRow(
154
200
  const keywordList = Array.isArray(keywords)
155
201
  ? keywords.map(stringField).filter(Boolean).join(", ")
156
202
  : stringField(keywords);
203
+ const pdate = formatOpenReviewDate(note.pdate ?? note.cdate);
204
+ const pdfUrl = absoluteOpenReviewPdf(readContent(content, "pdf"));
205
+ const sourceUrl = id ? `${OPENREVIEW_BASE}/forum?id=${id}` : "";
157
206
  return {
158
207
  id,
208
+ openreview_id: id,
159
209
  title: stringField(readContent(content, "title")).replace(/\s+/g, " "),
160
210
  authors: authorList,
161
211
  keywords: keywordList,
@@ -166,9 +216,15 @@ export function mapOpenReviewNoteRow(
166
216
  /\s+/g,
167
217
  " ",
168
218
  ),
169
- pdate: formatOpenReviewDate(note.pdate ?? note.cdate),
170
- pdf: absoluteOpenReviewPdf(readContent(content, "pdf")),
171
- url: id ? `${OPENREVIEW_BASE}/forum?id=${id}` : "",
219
+ pdate,
220
+ date: pdate,
221
+ pdf: pdfUrl,
222
+ pdf_url: pdfUrl,
223
+ url: sourceUrl,
224
+ source_url: sourceUrl,
225
+ landing_url: sourceUrl,
226
+ source_adapter: "openreview",
227
+ retrieved_at: new Date().toISOString(),
172
228
  };
173
229
  }
174
230
 
@@ -181,6 +237,11 @@ function invitationTail(note: OpenReviewNote): string {
181
237
  return "";
182
238
  }
183
239
 
240
+ function lastInvitation(note: OpenReviewNote): string {
241
+ const invitations = Array.isArray(note.invitations) ? note.invitations : [];
242
+ return invitations.length > 0 ? String(invitations.at(-1)) : "";
243
+ }
244
+
184
245
  export function classifyReviewNote(
185
246
  note: OpenReviewNote,
186
247
  isRoot: boolean,
@@ -216,8 +277,24 @@ function joinReviewSections(content: OpenReviewContent | undefined): string {
216
277
  return parts.join("\n\n");
217
278
  }
218
279
 
219
- function truncate(text: string, maxLength: number): string {
220
- return text.length <= maxLength ? text : `${text.slice(0, maxLength - 3)}...`;
280
+ function truncateText(
281
+ text: string,
282
+ maxLength: number,
283
+ ): { text: string; truncated: boolean; originalChars: number } {
284
+ const originalChars = text.length;
285
+ if (originalChars <= maxLength) {
286
+ return { text, truncated: false, originalChars };
287
+ }
288
+ return {
289
+ text: `${text.slice(0, Math.max(0, maxLength - 3)).trimEnd()}...`,
290
+ truncated: true,
291
+ originalChars,
292
+ };
293
+ }
294
+
295
+ function openReviewNoteUrl(forum: string, noteId: string): string {
296
+ const forumUrl = `${OPENREVIEW_BASE}/forum?id=${forum}`;
297
+ return noteId && noteId !== forum ? `${forumUrl}&noteId=${noteId}` : forumUrl;
221
298
  }
222
299
 
223
300
  export function mapReviewThreadRows(
@@ -231,17 +308,26 @@ export function mapReviewThreadRows(
231
308
  .sort((a, b) => (numberField(a.cdate) ?? 0) - (numberField(b.cdate) ?? 0));
232
309
  return [root, ...sorted].map((note) => {
233
310
  const isRoot = note.id === forum;
311
+ const noteId = stringField(note.id);
234
312
  const rating = readContent(note.content, "rating");
235
313
  const confidence = readContent(note.content, "confidence");
314
+ const text = truncateText(joinReviewSections(note.content), maxLength);
236
315
  return {
316
+ forum,
317
+ note_id: noteId,
237
318
  type: classifyReviewNote(note, isRoot),
238
319
  author: authorFromSignatures(note.signatures),
320
+ invitation: lastInvitation(note),
321
+ created_at: formatOpenReviewDate(note.pdate ?? note.cdate),
322
+ source_url: openReviewNoteUrl(forum, noteId),
239
323
  rating: rating === undefined || rating === null ? "" : String(rating),
240
324
  confidence:
241
325
  confidence === undefined || confidence === null
242
326
  ? ""
243
327
  : String(confidence),
244
- text: truncate(joinReviewSections(note.content), maxLength),
328
+ text: text.text,
329
+ text_chars: text.originalChars,
330
+ text_truncated: text.truncated,
245
331
  };
246
332
  });
247
333
  }
@@ -286,6 +372,88 @@ function notesFromEnvelope(json: NotesEnvelope): OpenReviewNote[] {
286
372
  return Array.isArray(json.notes) ? json.notes : [];
287
373
  }
288
374
 
375
+ async function fetchOpenReviewPaperRow(
376
+ id: string,
377
+ ): Promise<Record<string, unknown>> {
378
+ const notes = notesFromEnvelope(
379
+ await fetchOpenReview(
380
+ `/notes?id=${encodeURIComponent(id)}`,
381
+ `openreview paper ${id}`,
382
+ ),
383
+ );
384
+ if (notes.length === 0)
385
+ throw new Error(`No OpenReview paper found with id "${id}".`);
386
+ return mapOpenReviewNoteRow(notes[0]);
387
+ }
388
+
389
+ function hasPaperContent(note: OpenReviewNote): boolean {
390
+ const content = note.content ?? {};
391
+ return (
392
+ stringField(readContent(content, "title")).length > 0 ||
393
+ stringField(readContent(content, "abstract")).length > 0 ||
394
+ stringField(readContent(content, "pdf")).length > 0
395
+ );
396
+ }
397
+
398
+ async function paperRowsFromSearchNotes(
399
+ notes: OpenReviewNote[],
400
+ limit: number,
401
+ ): Promise<Array<Record<string, unknown>>> {
402
+ const rows: Array<Record<string, unknown>> = [];
403
+ const seen = new Set<string>();
404
+ let firstHydrationError: Error | undefined;
405
+
406
+ for (const note of notes) {
407
+ const rawId = hasPaperContent(note) ? note.id : note.forum;
408
+ const idText = stringField(rawId);
409
+ if (!idText || seen.has(idText) || !FORUM_ID_RE.test(idText)) continue;
410
+ seen.add(idText);
411
+ try {
412
+ const row = hasPaperContent(note)
413
+ ? mapOpenReviewNoteRow(note)
414
+ : await fetchOpenReviewPaperRow(idText);
415
+ if (stringField(row.title) || stringField(row.pdf_url)) rows.push(row);
416
+ } catch (error) {
417
+ if (!firstHydrationError) {
418
+ firstHydrationError =
419
+ error instanceof Error ? error : new Error(String(error));
420
+ }
421
+ }
422
+ if (rows.length >= limit) break;
423
+ }
424
+
425
+ if (rows.length === 0 && firstHydrationError) throw firstHydrationError;
426
+ return rows;
427
+ }
428
+
429
+ async function downloadOpenReviewPdf(
430
+ row: Record<string, unknown>,
431
+ output: unknown,
432
+ ): Promise<Record<string, unknown>> {
433
+ const id = requireForumId(row.id);
434
+ const pdfUrl = stringField(row.pdf_url);
435
+ if (!pdfUrl) {
436
+ throw new Error(`OpenReview paper "${id}" does not expose a PDF URL.`);
437
+ }
438
+ const outputDir = resolve(String(output ?? "./openreview-downloads"));
439
+ const path = join(outputDir, openReviewPdfFilename(id, row.title));
440
+ const download = await httpDownload(pdfUrl, path, {
441
+ "User-Agent":
442
+ "unicli-openreview/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
443
+ Accept: "application/pdf,*/*",
444
+ });
445
+ if (download.status === "failed") {
446
+ throw new Error(
447
+ `OpenReview PDF download failed for ${id}: ${download.error ?? "unknown error"}.`,
448
+ );
449
+ }
450
+ return {
451
+ ...row,
452
+ path: download.path,
453
+ _download: download,
454
+ };
455
+ }
456
+
289
457
  cli({
290
458
  site: "openreview",
291
459
  name: "search",
@@ -302,16 +470,26 @@ cli({
302
470
  },
303
471
  { name: "limit", type: "int", default: 25, description: "Max results" },
304
472
  ],
305
- columns: ["rank", "id", "title", "authors", "venue", "pdate", "url"],
473
+ columns: [
474
+ "rank",
475
+ "id",
476
+ "title",
477
+ "authors",
478
+ "venue",
479
+ "pdate",
480
+ "pdf_url",
481
+ "source_url",
482
+ ],
306
483
  capabilities: ["http.fetch", "scholar.search", "scholar.review"],
307
484
  func: async (_page, kwargs) => {
308
485
  const query = String(kwargs.query ?? "").trim();
309
486
  if (!query) throw new Error("openreview search query cannot be empty.");
310
487
  const limit = requireOpenReviewLimit(kwargs.limit, 25, 50);
488
+ const searchLimit = Math.min(limit * 5, 50);
311
489
  const params = new URLSearchParams({
312
490
  term: query,
313
491
  type: "terms",
314
- limit: String(limit),
492
+ limit: String(searchLimit),
315
493
  });
316
494
  const notes = notesFromEnvelope(
317
495
  await fetchOpenReview(
@@ -321,8 +499,11 @@ cli({
321
499
  );
322
500
  if (notes.length === 0)
323
501
  throw new Error(`No OpenReview papers found for "${query}".`);
324
- return notes.slice(0, limit).map((note, index) => {
325
- const row = mapOpenReviewNoteRow(note);
502
+ const paperRows = await paperRowsFromSearchNotes(notes, limit);
503
+ if (paperRows.length === 0) {
504
+ throw new Error(`No OpenReview paper notes found for "${query}".`);
505
+ }
506
+ return paperRows.map((row, index) => {
326
507
  return {
327
508
  rank: index + 1,
328
509
  id: row.id,
@@ -330,7 +511,10 @@ cli({
330
511
  authors: row.authors,
331
512
  venue: row.venue,
332
513
  pdate: row.pdate,
333
- url: row.url,
514
+ pdf_url: row.pdf_url,
515
+ source_url: row.source_url,
516
+ source_adapter: row.source_adapter,
517
+ openreview_id: row.openreview_id,
334
518
  };
335
519
  });
336
520
  },
@@ -362,23 +546,18 @@ cli({
362
546
  "abstract",
363
547
  "pdate",
364
548
  "pdf",
549
+ "pdf_url",
365
550
  "url",
551
+ "source_url",
366
552
  ],
367
553
  capabilities: ["http.fetch", "scholar.get", "scholar.pdf", "scholar.review"],
368
554
  func: async (_page, kwargs) => {
369
555
  const id = requireForumId(kwargs.id);
370
- const notes = notesFromEnvelope(
371
- await fetchOpenReview(
372
- `/notes?id=${encodeURIComponent(id)}`,
373
- `openreview paper ${id}`,
374
- ),
375
- );
376
- if (notes.length === 0)
377
- throw new Error(`No OpenReview paper found with id "${id}".`);
378
- const row = mapOpenReviewNoteRow(notes[0]);
556
+ const row = await fetchOpenReviewPaperRow(id);
379
557
  return [
380
558
  {
381
559
  id: row.id,
560
+ openreview_id: row.openreview_id,
382
561
  title: row.title,
383
562
  authors: row.authors,
384
563
  keywords: row.keywords,
@@ -387,8 +566,174 @@ cli({
387
566
  primary_area: row.primary_area,
388
567
  abstract: row.abstract,
389
568
  pdate: row.pdate,
569
+ date: row.date,
390
570
  pdf: row.pdf,
571
+ pdf_url: row.pdf_url,
391
572
  url: row.url,
573
+ source_url: row.source_url,
574
+ source_adapter: row.source_adapter,
575
+ retrieved_at: row.retrieved_at,
576
+ },
577
+ ];
578
+ },
579
+ });
580
+
581
+ cli({
582
+ site: "openreview",
583
+ name: "download",
584
+ description: "Download an OpenReview paper PDF by forum id",
585
+ domain: "openreview.net",
586
+ strategy: Strategy.PUBLIC,
587
+ args: [
588
+ {
589
+ name: "id",
590
+ type: "str",
591
+ required: true,
592
+ positional: true,
593
+ description: "OpenReview forum id or forum URL",
594
+ "x-unicli-kind": "id",
595
+ "x-unicli-accepts": ["url"],
596
+ },
597
+ {
598
+ name: "output",
599
+ type: "str",
600
+ default: "./openreview-downloads",
601
+ description: "Output directory",
602
+ "x-unicli-kind": "path",
603
+ },
604
+ ],
605
+ columns: ["id", "title", "pdf_url", "path", "_download"],
606
+ capabilities: ["http.fetch", "http.download", "scholar.pdf"],
607
+ minimum_capability: "http.download",
608
+ func: async (_page, kwargs) => {
609
+ const id = requireForumId(kwargs.id);
610
+ const downloaded = await downloadOpenReviewPdf(
611
+ await fetchOpenReviewPaperRow(id),
612
+ kwargs.output,
613
+ );
614
+ return [
615
+ {
616
+ id: downloaded.id,
617
+ title: downloaded.title,
618
+ pdf_url: downloaded.pdf_url,
619
+ path: downloaded.path,
620
+ _download: downloaded._download,
621
+ source_adapter: downloaded.source_adapter,
622
+ source_url: downloaded.source_url,
623
+ openreview_id: downloaded.openreview_id,
624
+ },
625
+ ];
626
+ },
627
+ });
628
+
629
+ cli({
630
+ site: "openreview",
631
+ name: "read",
632
+ description: "Download and extract text from an OpenReview paper PDF",
633
+ domain: "openreview.net",
634
+ strategy: Strategy.PUBLIC,
635
+ args: [
636
+ {
637
+ name: "id",
638
+ type: "str",
639
+ required: true,
640
+ positional: true,
641
+ description: "OpenReview forum id or forum URL",
642
+ "x-unicli-kind": "id",
643
+ "x-unicli-accepts": ["url"],
644
+ },
645
+ {
646
+ name: "output",
647
+ type: "str",
648
+ default: "./openreview-downloads",
649
+ description: "Output directory for the PDF used for extraction",
650
+ "x-unicli-kind": "path",
651
+ },
652
+ {
653
+ name: "first-page",
654
+ type: "int",
655
+ default: 1,
656
+ description: "First page to extract",
657
+ },
658
+ {
659
+ name: "last-page",
660
+ type: "int",
661
+ default: 20,
662
+ description: "Last page to extract",
663
+ },
664
+ {
665
+ name: "max-chars",
666
+ type: "int",
667
+ default: 40000,
668
+ description: "Maximum extracted text characters",
669
+ },
670
+ ],
671
+ columns: [
672
+ "id",
673
+ "title",
674
+ "pdf_url",
675
+ "path",
676
+ "text",
677
+ "text_chars",
678
+ "text_truncated",
679
+ ],
680
+ capabilities: [
681
+ "http.fetch",
682
+ "http.download",
683
+ "subprocess.exec",
684
+ "scholar.pdf",
685
+ "scholar.fulltext",
686
+ ],
687
+ minimum_capability: "subprocess.exec",
688
+ func: async (_page, kwargs) => {
689
+ const id = requireForumId(kwargs.id);
690
+ const { firstPage, lastPage } = requireOpenReviewPageRange(
691
+ kwargs["first-page"] ?? kwargs.firstPage,
692
+ kwargs["last-page"] ?? kwargs.lastPage,
693
+ );
694
+ const maxChars = requireOpenReviewMaxChars(
695
+ kwargs["max-chars"] ?? kwargs.maxChars,
696
+ );
697
+ const downloaded = await downloadOpenReviewPdf(
698
+ await fetchOpenReviewPaperRow(id),
699
+ kwargs.output,
700
+ );
701
+ const path = stringField(downloaded.path);
702
+ if (!path) throw new Error(`OpenReview PDF download produced no path.`);
703
+ const { stdout } = await execFileAsync(
704
+ "pdftotext",
705
+ [
706
+ "-layout",
707
+ "-enc",
708
+ "UTF-8",
709
+ "-f",
710
+ String(firstPage),
711
+ "-l",
712
+ String(lastPage),
713
+ path,
714
+ "-",
715
+ ],
716
+ { timeout: 60000, maxBuffer: 10 * 1024 * 1024 },
717
+ );
718
+ const text = stdout.trim();
719
+ if (!text) {
720
+ throw new Error(
721
+ `pdftotext returned no text for OpenReview paper ${id} pages ${firstPage}-${lastPage}.`,
722
+ );
723
+ }
724
+ const truncated = truncateText(text, maxChars);
725
+ return [
726
+ {
727
+ id: downloaded.id,
728
+ title: downloaded.title,
729
+ pdf_url: downloaded.pdf_url,
730
+ path,
731
+ text: truncated.text,
732
+ text_chars: truncated.originalChars,
733
+ text_truncated: truncated.truncated,
734
+ source_adapter: downloaded.source_adapter,
735
+ source_url: downloaded.source_url,
736
+ openreview_id: downloaded.openreview_id,
392
737
  },
393
738
  ];
394
739
  },
@@ -410,7 +755,16 @@ cli({
410
755
  },
411
756
  { name: "limit", type: "int", default: 50, description: "Max submissions" },
412
757
  ],
413
- columns: ["rank", "id", "title", "authors", "venue", "pdate", "url"],
758
+ columns: [
759
+ "rank",
760
+ "id",
761
+ "title",
762
+ "authors",
763
+ "venue",
764
+ "pdate",
765
+ "pdf_url",
766
+ "source_url",
767
+ ],
414
768
  capabilities: ["http.fetch", "scholar.author", "scholar.search"],
415
769
  func: async (_page, kwargs) => {
416
770
  const profile = requireProfileId(kwargs.profile);
@@ -437,7 +791,10 @@ cli({
437
791
  authors: row.authors,
438
792
  venue: row.venue,
439
793
  pdate: row.pdate,
440
- url: row.url,
794
+ pdf_url: row.pdf_url,
795
+ source_url: row.source_url,
796
+ source_adapter: row.source_adapter,
797
+ openreview_id: row.openreview_id,
441
798
  };
442
799
  });
443
800
  },
@@ -474,7 +831,9 @@ cli({
474
831
  "primary_area",
475
832
  "pdate",
476
833
  "pdf",
834
+ "pdf_url",
477
835
  "url",
836
+ "source_url",
478
837
  ],
479
838
  capabilities: ["http.fetch", "scholar.venue", "scholar.search"],
480
839
  func: async (_page, kwargs) => {
@@ -506,7 +865,11 @@ cli({
506
865
  primary_area: row.primary_area,
507
866
  pdate: row.pdate,
508
867
  pdf: row.pdf,
868
+ pdf_url: row.pdf_url,
509
869
  url: row.url,
870
+ source_url: row.source_url,
871
+ source_adapter: row.source_adapter,
872
+ openreview_id: row.openreview_id,
510
873
  };
511
874
  });
512
875
  },
@@ -525,7 +888,9 @@ cli({
525
888
  type: "str",
526
889
  required: true,
527
890
  positional: true,
528
- description: "OpenReview forum id",
891
+ description: "OpenReview forum id or forum URL",
892
+ "x-unicli-kind": "id",
893
+ "x-unicli-accepts": ["url"],
529
894
  },
530
895
  {
531
896
  name: "max-length",
@@ -534,7 +899,20 @@ cli({
534
899
  description: "Per-row text truncation length",
535
900
  },
536
901
  ],
537
- columns: ["type", "author", "rating", "confidence", "text"],
902
+ columns: [
903
+ "forum",
904
+ "note_id",
905
+ "type",
906
+ "author",
907
+ "invitation",
908
+ "created_at",
909
+ "source_url",
910
+ "rating",
911
+ "confidence",
912
+ "text",
913
+ "text_chars",
914
+ "text_truncated",
915
+ ],
538
916
  capabilities: ["http.fetch", "scholar.review"],
539
917
  func: async (_page, kwargs) => {
540
918
  const forum = requireForumId(kwargs.forum, "forum");