@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
@@ -0,0 +1,277 @@
1
+ /**
2
+ * @owner src::adapters::scholar-artifacts::pdf-read
3
+ * @does Provides side-effect-free scholarly PDF download and pdftotext extraction helpers for source adapters.
4
+ * @needs src/engine/executor.ts download/exec steps, src/engine/download.ts, pdftotext
5
+ * @feeds src/adapters/scholar-artifacts/pdf.ts and source-specific scholarly read commands
6
+ * @breaks Invalid PDF URLs, denied download paths, missing pdftotext, or empty extracted text throw before claim text reaches agents.
7
+ * @invariants Helpers register no commands; callers pass the owning site/command so operation policy and resource attribution stay source-scoped.
8
+ * @side-effects HTTPS/HTTP egress to the supplied PDF URL; writes one PDF under the requested output directory; executes pdftotext.
9
+ * @perf O(PDF bytes + extracted page range); page range defaults to first 20 pages.
10
+ * @concurrency safe — each invocation writes one deterministic artifact path.
11
+ * @test tests/unit/adapters/scholar-artifacts.test.ts
12
+ * @stability experimental
13
+ * @since 2026-06-27
14
+ */
15
+
16
+ import { join, resolve } from "node:path";
17
+
18
+ import { runPipeline } from "../../engine/executor.js";
19
+ import { sanitizeFilename } from "../../engine/download.js";
20
+ import { Strategy } from "../../registry.js";
21
+
22
/**
 * Row describing one scholarly PDF that was downloaded to disk.
 *
 * Produced by `downloadScholarPdf` and spread into the result of
 * `readScholarPdf`.
 */
export interface ScholarArtifactDownloadRow {
  /** Paper identifier; `downloadScholarPdf` uses the PDF URL when the caller supplies none. */
  id: string;
  /** Paper title; `downloadScholarPdf` uses `id` when the caller supplies none. */
  title: string;
  /** Name of the adapter the paper came from; defaults to the calling site. */
  source_adapter: string;
  /** Validated, normalized URL the PDF was fetched from. */
  pdf_url: string;
  /** Landing page URL; only present when the caller supplied a non-empty value. */
  source_url?: string;
  /** Path of the downloaded file as reported in the download step's `_download.path`. */
  path?: string;
  /** Raw `_download` metadata from the pipeline row, passed through unchanged. */
  _download?: unknown;
}
31
+
32
/**
 * Caller-supplied context for the scholarly PDF helpers.
 *
 * `site` and `command` are forwarded to the pipeline so the download and
 * extraction steps are attributed to the owning command.
 */
export interface ScholarPdfReadOptions {
  /** Owning site name; also the fallback `source_adapter` of the returned row. */
  site: string;
  /** Owning command name forwarded to the pipeline. */
  command: string;
  /** Output directory used when the caller passes no `output` argument. */
  defaultOutput: string;
  /** Value sent as the `User-Agent` header of the PDF download request. */
  userAgent: string;
}
38
+
39
/**
 * Normalizes a loosely typed argument to a trimmed string.
 *
 * @param value - Any value taken from caller-supplied arguments.
 * @returns The trimmed string, or `""` when `value` is not a string.
 */
function stringField(value: unknown): string {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim();
}
42
+
43
/**
 * Validates a caller-supplied PDF URL and returns its normalized form.
 *
 * @param value - Raw argument; anything other than a non-blank string is rejected.
 * @returns The URL as serialized by the `URL` parser.
 * @throws Error when the value is missing, unparseable, or not http(s).
 */
export function requireScholarPdfUrl(value: unknown): string {
  const candidate = stringField(value);
  if (candidate === "") {
    throw new Error("scholar PDF URL is required.");
  }

  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    throw new Error(`scholar PDF URL "${candidate}" is not a valid URL.`);
  }

  // Only web schemes are accepted.
  const allowedProtocols = ["https:", "http:"];
  if (!allowedProtocols.includes(parsed.protocol)) {
    throw new Error(`scholar PDF URL "${candidate}" must use http or https.`);
  }

  return parsed.href;
}
57
+
58
/**
 * Validates the requested page window for PDF text extraction.
 *
 * A bound that is `undefined`, `null`, or the empty string counts as "not
 * supplied" and falls back to its default (first page 1, last page 20). This
 * matches how `requireScholarMaxChars` treats blank input; an empty string
 * used to be coerced to 0 and rejected.
 *
 * @param firstPage - Raw first-page argument (number or numeric string).
 * @param lastPage - Raw last-page argument (number or numeric string).
 * @returns The validated 1-based inclusive page range.
 * @throws Error when `firstPage` is not a positive integer, or `lastPage` is
 *   not an integer greater than or equal to `firstPage`.
 */
export function requireScholarPageRange(
  firstPage: unknown,
  lastPage: unknown,
): { firstPage: number; lastPage: number } {
  const isBlank = (value: unknown): boolean =>
    value === undefined || value === null || value === "";

  const first = isBlank(firstPage) ? 1 : Number(firstPage);
  const last = isBlank(lastPage) ? 20 : Number(lastPage);

  if (!Number.isInteger(first) || first < 1) {
    throw new Error("first-page must be a positive integer.");
  }
  if (!Number.isInteger(last) || last < first) {
    throw new Error(
      "last-page must be an integer greater than or equal to first-page.",
    );
  }
  return { firstPage: first, lastPage: last };
}
74
+
75
/**
 * Validates the maximum number of extracted characters to return.
 *
 * @param value - Raw argument; `undefined`, `null`, and `""` select `fallback`.
 * @param fallback - Limit used when no value is supplied (default 40000).
 * @returns An integer between 1000 and 1000000 inclusive.
 * @throws Error when the resolved limit is not an integer in that range.
 */
export function requireScholarMaxChars(
  value: unknown,
  fallback = 40_000,
): number {
  const notSupplied = value === undefined || value === null || value === "";
  const candidate: unknown = notSupplied ? fallback : value;
  const limit = typeof candidate === "number" ? candidate : Number(candidate);

  const withinBounds =
    Number.isInteger(limit) && limit >= 1_000 && limit <= 1_000_000;
  if (withinBounds) {
    return limit;
  }
  throw new Error(
    `scholar max-chars must be an integer in [1000, 1000000]. Got: ${String(value)}`,
  );
}
89
+
90
/**
 * Caps extracted text at `maxChars` UTF-16 code units and appends a marker.
 *
 * Text at or under the limit is returned untouched. Longer text is cut,
 * stripped of trailing whitespace, and suffixed with a
 * `[truncated at N characters]` marker.
 *
 * The cut never lands inside a surrogate pair: if the last unit kept would be
 * a high surrogate whose low half is the first unit dropped, the cut backs
 * off by one so no lone surrogate is emitted. A negative `maxChars` is
 * treated as 0 instead of slicing from the end of the string.
 *
 * @param text - Extracted text.
 * @param maxChars - Maximum number of UTF-16 code units to keep.
 * @returns The (possibly shortened) text, whether it was shortened, and the
 *   original length in UTF-16 code units.
 */
export function truncateScholarText(
  text: string,
  maxChars: number,
): { text: string; truncated: boolean; originalChars: number } {
  const originalChars = text.length;
  if (originalChars <= maxChars) {
    return { text, truncated: false, originalChars };
  }

  let cut = Math.max(0, Math.floor(maxChars));
  if (cut > 0) {
    const lastKept = text.charCodeAt(cut - 1);
    const firstDropped = text.charCodeAt(cut);
    const splitsPair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff;
    if (splitsPair) {
      cut -= 1;
    }
  }

  return {
    text: `${text.slice(0, cut).trimEnd()}\n\n[truncated at ${maxChars} characters]`,
    truncated: true,
    originalChars,
  };
}
104
+
105
/**
 * Derives the on-disk PDF filename for a scholarly artifact.
 *
 * An explicit `filename` wins: it is sanitized and given a `.pdf` extension
 * unless it already ends with one (case-insensitive). Otherwise the name is
 * `<source>-<id>[-<title>].pdf`. Source defaults to "scholar" and id to
 * "paper"; whitespace runs in the title become underscores, the title is
 * capped at 96 characters, and the whole stem at 180.
 *
 * @param input - Loosely typed fields; non-string values are treated as absent.
 * @returns A filename ending in `.pdf`.
 */
export function scholarArtifactFilename(input: {
  source_adapter?: unknown;
  id?: unknown;
  title?: unknown;
  filename?: unknown;
}): string {
  const requested = stringField(input.filename);
  if (requested !== "") {
    const cleaned = sanitizeFilename(requested);
    if (cleaned.toLowerCase().endsWith(".pdf")) {
      return cleaned;
    }
    return `${cleaned}.pdf`;
  }

  const sourcePart = sanitizeFilename(
    stringField(input.source_adapter) || "scholar",
  );
  const idPart = sanitizeFilename(stringField(input.id) || "paper");
  const titlePart = sanitizeFilename(stringField(input.title))
    .replace(/\s+/g, "_")
    .slice(0, 96);

  const segments = [sourcePart, idPart];
  if (titlePart) {
    segments.push(titlePart);
  }
  return `${segments.join("-").slice(0, 180)}.pdf`;
}
127
+
128
/**
 * Reads `_download.path` from a pipeline result row.
 *
 * @param row - Row returned by the download step, of unknown shape.
 * @returns The trimmed path, or `""` when the row, its `_download` metadata,
 *   or a string path is missing.
 */
function downloadPath(row: unknown): string {
  if (typeof row !== "object" || row === null) {
    return "";
  }
  const meta = (row as { _download?: unknown })._download;
  if (typeof meta !== "object" || meta === null) {
    return "";
  }
  return stringField((meta as { path?: unknown }).path);
}
134
+
135
/**
 * Builds a human-readable reason for a failed download from a pipeline row.
 *
 * @param row - Row returned by the download step, of unknown shape.
 * @returns A non-empty description: which part of the row was missing, or the
 *   `status` / `error` strings found in `_download`. When `_download` exists
 *   but carries neither, a fixed explanation is returned so the caller's
 *   message is never left without a reason.
 */
function downloadFailure(row: unknown): string {
  if (!row || typeof row !== "object") return "download step returned no row";
  const download = (row as { _download?: unknown })._download;
  if (!download || typeof download !== "object") {
    return "download step returned no _download metadata";
  }
  const status = stringField((download as { status?: unknown }).status);
  const error = stringField((download as { error?: unknown }).error);
  const details = [
    status ? `status=${status}` : "",
    error ? `error=${error}` : "",
  ]
    .filter(Boolean)
    .join(", ");
  // Previously this could be "", leaving the caller's message ending in ": .".
  return details || "download metadata carried no status or error";
}
147
+
148
/**
 * Downloads one scholarly PDF through the pipeline `download` step.
 *
 * The URL is validated first. The output directory is resolved from
 * `kwargs.output` (or `options.defaultOutput`) and the filename comes from
 * `scholarArtifactFilename(kwargs)`. The pipeline runs under the caller's
 * site and command with the PDF host as its domain.
 *
 * @param kwargs - Command arguments: `pdf_url` is required; `output`,
 *   `filename`, `id`, `title`, `source_adapter`, and `source_url` are optional.
 * @param options - Owning site/command, default output directory, and User-Agent.
 * @returns Artifact metadata including the downloaded file path.
 * @throws Error when the URL is invalid or the download step reports no path.
 */
export async function downloadScholarPdf(
  kwargs: Record<string, unknown>,
  options: ScholarPdfReadOptions,
): Promise<ScholarArtifactDownloadRow> {
  const pdfUrl = requireScholarPdfUrl(kwargs.pdf_url);
  const outputDir = resolve(
    stringField(kwargs.output) || options.defaultOutput,
  );
  const targetName = scholarArtifactFilename(kwargs);

  // Descriptive fields fall back progressively: id -> URL, title -> id,
  // source adapter -> owning site.
  const id = stringField(kwargs.id) || pdfUrl;
  const title = stringField(kwargs.title) || id;
  const sourceAdapter = stringField(kwargs.source_adapter) || options.site;
  const sourceUrl = stringField(kwargs.source_url);

  const [firstRow] = await runPipeline(
    [
      {
        download: {
          url: "${{ args.pdf_url }}",
          dir: "${{ args.output }}",
          filename: "${{ args.filename }}",
          type: "document",
          headers: {
            Accept: "application/pdf,*/*",
            "User-Agent": options.userAgent,
          },
        },
      },
    ],
    {
      args: {
        pdf_url: pdfUrl,
        output: outputDir,
        filename: targetName,
      },
      source: "internal",
    },
    undefined,
    {
      site: options.site,
      command: options.command,
      strategy: Strategy.PUBLIC,
      domain: new URL(pdfUrl).hostname,
      surface: "cli",
    },
  );

  const savedPath = downloadPath(firstRow);
  if (!savedPath) {
    throw new Error(
      `scholar PDF download failed for ${pdfUrl}: ${downloadFailure(firstRow)}.`,
    );
  }

  const downloadMeta =
    firstRow && typeof firstRow === "object"
      ? (firstRow as { _download?: unknown })._download
      : undefined;

  return {
    id,
    title,
    source_adapter: sourceAdapter,
    ...(sourceUrl ? { source_url: sourceUrl } : {}),
    pdf_url: pdfUrl,
    path: savedPath,
    _download: downloadMeta,
  };
}
213
+
214
/**
 * Downloads a scholarly PDF and extracts its text with `pdftotext`.
 *
 * The page range and character limit are validated before anything is
 * downloaded, so a malformed `first-page`, `last-page`, or `max-chars` fails
 * without network egress or a file written to disk. Each option is read from
 * its kebab-case, camelCase, or snake_case key, in that order.
 *
 * @param kwargs - Download arguments accepted by `downloadScholarPdf`, plus
 *   optional page-range and max-chars options.
 * @param options - Owning site/command, default output directory, and User-Agent.
 * @returns The download row extended with `text`, `text_chars` (length before
 *   truncation), `text_truncated`, and `text_source: "pdf"`.
 * @throws Error when an argument is invalid, the download fails, or
 *   `pdftotext` yields no text.
 */
export async function readScholarPdf(
  kwargs: Record<string, unknown>,
  options: ScholarPdfReadOptions,
): Promise<Record<string, unknown>> {
  // Cheap, side-effect-free validation first: fail before downloading.
  const { firstPage, lastPage } = requireScholarPageRange(
    kwargs["first-page"] ?? kwargs.firstPage ?? kwargs.first_page,
    kwargs["last-page"] ?? kwargs.lastPage ?? kwargs.last_page,
  );
  const maxChars = requireScholarMaxChars(
    kwargs["max-chars"] ?? kwargs.maxChars ?? kwargs.max_chars,
  );

  const downloaded = await downloadScholarPdf(kwargs, options);

  const [text] = await runPipeline(
    [
      {
        exec: {
          command: "pdftotext",
          args: [
            "-layout",
            "-enc",
            "UTF-8",
            "-f",
            "${{ args.first_page }}",
            "-l",
            "${{ args.last_page }}",
            "${{ args.file }}",
            // "-" as the output file makes pdftotext write to stdout.
            "-",
          ],
          parse: "text",
          timeout: 60000,
        },
      },
    ],
    {
      args: {
        file: downloaded.path,
        first_page: firstPage,
        last_page: lastPage,
      },
      source: "internal",
    },
    undefined,
    {
      site: options.site,
      command: options.command,
      strategy: Strategy.PUBLIC,
      surface: "cli",
    },
  );

  const extracted = stringField(text);
  if (!extracted) {
    throw new Error(
      `pdftotext extracted no text from ${downloaded.path ?? join(".", scholarArtifactFilename(kwargs))}.`,
    );
  }

  const truncated = truncateScholarText(extracted, maxChars);
  return {
    ...downloaded,
    text: truncated.text,
    text_chars: truncated.originalChars,
    text_truncated: truncated.truncated,
    text_source: "pdf",
  };
}
@@ -0,0 +1,133 @@
1
+ /**
2
+ * @owner src::adapters::scholar-artifacts::pdf
3
+ * @does Registers source-agnostic scholarly PDF artifact download and text extraction commands.
4
+ * @needs src/engine/executor.ts download/exec steps, pdftotext, scholarly adapters that expose pdf_url
5
+ * @feeds src/commands/scholar.ts generic scholar download/read workflows
6
+ * @breaks PDF URL drift, denied download paths, missing pdftotext, or empty extracted text stop the artifact loop.
7
+ * @invariants PDF bytes are downloaded through pipeline resource guards; text extraction uses the same pdftotext contract as pdf/read.
8
+ * @side-effects HTTPS/HTTP egress to the supplied PDF URL; writes PDF files under the requested output directory; executes pdftotext for read-pdf.
9
+ * @perf O(PDF bytes + extracted pages); page range defaults to the first 20 pages.
10
+ * @concurrency safe — each command invocation writes one deterministic PDF path
11
+ * @test tests/unit/adapters/scholar-artifacts.test.ts
12
+ * @stability experimental
13
+ * @since 2026-06-26
14
+ */
15
+
16
+ import { cli, Strategy } from "../../registry.js";
17
+ import { downloadScholarPdf, readScholarPdf } from "./pdf-read.js";
18
+
19
+ export {
20
+ requireScholarMaxChars,
21
+ requireScholarPageRange,
22
+ requireScholarPdfUrl,
23
+ scholarArtifactFilename,
24
+ truncateScholarText,
25
+ } from "./pdf-read.js";
26
+
27
/**
 * Shared helper options for the `scholar-artifacts` commands.
 *
 * `command` names the `read-pdf` command; `download-pdf` spreads this object
 * and overrides `command` with its own name.
 */
const SCHOLAR_ARTIFACT_PDF_OPTIONS = {
  userAgent:
    "unicli-scholar-artifacts/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
  defaultOutput: "./scholar-downloads",
  site: "scholar-artifacts",
  command: "read-pdf",
};
34
+
35
// scholar-artifacts download-pdf: fetches one PDF URL to disk and returns a
// single row of artifact metadata. No text extraction happens here.
cli({
  site: "scholar-artifacts",
  name: "download-pdf",
  description: "Download a scholarly PDF URL with artifact metadata",
  domain: "scholarly-pdf",
  strategy: Strategy.PUBLIC,
  args: [
    {
      name: "pdf_url",
      type: "str",
      required: true,
      positional: true,
      description: "Open scholarly PDF URL",
      format: "uri",
    },
    { name: "title", type: "str", description: "Paper title" },
    { name: "id", type: "str", description: "Source-local paper id" },
    { name: "source_adapter", type: "str", description: "Source adapter name" },
    { name: "source_url", type: "str", description: "Landing page URL" },
    {
      name: "output",
      type: "str",
      default: "./scholar-downloads",
      description: "Output directory",
    },
    { name: "filename", type: "str", description: "Output PDF filename" },
  ],
  columns: [
    "id",
    "title",
    "source_adapter",
    "pdf_url",
    "source_url",
    "path",
    "_download",
  ],
  capabilities: ["http.download"],
  minimum_capability: "http.download",
  func: async (_page, kwargs) => {
    // Attribute the pipeline run to this command rather than the shared
    // default ("read-pdf").
    const artifact = await downloadScholarPdf(kwargs, {
      ...SCHOLAR_ARTIFACT_PDF_OPTIONS,
      command: "download-pdf",
    });
    return [artifact];
  },
});
80
+
81
// scholar-artifacts read-pdf: downloads one PDF URL, extracts text from the
// requested page range with pdftotext, and returns a single row.
cli({
  site: "scholar-artifacts",
  name: "read-pdf",
  description: "Download a scholarly PDF URL and extract text with pdftotext",
  domain: "scholarly-pdf",
  strategy: Strategy.PUBLIC,
  args: [
    {
      name: "pdf_url",
      type: "str",
      required: true,
      positional: true,
      description: "Open scholarly PDF URL",
      format: "uri",
    },
    { name: "title", type: "str", description: "Paper title" },
    { name: "id", type: "str", description: "Source-local paper id" },
    { name: "source_adapter", type: "str", description: "Source adapter name" },
    { name: "source_url", type: "str", description: "Landing page URL" },
    {
      name: "output",
      type: "str",
      default: "./scholar-downloads",
      description: "Output directory",
    },
    { name: "filename", type: "str", description: "Output PDF filename" },
    { name: "first-page", type: "int", default: 1, description: "First page" },
    { name: "last-page", type: "int", default: 20, description: "Last page" },
    {
      name: "max-chars",
      type: "int",
      default: 40000,
      description: "Maximum extracted text characters",
    },
  ],
  columns: [
    "id",
    "title",
    "source_adapter",
    "pdf_url",
    "source_url",
    "path",
    "text",
    "text_chars",
    "text_truncated",
    // readScholarPdf also returns text_source, and the semantic-scholar read
    // command lists it; appended last so existing column positions are
    // unchanged.
    "text_source",
  ],
  capabilities: ["http.download", "subprocess.exec"],
  executables: ["pdftotext"],
  minimum_capability: "subprocess.exec",
  func: async (_page, kwargs) => [
    await readScholarPdf(kwargs, SCHOLAR_ARTIFACT_PDF_OPTIONS),
  ],
});
@@ -1,12 +1,12 @@
1
1
  /**
2
2
  * @owner src::adapters::semantic-scholar::papers
3
- * @does Registers Semantic Scholar Graph API paper search, detail, citations, references, and PDF discovery commands.
4
- * @needs api.semanticscholar.org Graph v1, optional SEMANTIC_SCHOLAR_API_KEY, src/registry.ts
3
+ * @does Registers Semantic Scholar Graph API paper search, detail, citations, references, and source PDF read commands.
4
+ * @needs api.semanticscholar.org Graph v1, optional SEMANTIC_SCHOLAR_API_KEY, src/adapters/scholar-artifacts/pdf-read.ts, pdftotext
5
5
  * @feeds src/commands/scholar.ts via scholar.* capability tags
6
- * @breaks Graph API rate limits or response-shape drift surface as explicit adapter errors; no cached fallback is used.
7
- * @invariants Paper references are normalized to Semantic Scholar's accepted DOI:/ARXIV:/paperId formats; output maps to ScholarlyWorkRecord.
8
- * @side-effects HTTPS egress to api.semanticscholar.org only
9
- * @perf O(limit) JSON mapping per command
6
+ * @breaks Graph API rate limits, response-shape drift, missing OA PDF URLs, or pdftotext failures surface as explicit adapter errors; no cached fallback is used.
7
+ * @invariants Paper references are normalized to Semantic Scholar's accepted DOI:/ARXIV:/paperId formats; read requires openAccessPdf.url before text is claimed.
8
+ * @side-effects HTTPS egress to api.semanticscholar.org and source PDF hosts; read writes one PDF and executes pdftotext.
9
+ * @perf O(limit) JSON mapping per command; O(PDF bytes + extracted page range) for read
10
10
  * @concurrency safe
11
11
  * @test tests/unit/adapters/scholar-sources.test.ts
12
12
  * @stability experimental
@@ -15,6 +15,7 @@
15
15
 
16
16
  import { cli, Strategy } from "../../registry.js";
17
17
  import type { ScholarlyWorkRecord } from "../../types/scholarly.js";
18
+ import { readScholarPdf } from "../scholar-artifacts/pdf-read.js";
18
19
 
19
20
  const API = "https://api.semanticscholar.org/graph/v1";
20
21
  const FIELDS = [
@@ -149,6 +150,33 @@ export function mapSemanticScholarPaper(
149
150
  };
150
151
  }
151
152
 
153
/**
 * Downloads and reads the PDF attached to a mapped Semantic Scholar record.
 *
 * Identity fields from `row` (id, title, source adapter, source URL, PDF URL)
 * override any same-named keys in `kwargs`; the remaining `kwargs` (output,
 * filename, page range, max-chars) are passed through to `readScholarPdf`.
 *
 * @param row - Mapped paper record.
 * @param kwargs - Command arguments supplied by the caller.
 * @returns The row produced by `readScholarPdf`.
 * @throws Error when the record has no PDF URL.
 */
async function readSemanticScholarPaperPdf(
  row: ScholarlyWorkRecord,
  kwargs: Record<string, unknown>,
): Promise<Record<string, unknown>> {
  const pdfUrl = str(row.pdf_url);
  if (!pdfUrl) {
    throw new Error(`Semantic Scholar paper ${row.id} has no source PDF URL.`);
  }

  const readArgs: Record<string, unknown> = {
    ...kwargs,
    id: row.id,
    title: row.title,
    source_adapter: row.source_adapter,
    source_url: row.source_url,
    pdf_url: pdfUrl,
  };
  const readOptions = {
    site: "semantic-scholar",
    command: "read",
    defaultOutput: "./semantic-scholar-downloads",
    userAgent:
      "unicli-semantic-scholar/1.0 (https://github.com/olo-dot-io/Uni-CLI)",
  };
  return readScholarPdf(readArgs, readOptions);
}
179
+
152
180
  function rows(
153
181
  papers: unknown,
154
182
  source = "semantic-scholar",
@@ -224,6 +252,70 @@ cli({
224
252
  },
225
253
  });
226
254
 
255
// semantic-scholar read: looks the paper up through the Graph API, then
// downloads its PDF and extracts text. Returns a single row.
cli({
  site: "semantic-scholar",
  name: "read",
  description:
    "Download a Semantic Scholar open-access paper PDF and extract text",
  domain: "api.semanticscholar.org",
  strategy: Strategy.PUBLIC,
  args: [
    { name: "id", type: "str", required: true, positional: true },
    {
      name: "output",
      type: "str",
      default: "./semantic-scholar-downloads",
      description: "Output directory for the downloaded PDF",
      "x-unicli-kind": "path",
    },
    { name: "filename", type: "str", description: "Output PDF filename" },
    { name: "first-page", type: "int", default: 1, description: "First page" },
    { name: "last-page", type: "int", default: 20, description: "Last page" },
    {
      name: "max-chars",
      type: "int",
      default: 40000,
      description: "Maximum extracted text characters",
    },
  ],
  columns: [
    "id",
    "title",
    "source_adapter",
    "source_url",
    "pdf_url",
    "path",
    "text_source",
    "text",
    "text_chars",
    "text_truncated",
  ],
  capabilities: [
    "http.fetch",
    "http.download",
    "subprocess.exec",
    "scholar.fulltext",
    "scholar.pdf",
  ],
  executables: ["pdftotext"],
  minimum_capability: "subprocess.exec",
  func: async (_page, kwargs) => {
    // The positional `id` wins; ref, doi, and arxiv_id are accepted as fallbacks.
    const rawRef = kwargs.id ?? kwargs.ref ?? kwargs.doi ?? kwargs.arxiv_id;
    const ref = requireSemanticScholarPaperRef(rawRef);

    const endpoint = `/paper/${encodeURIComponent(ref)}?fields=${encodeURIComponent(FIELDS)}`;
    const paper = (await fetchS2(
      endpoint,
      `semantic-scholar paper ${ref}`,
    )) as S2Paper;

    const record = mapSemanticScholarPaper(paper, "semantic-scholar");
    const readRow = await readSemanticScholarPaperPdf(record, kwargs);
    return [readRow];
  },
});
318
+
227
319
  for (const [name, path, cap] of [
228
320
  ["citations", "citations", "scholar.citations"],
229
321
  ["references", "references", "scholar.references"],