@zenalexa/unicli 0.225.2 → 0.225.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/AGENTS.md +2 -2
  2. package/README.md +3 -3
  3. package/README.zh-CN.md +3 -3
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -9
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
  6. package/dist/adapters/acl-anthology/papers.js +322 -58
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -1
  8. package/dist/adapters/arxiv/papers.d.ts +22 -4
  9. package/dist/adapters/arxiv/papers.d.ts.map +1 -1
  10. package/dist/adapters/arxiv/papers.js +202 -4
  11. package/dist/adapters/arxiv/papers.js.map +1 -1
  12. package/dist/adapters/baidu-scholar/search.d.ts +15 -1
  13. package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
  14. package/dist/adapters/baidu-scholar/search.js +72 -8
  15. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  16. package/dist/adapters/biorxiv/preprints.d.ts +9 -0
  17. package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
  18. package/dist/adapters/biorxiv/preprints.js +78 -0
  19. package/dist/adapters/biorxiv/preprints.js.map +1 -0
  20. package/dist/adapters/cnki/search.d.ts +82 -0
  21. package/dist/adapters/cnki/search.d.ts.map +1 -0
  22. package/dist/adapters/cnki/search.js +236 -0
  23. package/dist/adapters/cnki/search.js.map +1 -0
  24. package/dist/adapters/cvf/papers.d.ts +12 -7
  25. package/dist/adapters/cvf/papers.d.ts.map +1 -1
  26. package/dist/adapters/cvf/papers.js +210 -27
  27. package/dist/adapters/cvf/papers.js.map +1 -1
  28. package/dist/adapters/dblp/publications.d.ts +12 -5
  29. package/dist/adapters/dblp/publications.d.ts.map +1 -1
  30. package/dist/adapters/dblp/publications.js +31 -8
  31. package/dist/adapters/dblp/publications.js.map +1 -1
  32. package/dist/adapters/google-scholar/search.d.ts +22 -1
  33. package/dist/adapters/google-scholar/search.d.ts.map +1 -1
  34. package/dist/adapters/google-scholar/search.js +129 -14
  35. package/dist/adapters/google-scholar/search.js.map +1 -1
  36. package/dist/adapters/hf/paper.d.ts +12 -3
  37. package/dist/adapters/hf/paper.d.ts.map +1 -1
  38. package/dist/adapters/hf/paper.js +65 -5
  39. package/dist/adapters/hf/paper.js.map +1 -1
  40. package/dist/adapters/medrxiv/preprints.d.ts +9 -0
  41. package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
  42. package/dist/adapters/medrxiv/preprints.js +78 -0
  43. package/dist/adapters/medrxiv/preprints.js.map +1 -0
  44. package/dist/adapters/neurips/proceedings.d.ts +8 -7
  45. package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
  46. package/dist/adapters/neurips/proceedings.js +209 -21
  47. package/dist/adapters/neurips/proceedings.js.map +1 -1
  48. package/dist/adapters/openalex/works.d.ts +21 -5
  49. package/dist/adapters/openalex/works.d.ts.map +1 -1
  50. package/dist/adapters/openalex/works.js +108 -8
  51. package/dist/adapters/openalex/works.js.map +1 -1
  52. package/dist/adapters/openreview/papers.d.ts +10 -4
  53. package/dist/adapters/openreview/papers.d.ts.map +1 -1
  54. package/dist/adapters/openreview/papers.js +351 -24
  55. package/dist/adapters/openreview/papers.js.map +1 -1
  56. package/dist/adapters/pmlr/proceedings.d.ts +6 -6
  57. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
  58. package/dist/adapters/pmlr/proceedings.js +92 -12
  59. package/dist/adapters/pmlr/proceedings.js.map +1 -1
  60. package/dist/adapters/pubmed/articles.d.ts +8 -4
  61. package/dist/adapters/pubmed/articles.d.ts.map +1 -1
  62. package/dist/adapters/pubmed/articles.js +272 -39
  63. package/dist/adapters/pubmed/articles.js.map +1 -1
  64. package/dist/adapters/rxiv/preprints.d.ts +75 -0
  65. package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
  66. package/dist/adapters/rxiv/preprints.js +651 -0
  67. package/dist/adapters/rxiv/preprints.js.map +1 -0
  68. package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
  69. package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
  70. package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
  71. package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
  72. package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
  73. package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
  74. package/dist/adapters/scholar-artifacts/pdf.js +122 -0
  75. package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
  76. package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
  77. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
  78. package/dist/adapters/semantic-scholar/papers.js +80 -6
  79. package/dist/adapters/semantic-scholar/papers.js.map +1 -1
  80. package/dist/adapters/unpaywall/works.d.ts +7 -7
  81. package/dist/adapters/unpaywall/works.d.ts.map +1 -1
  82. package/dist/adapters/unpaywall/works.js +104 -12
  83. package/dist/adapters/unpaywall/works.js.map +1 -1
  84. package/dist/adapters/wanfang/search.d.ts +14 -0
  85. package/dist/adapters/wanfang/search.d.ts.map +1 -1
  86. package/dist/adapters/wanfang/search.js +56 -7
  87. package/dist/adapters/wanfang/search.js.map +1 -1
  88. package/dist/browser/page.d.ts +2 -0
  89. package/dist/browser/page.d.ts.map +1 -1
  90. package/dist/browser/page.js +12 -0
  91. package/dist/browser/page.js.map +1 -1
  92. package/dist/commands/browser/actions.d.ts.map +1 -1
  93. package/dist/commands/browser/actions.js +59 -3
  94. package/dist/commands/browser/actions.js.map +1 -1
  95. package/dist/commands/scholar.d.ts +77 -5
  96. package/dist/commands/scholar.d.ts.map +1 -1
  97. package/dist/commands/scholar.js +2945 -83
  98. package/dist/commands/scholar.js.map +1 -1
  99. package/dist/core/command-contract.d.ts.map +1 -1
  100. package/dist/core/command-contract.js +5 -0
  101. package/dist/core/command-contract.js.map +1 -1
  102. package/dist/core/schema-v2.d.ts +1 -0
  103. package/dist/core/schema-v2.d.ts.map +1 -1
  104. package/dist/core/schema-v2.js +1 -0
  105. package/dist/core/schema-v2.js.map +1 -1
  106. package/dist/discovery/aliases.d.ts.map +1 -1
  107. package/dist/discovery/aliases.js +208 -0
  108. package/dist/discovery/aliases.js.map +1 -1
  109. package/dist/discovery/core-catalog.d.ts +2 -0
  110. package/dist/discovery/core-catalog.d.ts.map +1 -1
  111. package/dist/discovery/core-catalog.js +487 -0
  112. package/dist/discovery/core-catalog.js.map +1 -1
  113. package/dist/discovery/intents.d.ts.map +1 -1
  114. package/dist/discovery/intents.js +273 -2
  115. package/dist/discovery/intents.js.map +1 -1
  116. package/dist/discovery/loader.d.ts.map +1 -1
  117. package/dist/discovery/loader.js +3 -0
  118. package/dist/discovery/loader.js.map +1 -1
  119. package/dist/engine/capability-policy.d.ts.map +1 -1
  120. package/dist/engine/capability-policy.js +30 -4
  121. package/dist/engine/capability-policy.js.map +1 -1
  122. package/dist/engine/kernel/stages.d.ts.map +1 -1
  123. package/dist/engine/kernel/stages.js +3 -0
  124. package/dist/engine/kernel/stages.js.map +1 -1
  125. package/dist/engine/operation-policy.d.ts +4 -1
  126. package/dist/engine/operation-policy.d.ts.map +1 -1
  127. package/dist/engine/operation-policy.js +23 -0
  128. package/dist/engine/operation-policy.js.map +1 -1
  129. package/dist/fast-path/manifest.d.ts +3 -0
  130. package/dist/fast-path/manifest.d.ts.map +1 -1
  131. package/dist/fast-path/manifest.js.map +1 -1
  132. package/dist/fast-path/policy.d.ts.map +1 -1
  133. package/dist/fast-path/policy.js +3 -0
  134. package/dist/fast-path/policy.js.map +1 -1
  135. package/dist/manifest-compact.txt +1 -1
  136. package/dist/manifest.json +6804 -1002
  137. package/dist/registry.d.ts +2 -0
  138. package/dist/registry.d.ts.map +1 -1
  139. package/dist/registry.js +1 -0
  140. package/dist/registry.js.map +1 -1
  141. package/dist/types/scholarly.d.ts +19 -4
  142. package/dist/types/scholarly.d.ts.map +1 -1
  143. package/dist/types/scholarly.js +4 -4
  144. package/dist/types.d.ts +8 -0
  145. package/dist/types.d.ts.map +1 -1
  146. package/dist/types.js.map +1 -1
  147. package/package.json +1 -1
  148. package/server.json +2 -2
  149. package/skills/unicli/SKILL.md +1 -1
  150. package/skills/unicli-claude-code/SKILL.md +1 -1
  151. package/skills/unicli-hermes/SKILL.md +1 -1
  152. package/src/adapters/acl-anthology/papers.test.ts +111 -0
  153. package/src/adapters/acl-anthology/papers.ts +379 -71
  154. package/src/adapters/arxiv/papers.test.ts +46 -0
  155. package/src/adapters/arxiv/papers.ts +251 -4
  156. package/src/adapters/baidu-scholar/search.ts +74 -11
  157. package/src/adapters/biorxiv/preprints.ts +112 -0
  158. package/src/adapters/cnki/search.ts +357 -0
  159. package/src/adapters/cvf/papers.ts +260 -27
  160. package/src/adapters/dblp/publications.test.ts +9 -0
  161. package/src/adapters/dblp/publications.ts +31 -8
  162. package/src/adapters/google-scholar/search.ts +165 -17
  163. package/src/adapters/hf/paper.test.ts +23 -0
  164. package/src/adapters/hf/paper.ts +89 -5
  165. package/src/adapters/hf/top.yaml +34 -2
  166. package/src/adapters/huggingface-papers/daily.yaml +37 -3
  167. package/src/adapters/huggingface-papers/search.yaml +43 -9
  168. package/src/adapters/medrxiv/preprints.ts +112 -0
  169. package/src/adapters/neurips/proceedings.ts +266 -22
  170. package/src/adapters/openalex/works.test.ts +15 -4
  171. package/src/adapters/openalex/works.ts +136 -8
  172. package/src/adapters/openreview/papers.test.ts +31 -0
  173. package/src/adapters/openreview/papers.ts +407 -29
  174. package/src/adapters/pmlr/proceedings.ts +102 -12
  175. package/src/adapters/pubmed/articles.test.ts +88 -1
  176. package/src/adapters/pubmed/articles.ts +343 -44
  177. package/src/adapters/rxiv/preprints.test.ts +233 -0
  178. package/src/adapters/rxiv/preprints.ts +849 -0
  179. package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
  180. package/src/adapters/scholar-artifacts/pdf.ts +133 -0
  181. package/src/adapters/semantic-scholar/papers.ts +98 -6
  182. package/src/adapters/unpaywall/works.ts +141 -12
  183. package/src/adapters/wanfang/search.ts +57 -7
  184. package/src/adapters/cnki/search.yaml +0 -49
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * @owner src/adapters/pubmed/articles.ts
3
- * @does Register agent-facing PubMed search, article, author, citation, and related-article commands.
4
- * @needs NCBI E-utilities API, TypeScript adapter loader, PMID/query validation.
5
- * @feeds surface coverage ledger, biomedical literature command surface, agent-readable PubMed rows.
6
- * @breaks NCBI E-utilities envelope drift, weak PMID validation, or silent empty rows hide literature lookup failures.
3
+ * @does Register agent-facing PubMed search, normalized paper metadata, field/value article detail, PMC full-text read, author, citation, and related-article commands.
4
+ * @needs NCBI E-utilities PubMed/PMC APIs, TypeScript adapter loader, PMID/PMCID/query validation.
5
+ * @feeds surface coverage ledger, biomedical literature command surface, agent-readable PubMed rows, scholar full-text workflow.
6
+ * @breaks NCBI E-utilities envelope drift, weak PMID/PMCID validation, missing PMC full text, or silent empty rows hide literature lookup failures.
7
7
  */
8
8
 
9
9
  import { DOMParser, type Document, type Element } from "@xmldom/xmldom";
@@ -12,6 +12,7 @@ import { cli, Strategy } from "../../registry.js";
12
12
  const EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
13
13
  const SUMMARY_COLUMNS = [
14
14
  "rank",
15
+ "id",
15
16
  "pmid",
16
17
  "title",
17
18
  "authors",
@@ -19,9 +20,23 @@ const SUMMARY_COLUMNS = [
19
20
  "year",
20
21
  "article_type",
21
22
  "doi",
23
+ "pmc_id",
22
24
  "url",
23
25
  ];
24
- const RELATED_COLUMNS = [...SUMMARY_COLUMNS.slice(0, 7), "score", "doi", "url"];
26
+ const RELATED_COLUMNS = [
27
+ "rank",
28
+ "id",
29
+ "pmid",
30
+ "title",
31
+ "authors",
32
+ "journal",
33
+ "year",
34
+ "score",
35
+ "doi",
36
+ "pmc_id",
37
+ "url",
38
+ ];
39
+ const PMC_BASE = "https://pmc.ncbi.nlm.nih.gov/articles";
25
40
 
26
41
  interface PubMedSummary {
27
42
  uid?: unknown;
@@ -70,6 +85,14 @@ export function requirePmid(value: unknown, label = "pmid"): string {
70
85
  return pmid;
71
86
  }
72
87
 
88
+ export function normalizePmcId(value: unknown): string {
89
+ const raw = String(value ?? "").trim();
90
+ if (!raw) return "";
91
+ const match = raw.match(/^(?:PMC)?(\d+)$/i);
92
+ if (!match) throw new Error(`pubmed pmc id "${raw}" is not valid.`);
93
+ return `PMC${match[1]}`;
94
+ }
95
+
73
96
  export function requirePubMedLimit(
74
97
  value: unknown,
75
98
  fallback = 20,
@@ -85,6 +108,20 @@ export function requirePubMedLimit(
85
108
  return n;
86
109
  }
87
110
 
111
+ export function requirePubMedMaxChars(
112
+ value: unknown,
113
+ fallback = 40_000,
114
+ ): number {
115
+ if (value === undefined || value === null || value === "") return fallback;
116
+ const n = Number(value);
117
+ if (!Number.isInteger(n) || n < 1_000 || n > 1_000_000) {
118
+ throw new Error(
119
+ `pubmed max-chars must be an integer in [1000, 1000000]. Got: ${String(value)}`,
120
+ );
121
+ }
122
+ return n;
123
+ }
124
+
88
125
  function requireChoice(
89
126
  value: unknown,
90
127
  choices: string[],
@@ -105,9 +142,10 @@ function buildUrl(
105
142
  tool: string,
106
143
  params: Record<string, unknown>,
107
144
  retmode = "json",
145
+ db = "pubmed",
108
146
  ): string {
109
147
  const search = new URLSearchParams();
110
- search.set("db", "pubmed");
148
+ search.set("db", db);
111
149
  search.set("retmode", retmode);
112
150
  if (process.env.NCBI_API_KEY) search.set("api_key", process.env.NCBI_API_KEY);
113
151
  if (process.env.NCBI_EMAIL) search.set("email", process.env.NCBI_EMAIL);
@@ -122,8 +160,9 @@ async function eutilsFetch(
122
160
  tool: string,
123
161
  params: Record<string, unknown>,
124
162
  retmode = "json",
163
+ db = "pubmed",
125
164
  ): Promise<unknown> {
126
- const response = await fetch(buildUrl(tool, params, retmode), {
165
+ const response = await fetch(buildUrl(tool, params, retmode, db), {
127
166
  headers: { "User-Agent": "unicli (https://github.com/olo-dot-io/Uni-CLI)" },
128
167
  });
129
168
  if (!response.ok)
@@ -154,14 +193,26 @@ function authorNames(authors: PubMedSummary["authors"], max = 3): string {
154
193
  }
155
194
 
156
195
  function doi(articleIds: PubMedSummary["articleids"]): string {
196
+ return articleId(articleIds, "doi");
197
+ }
198
+
199
+ function articleId(
200
+ articleIds: PubMedSummary["articleids"],
201
+ type: string,
202
+ ): string {
157
203
  return stringField(
158
204
  Array.isArray(articleIds)
159
- ? articleIds.find((id) => stringField(id.idtype).toLowerCase() === "doi")
160
- ?.value
205
+ ? articleIds.find(
206
+ (id) => stringField(id.idtype).toLowerCase() === type.toLowerCase(),
207
+ )?.value
161
208
  : "",
162
209
  );
163
210
  }
164
211
 
212
+ function pmcUrl(pmcId: string): string {
213
+ return pmcId ? `${PMC_BASE}/${pmcId}/` : "";
214
+ }
215
+
165
216
  function articleType(types: unknown[]): string {
166
217
  const values = Array.isArray(types)
167
218
  ? types.map(stringField).filter(Boolean)
@@ -178,17 +229,27 @@ export function mapPubMedSummaryRows(
178
229
  return pmids.flatMap((pmid, index) => {
179
230
  const summary = summaries.find((item) => stringField(item.uid) === pmid);
180
231
  if (!summary) return [];
232
+ const pmcId = articleId(summary.articleids, "pmc");
233
+ const url = `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
181
234
  return [
182
235
  {
183
236
  rank: index + 1,
237
+ id: pmid,
184
238
  pmid,
185
239
  title: cleanText(summary.title),
186
240
  authors: authorNames(summary.authors),
187
241
  journal: stringField(summary.source),
242
+ venue: stringField(summary.source),
188
243
  year: year(summary.pubdate),
189
244
  article_type: articleType(summary.pubtype ?? []),
245
+ type: articleType(summary.pubtype ?? []),
190
246
  doi: doi(summary.articleids),
191
- url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
247
+ pmc_id: pmcId || undefined,
248
+ pmc_url: pmcUrl(pmcId),
249
+ source_adapter: "pubmed",
250
+ source_url: url,
251
+ retrieved_at: new Date().toISOString(),
252
+ url,
192
253
  },
193
254
  ];
194
255
  });
@@ -212,30 +273,42 @@ function firstElement(
212
273
  return root.getElementsByTagName(tagName)[0] ?? null;
213
274
  }
214
275
 
215
- export function mapPubMedArticleRows(
276
+ function elements(root: Document | Element, tagName: string): Element[] {
277
+ const nodes = root.getElementsByTagName(tagName);
278
+ return Array.from({ length: nodes.length }, (_, index) =>
279
+ nodes.item(index),
280
+ ).filter((node): node is Element => node !== null);
281
+ }
282
+
283
+ function articleIdText(
284
+ root: Document | Element,
285
+ attrName: "IdType" | "pub-id-type",
286
+ attrValue: string,
287
+ ): string {
288
+ return (
289
+ elements(root, attrName === "IdType" ? "ArticleId" : "article-id")
290
+ .find(
291
+ (node) =>
292
+ node.getAttribute(attrName)?.toLowerCase() ===
293
+ attrValue.toLowerCase(),
294
+ )
295
+ ?.textContent?.trim() ?? ""
296
+ );
297
+ }
298
+
299
+ export function mapPubMedArticleRecord(
216
300
  xml: string,
217
301
  pmid: string,
218
- fullAbstract = false,
219
- ): Array<Record<string, unknown>> {
302
+ ): Record<string, unknown> {
220
303
  const document = new DOMParser().parseFromString(xml, "text/xml");
221
304
  const title = childText(document, "ArticleTitle");
222
305
  if (!title)
223
306
  throw new Error(`pubmed article ${pmid} did not include a title.`);
307
+ const doiValue = articleIdText(document, "IdType", "doi");
308
+ const pmcId = articleIdText(document, "IdType", "pmc");
224
309
  const abstract = elementTexts(document, "AbstractText").join(" ");
225
- const shownAbstract =
226
- fullAbstract || abstract.length <= 500
227
- ? abstract
228
- : `${abstract.slice(0, 497)}...`;
229
- const doiValue =
230
- Array.from(
231
- { length: document.getElementsByTagName("ArticleId").length },
232
- (_, index) => document.getElementsByTagName("ArticleId").item(index),
233
- )
234
- .filter((node): node is Element => node !== null)
235
- .find((node) => node.getAttribute("IdType")?.toLowerCase() === "doi")
236
- ?.textContent?.trim() ?? "";
237
310
  const authorNodes = document.getElementsByTagName("Author");
238
- const authors = Array.from({ length: authorNodes.length }, (_, index) =>
311
+ const authorList = Array.from({ length: authorNodes.length }, (_, index) =>
239
312
  authorNodes.item(index),
240
313
  )
241
314
  .filter((author): author is Element => author !== null)
@@ -244,31 +317,171 @@ export function mapPubMedArticleRows(
244
317
  .filter(Boolean)
245
318
  .join(" "),
246
319
  )
247
- .filter(Boolean)
248
- .join(", ");
320
+ .filter(Boolean);
249
321
  const journal = firstElement(document, "Journal");
250
322
  const pubDate = firstElement(document, "PubDate");
323
+ const yearValue = pubDate ? childText(pubDate, "Year") : "";
324
+ const sourceUrl = `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
325
+ return {
326
+ id: pmid,
327
+ pmid,
328
+ title,
329
+ authors: authorList,
330
+ journal: journal ? childText(journal, "Title") : "",
331
+ venue: journal ? childText(journal, "Title") : "",
332
+ year: yearValue ? Number(yearValue) : undefined,
333
+ date: pubDate ? cleanText(pubDate.textContent ?? "") : "",
334
+ article_type: elementTexts(document, "PublicationType")[0] ?? "",
335
+ type: elementTexts(document, "PublicationType")[0] ?? "",
336
+ language: childText(document, "Language"),
337
+ doi: doiValue || undefined,
338
+ pmc_id: pmcId || undefined,
339
+ pmc_url: pmcUrl(pmcId),
340
+ abstract: abstract || undefined,
341
+ source_adapter: "pubmed",
342
+ source_url: sourceUrl,
343
+ retrieved_at: new Date().toISOString(),
344
+ url: sourceUrl,
345
+ };
346
+ }
347
+
348
+ export function mapPubMedArticleRows(
349
+ xml: string,
350
+ pmid: string,
351
+ fullAbstract = false,
352
+ ): Array<Record<string, unknown>> {
353
+ const record = mapPubMedArticleRecord(xml, pmid);
354
+ const abstract = stringField(record.abstract);
355
+ const shownAbstract =
356
+ fullAbstract || abstract.length <= 500
357
+ ? abstract
358
+ : `${abstract.slice(0, 497)}...`;
251
359
  return [
252
360
  { field: "PMID", value: pmid },
253
- { field: "Title", value: title },
254
- { field: "Authors", value: authors },
255
- { field: "Journal", value: journal ? childText(journal, "Title") : "" },
256
- { field: "Year", value: pubDate ? childText(pubDate, "Year") : "" },
257
- {
258
- field: "Date",
259
- value: pubDate ? cleanText(pubDate.textContent ?? "") : "",
260
- },
261
- {
262
- field: "Article Type",
263
- value: elementTexts(document, "PublicationType")[0] ?? null,
264
- },
265
- { field: "Language", value: childText(document, "Language") },
266
- { field: "DOI", value: doiValue || null },
361
+ { field: "PMCID", value: record.pmc_id || null },
362
+ { field: "Title", value: record.title },
363
+ { field: "Authors", value: (record.authors as string[]).join(", ") },
364
+ { field: "Journal", value: record.journal },
365
+ { field: "Year", value: record.year ? String(record.year) : "" },
366
+ { field: "Date", value: record.date },
367
+ { field: "Article Type", value: record.article_type || null },
368
+ { field: "Language", value: record.language },
369
+ { field: "DOI", value: record.doi || null },
267
370
  { field: "Abstract", value: shownAbstract || null },
268
- { field: "URL", value: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/` },
371
+ { field: "URL", value: record.source_url },
372
+ { field: "PMC URL", value: record.pmc_url || null },
269
373
  ];
270
374
  }
271
375
 
376
+ function directChildElements(root: Element, tagName: string): Element[] {
377
+ const out: Element[] = [];
378
+ for (let index = 0; index < root.childNodes.length; index += 1) {
379
+ const node = root.childNodes.item(index);
380
+ if (node?.nodeType === 1 && node.nodeName === tagName) {
381
+ out.push(node as Element);
382
+ }
383
+ }
384
+ return out;
385
+ }
386
+
387
+ function directChildText(root: Element, tagName: string): string {
388
+ return cleanText(directChildElements(root, tagName)[0]?.textContent ?? "");
389
+ }
390
+
391
+ function sectionText(section: Element): string {
392
+ const title = directChildText(section, "title");
393
+ const paragraphs = directChildElements(section, "p")
394
+ .map((paragraph) => cleanText(paragraph.textContent ?? ""))
395
+ .filter(Boolean);
396
+ const nested = directChildElements(section, "sec")
397
+ .map(sectionText)
398
+ .filter(Boolean);
399
+ return [title ? `## ${title}` : "", ...paragraphs, ...nested]
400
+ .filter(Boolean)
401
+ .join("\n\n");
402
+ }
403
+
404
+ function truncateText(
405
+ text: string,
406
+ maxChars: number,
407
+ ): {
408
+ text: string;
409
+ truncated: boolean;
410
+ } {
411
+ if (text.length <= maxChars) return { text, truncated: false };
412
+ return {
413
+ text: `${text.slice(0, maxChars).trimEnd()}\n\n[truncated at ${maxChars} characters]`,
414
+ truncated: true,
415
+ };
416
+ }
417
+
418
+ export function mapPmcFullTextRow(
419
+ xml: string,
420
+ ref: string,
421
+ maxChars = 40_000,
422
+ ): Record<string, unknown> {
423
+ const document = new DOMParser().parseFromString(xml, "text/xml");
424
+ const title = childText(document, "article-title");
425
+ if (!title) {
426
+ throw new Error(`PMC full text ${ref} did not include an article title.`);
427
+ }
428
+ const pmcId = normalizePmcId(
429
+ articleIdText(document, "pub-id-type", "pmcid") || ref,
430
+ );
431
+ const pmid = articleIdText(document, "pub-id-type", "pmid");
432
+ const doiValue = articleIdText(document, "pub-id-type", "doi");
433
+ const abstract = cleanText(
434
+ firstElement(document, "abstract")?.textContent ?? "",
435
+ );
436
+ const body = firstElement(document, "body");
437
+ const bodyText = body
438
+ ? directChildElements(body, "sec")
439
+ .map(sectionText)
440
+ .filter(Boolean)
441
+ .join("\n\n")
442
+ : "";
443
+ const text = [abstract ? `## Abstract\n\n${abstract}` : "", bodyText]
444
+ .filter(Boolean)
445
+ .join("\n\n");
446
+ if (!text) {
447
+ throw new Error(`PMC full text ${pmcId} did not include readable text.`);
448
+ }
449
+ const truncated = truncateText(text, maxChars);
450
+ return {
451
+ id: pmid || pmcId,
452
+ title,
453
+ pmid: pmid || undefined,
454
+ pmc_id: pmcId,
455
+ doi: doiValue || undefined,
456
+ source_adapter: "pubmed",
457
+ source_url: pmcUrl(pmcId),
458
+ text: truncated.text,
459
+ text_truncated: truncated.truncated,
460
+ text_source: "pmc_xml",
461
+ retrieved_at: new Date().toISOString(),
462
+ };
463
+ }
464
+
465
+ async function pmcIdFromPubMedRef(ref: string): Promise<string> {
466
+ if (/^(?:PMC)?\d+$/i.test(ref) && /^PMC/i.test(ref)) {
467
+ return normalizePmcId(ref);
468
+ }
469
+ const pmid = requirePmid(ref, "pmid");
470
+ const json = (await eutilsFetch(
471
+ "esearch",
472
+ { term: `${pmid}[PMID]`, retmax: 1 },
473
+ "json",
474
+ "pmc",
475
+ )) as { esearchresult?: { idlist?: string[] } };
476
+ const numericPmc = json.esearchresult?.idlist?.[0];
477
+ if (!numericPmc) {
478
+ throw new Error(
479
+ `PubMed PMID ${pmid} has no PubMed Central full text record.`,
480
+ );
481
+ }
482
+ return normalizePmcId(numericPmc);
483
+ }
484
+
272
485
  async function fetchSummaryRows(
273
486
  pmids: string[],
274
487
  label: string,
@@ -346,7 +559,7 @@ cli({
346
559
  },
347
560
  ],
348
561
  columns: ["field", "value"],
349
- capabilities: ["http.fetch", "scholar.get"],
562
+ capabilities: ["http.fetch"],
350
563
  func: async (_page, kwargs) => {
351
564
  const pmid = requirePmid(kwargs.pmid);
352
565
  const xml = String(
@@ -356,6 +569,92 @@ cli({
356
569
  },
357
570
  });
358
571
 
572
+ cli({
573
+ site: "pubmed",
574
+ name: "paper",
575
+ description: "Fetch normalized PubMed article metadata by PMID",
576
+ domain: "pubmed.ncbi.nlm.nih.gov",
577
+ strategy: Strategy.PUBLIC,
578
+ args: [
579
+ {
580
+ name: "pmid",
581
+ type: "str",
582
+ required: true,
583
+ positional: true,
584
+ description: "PubMed ID",
585
+ },
586
+ ],
587
+ columns: [
588
+ "id",
589
+ "title",
590
+ "authors",
591
+ "year",
592
+ "journal",
593
+ "doi",
594
+ "pmc_id",
595
+ "source_url",
596
+ ],
597
+ capabilities: ["http.fetch", "scholar.get"],
598
+ func: async (_page, kwargs) => {
599
+ const pmid = requirePmid(kwargs.pmid ?? kwargs.id ?? kwargs.ref);
600
+ const xml = String(
601
+ await eutilsFetch("efetch", { id: pmid, rettype: "abstract" }, "xml"),
602
+ );
603
+ return [mapPubMedArticleRecord(xml, pmid)];
604
+ },
605
+ });
606
+
607
+ cli({
608
+ site: "pubmed",
609
+ name: "read",
610
+ description: "Read PubMed Central full text for a PMID or PMCID",
611
+ domain: "eutils.ncbi.nlm.nih.gov",
612
+ strategy: Strategy.PUBLIC,
613
+ args: [
614
+ {
615
+ name: "ref",
616
+ type: "str",
617
+ required: true,
618
+ positional: true,
619
+ description: "PubMed PMID or PubMed Central PMCID",
620
+ },
621
+ {
622
+ name: "max-chars",
623
+ type: "int",
624
+ default: 40000,
625
+ description: "Maximum extracted text characters",
626
+ },
627
+ ],
628
+ columns: [
629
+ "id",
630
+ "title",
631
+ "pmid",
632
+ "pmc_id",
633
+ "doi",
634
+ "source_url",
635
+ "text",
636
+ "text_truncated",
637
+ ],
638
+ capabilities: ["http.fetch", "scholar.fulltext"],
639
+ func: async (_page, kwargs) => {
640
+ const ref = requirePubMedText(
641
+ kwargs.ref ?? kwargs.id ?? kwargs.pmid,
642
+ "ref",
643
+ );
644
+ const maxChars = requirePubMedMaxChars(kwargs["max-chars"]);
645
+ const pmcId = await pmcIdFromPubMedRef(ref);
646
+ const xml = String(
647
+ await eutilsFetch(
648
+ "efetch",
649
+ { id: pmcId.replace(/^PMC/i, "") },
650
+ "xml",
651
+ "pmc",
652
+ ),
653
+ );
654
+ return [mapPmcFullTextRow(xml, pmcId, maxChars)];
655
+ },
656
+ });
657
+
359
658
  cli({
360
659
  site: "pubmed",
361
660
  name: "author",