@zenalexa/unicli 0.225.1 → 0.225.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +3 -3
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/dist/adapters/acl-anthology/papers.d.ts +16 -9
- package/dist/adapters/acl-anthology/papers.d.ts.map +1 -1
- package/dist/adapters/acl-anthology/papers.js +322 -58
- package/dist/adapters/acl-anthology/papers.js.map +1 -1
- package/dist/adapters/arxiv/papers.d.ts +22 -4
- package/dist/adapters/arxiv/papers.d.ts.map +1 -1
- package/dist/adapters/arxiv/papers.js +202 -4
- package/dist/adapters/arxiv/papers.js.map +1 -1
- package/dist/adapters/baidu-scholar/search.d.ts +15 -1
- package/dist/adapters/baidu-scholar/search.d.ts.map +1 -1
- package/dist/adapters/baidu-scholar/search.js +72 -8
- package/dist/adapters/baidu-scholar/search.js.map +1 -1
- package/dist/adapters/biorxiv/preprints.d.ts +9 -0
- package/dist/adapters/biorxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/biorxiv/preprints.js +78 -0
- package/dist/adapters/biorxiv/preprints.js.map +1 -0
- package/dist/adapters/cnki/search.d.ts +82 -0
- package/dist/adapters/cnki/search.d.ts.map +1 -0
- package/dist/adapters/cnki/search.js +236 -0
- package/dist/adapters/cnki/search.js.map +1 -0
- package/dist/adapters/cvf/papers.d.ts +12 -7
- package/dist/adapters/cvf/papers.d.ts.map +1 -1
- package/dist/adapters/cvf/papers.js +210 -27
- package/dist/adapters/cvf/papers.js.map +1 -1
- package/dist/adapters/dblp/publications.d.ts +12 -5
- package/dist/adapters/dblp/publications.d.ts.map +1 -1
- package/dist/adapters/dblp/publications.js +31 -8
- package/dist/adapters/dblp/publications.js.map +1 -1
- package/dist/adapters/google-scholar/search.d.ts +22 -1
- package/dist/adapters/google-scholar/search.d.ts.map +1 -1
- package/dist/adapters/google-scholar/search.js +129 -14
- package/dist/adapters/google-scholar/search.js.map +1 -1
- package/dist/adapters/hf/paper.d.ts +12 -3
- package/dist/adapters/hf/paper.d.ts.map +1 -1
- package/dist/adapters/hf/paper.js +65 -5
- package/dist/adapters/hf/paper.js.map +1 -1
- package/dist/adapters/medrxiv/preprints.d.ts +9 -0
- package/dist/adapters/medrxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/medrxiv/preprints.js +78 -0
- package/dist/adapters/medrxiv/preprints.js.map +1 -0
- package/dist/adapters/neurips/proceedings.d.ts +8 -7
- package/dist/adapters/neurips/proceedings.d.ts.map +1 -1
- package/dist/adapters/neurips/proceedings.js +209 -21
- package/dist/adapters/neurips/proceedings.js.map +1 -1
- package/dist/adapters/openalex/works.d.ts +21 -5
- package/dist/adapters/openalex/works.d.ts.map +1 -1
- package/dist/adapters/openalex/works.js +108 -8
- package/dist/adapters/openalex/works.js.map +1 -1
- package/dist/adapters/openreview/papers.d.ts +10 -4
- package/dist/adapters/openreview/papers.d.ts.map +1 -1
- package/dist/adapters/openreview/papers.js +351 -24
- package/dist/adapters/openreview/papers.js.map +1 -1
- package/dist/adapters/pmlr/proceedings.d.ts +6 -6
- package/dist/adapters/pmlr/proceedings.d.ts.map +1 -1
- package/dist/adapters/pmlr/proceedings.js +92 -12
- package/dist/adapters/pmlr/proceedings.js.map +1 -1
- package/dist/adapters/pubmed/articles.d.ts +8 -4
- package/dist/adapters/pubmed/articles.d.ts.map +1 -1
- package/dist/adapters/pubmed/articles.js +272 -39
- package/dist/adapters/pubmed/articles.js.map +1 -1
- package/dist/adapters/rxiv/preprints.d.ts +75 -0
- package/dist/adapters/rxiv/preprints.d.ts.map +1 -0
- package/dist/adapters/rxiv/preprints.js +651 -0
- package/dist/adapters/rxiv/preprints.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts +49 -0
- package/dist/adapters/scholar-artifacts/pdf-read.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js +204 -0
- package/dist/adapters/scholar-artifacts/pdf-read.js.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts +16 -0
- package/dist/adapters/scholar-artifacts/pdf.d.ts.map +1 -0
- package/dist/adapters/scholar-artifacts/pdf.js +122 -0
- package/dist/adapters/scholar-artifacts/pdf.js.map +1 -0
- package/dist/adapters/semantic-scholar/papers.d.ts +6 -6
- package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -1
- package/dist/adapters/semantic-scholar/papers.js +80 -6
- package/dist/adapters/semantic-scholar/papers.js.map +1 -1
- package/dist/adapters/unpaywall/works.d.ts +7 -7
- package/dist/adapters/unpaywall/works.d.ts.map +1 -1
- package/dist/adapters/unpaywall/works.js +104 -12
- package/dist/adapters/unpaywall/works.js.map +1 -1
- package/dist/adapters/wanfang/search.d.ts +14 -0
- package/dist/adapters/wanfang/search.d.ts.map +1 -1
- package/dist/adapters/wanfang/search.js +56 -7
- package/dist/adapters/wanfang/search.js.map +1 -1
- package/dist/browser/page.d.ts +2 -0
- package/dist/browser/page.d.ts.map +1 -1
- package/dist/browser/page.js +12 -0
- package/dist/browser/page.js.map +1 -1
- package/dist/browser/protocol.d.ts +6 -1
- package/dist/browser/protocol.d.ts.map +1 -1
- package/dist/browser/protocol.js.map +1 -1
- package/dist/commands/browser/actions.d.ts.map +1 -1
- package/dist/commands/browser/actions.js +487 -8
- package/dist/commands/browser/actions.js.map +1 -1
- package/dist/commands/compute.js +12 -1
- package/dist/commands/compute.js.map +1 -1
- package/dist/commands/schema.d.ts.map +1 -1
- package/dist/commands/schema.js +22 -0
- package/dist/commands/schema.js.map +1 -1
- package/dist/commands/scholar.d.ts +77 -5
- package/dist/commands/scholar.d.ts.map +1 -1
- package/dist/commands/scholar.js +2945 -83
- package/dist/commands/scholar.js.map +1 -1
- package/dist/commands/search.d.ts.map +1 -1
- package/dist/commands/search.js +14 -3
- package/dist/commands/search.js.map +1 -1
- package/dist/compute/contracts.d.ts +55 -0
- package/dist/compute/contracts.d.ts.map +1 -0
- package/dist/compute/contracts.js +487 -0
- package/dist/compute/contracts.js.map +1 -0
- package/dist/core/command-contract.d.ts.map +1 -1
- package/dist/core/command-contract.js +5 -0
- package/dist/core/command-contract.js.map +1 -1
- package/dist/core/schema-v2.d.ts +1 -0
- package/dist/core/schema-v2.d.ts.map +1 -1
- package/dist/core/schema-v2.js +1 -0
- package/dist/core/schema-v2.js.map +1 -1
- package/dist/discovery/aliases.d.ts +8 -1
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +333 -20
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/core-catalog.d.ts +2 -0
- package/dist/discovery/core-catalog.d.ts.map +1 -1
- package/dist/discovery/core-catalog.js +525 -66
- package/dist/discovery/core-catalog.js.map +1 -1
- package/dist/discovery/intents.d.ts +1 -0
- package/dist/discovery/intents.d.ts.map +1 -1
- package/dist/discovery/intents.js +299 -2
- package/dist/discovery/intents.js.map +1 -1
- package/dist/discovery/loader.d.ts.map +1 -1
- package/dist/discovery/loader.js +3 -0
- package/dist/discovery/loader.js.map +1 -1
- package/dist/discovery/macos-dynamic.d.ts +1 -0
- package/dist/discovery/macos-dynamic.d.ts.map +1 -1
- package/dist/discovery/macos-dynamic.js +20 -1
- package/dist/discovery/macos-dynamic.js.map +1 -1
- package/dist/discovery/search.d.ts.map +1 -1
- package/dist/discovery/search.js +12 -5
- package/dist/discovery/search.js.map +1 -1
- package/dist/engine/browser/evidence.d.ts +34 -1
- package/dist/engine/browser/evidence.d.ts.map +1 -1
- package/dist/engine/browser/evidence.js +141 -6
- package/dist/engine/browser/evidence.js.map +1 -1
- package/dist/engine/capability-policy.d.ts.map +1 -1
- package/dist/engine/capability-policy.js +30 -4
- package/dist/engine/capability-policy.js.map +1 -1
- package/dist/engine/kernel/stages.d.ts.map +1 -1
- package/dist/engine/kernel/stages.js +3 -0
- package/dist/engine/kernel/stages.js.map +1 -1
- package/dist/engine/operation-policy.d.ts +4 -1
- package/dist/engine/operation-policy.d.ts.map +1 -1
- package/dist/engine/operation-policy.js +23 -0
- package/dist/engine/operation-policy.js.map +1 -1
- package/dist/engine/steps/fetch-text.d.ts.map +1 -1
- package/dist/engine/steps/fetch-text.js +2 -2
- package/dist/engine/steps/fetch-text.js.map +1 -1
- package/dist/engine/steps/fetch.d.ts +1 -0
- package/dist/engine/steps/fetch.d.ts.map +1 -1
- package/dist/engine/steps/fetch.js +24 -4
- package/dist/engine/steps/fetch.js.map +1 -1
- package/dist/fast-path/handlers/discovery.d.ts +5 -5
- package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
- package/dist/fast-path/handlers/discovery.js +61 -8
- package/dist/fast-path/handlers/discovery.js.map +1 -1
- package/dist/fast-path/manifest.d.ts +3 -0
- package/dist/fast-path/manifest.d.ts.map +1 -1
- package/dist/fast-path/manifest.js.map +1 -1
- package/dist/fast-path/policy.d.ts.map +1 -1
- package/dist/fast-path/policy.js +3 -0
- package/dist/fast-path/policy.js.map +1 -1
- package/dist/fast-path/render.d.ts +2 -0
- package/dist/fast-path/render.d.ts.map +1 -1
- package/dist/fast-path/render.js +9 -0
- package/dist/fast-path/render.js.map +1 -1
- package/dist/manifest-compact.txt +2 -2
- package/dist/manifest.json +6977 -1002
- package/dist/mcp/handler.d.ts +2 -16
- package/dist/mcp/handler.d.ts.map +1 -1
- package/dist/mcp/handler.js.map +1 -1
- package/dist/mcp/http-transport.d.ts +7 -1
- package/dist/mcp/http-transport.d.ts.map +1 -1
- package/dist/mcp/http-transport.js +20 -1
- package/dist/mcp/http-transport.js.map +1 -1
- package/dist/mcp/jsonrpc.d.ts +27 -0
- package/dist/mcp/jsonrpc.d.ts.map +1 -0
- package/dist/mcp/jsonrpc.js +12 -0
- package/dist/mcp/jsonrpc.js.map +1 -0
- package/dist/mcp/origin-guard.d.ts +26 -0
- package/dist/mcp/origin-guard.d.ts.map +1 -0
- package/dist/mcp/origin-guard.js +42 -0
- package/dist/mcp/origin-guard.js.map +1 -0
- package/dist/mcp/profiles/computer-use.d.ts.map +1 -1
- package/dist/mcp/profiles/computer-use.js +30 -270
- package/dist/mcp/profiles/computer-use.js.map +1 -1
- package/dist/mcp/streamable-http/session.d.ts +4 -22
- package/dist/mcp/streamable-http/session.d.ts.map +1 -1
- package/dist/mcp/streamable-http/session.js +4 -24
- package/dist/mcp/streamable-http/session.js.map +1 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +74 -54
- package/dist/mcp/tools.js.map +1 -1
- package/dist/output/envelope.d.ts +2 -0
- package/dist/output/envelope.d.ts.map +1 -1
- package/dist/output/envelope.js.map +1 -1
- package/dist/output/error-map.d.ts +14 -0
- package/dist/output/error-map.d.ts.map +1 -1
- package/dist/output/error-map.js +20 -0
- package/dist/output/error-map.js.map +1 -1
- package/dist/registry.d.ts +2 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +1 -0
- package/dist/registry.js.map +1 -1
- package/dist/transport/cascade.d.ts.map +1 -1
- package/dist/transport/cascade.js +77 -5
- package/dist/transport/cascade.js.map +1 -1
- package/dist/transport/refs.d.ts +33 -1
- package/dist/transport/refs.d.ts.map +1 -1
- package/dist/transport/refs.js +40 -1
- package/dist/transport/refs.js.map +1 -1
- package/dist/types/scholarly.d.ts +19 -4
- package/dist/types/scholarly.d.ts.map +1 -1
- package/dist/types/scholarly.js +4 -4
- package/dist/types.d.ts +8 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/server.json +2 -2
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/acl-anthology/papers.test.ts +111 -0
- package/src/adapters/acl-anthology/papers.ts +379 -71
- package/src/adapters/arxiv/papers.test.ts +46 -0
- package/src/adapters/arxiv/papers.ts +251 -4
- package/src/adapters/baidu-scholar/search.ts +74 -11
- package/src/adapters/biorxiv/preprints.ts +112 -0
- package/src/adapters/cnki/search.ts +357 -0
- package/src/adapters/cvf/papers.ts +260 -27
- package/src/adapters/dblp/publications.test.ts +9 -0
- package/src/adapters/dblp/publications.ts +31 -8
- package/src/adapters/defuddle/read.yaml +30 -0
- package/src/adapters/google-scholar/search.ts +165 -17
- package/src/adapters/hf/paper.test.ts +23 -0
- package/src/adapters/hf/paper.ts +89 -5
- package/src/adapters/hf/top.yaml +34 -2
- package/src/adapters/huggingface-papers/daily.yaml +37 -3
- package/src/adapters/huggingface-papers/search.yaml +43 -9
- package/src/adapters/jina/read.yaml +30 -0
- package/src/adapters/markdown-new/read.yaml +50 -0
- package/src/adapters/medrxiv/preprints.ts +112 -0
- package/src/adapters/neurips/proceedings.ts +266 -22
- package/src/adapters/ollama-cloud/fetch.yaml +39 -0
- package/src/adapters/ollama-cloud/search.yaml +43 -0
- package/src/adapters/openalex/works.test.ts +15 -4
- package/src/adapters/openalex/works.ts +136 -8
- package/src/adapters/openreview/papers.test.ts +31 -0
- package/src/adapters/openreview/papers.ts +407 -29
- package/src/adapters/pmlr/proceedings.ts +102 -12
- package/src/adapters/pubmed/articles.test.ts +88 -1
- package/src/adapters/pubmed/articles.ts +343 -44
- package/src/adapters/rxiv/preprints.test.ts +233 -0
- package/src/adapters/rxiv/preprints.ts +849 -0
- package/src/adapters/scholar-artifacts/pdf-read.ts +277 -0
- package/src/adapters/scholar-artifacts/pdf.ts +133 -0
- package/src/adapters/semantic-scholar/papers.ts +98 -6
- package/src/adapters/unpaywall/works.ts +141 -12
- package/src/adapters/wanfang/search.ts +57 -7
- package/src/adapters/cnki/search.yaml +0 -49
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @owner src/adapters/pubmed/articles.ts
|
|
3
|
-
* @does Register agent-facing PubMed search, article, author, citation, and related-article commands.
|
|
4
|
-
* @needs NCBI E-utilities
|
|
5
|
-
* @feeds surface coverage ledger, biomedical literature command surface, agent-readable PubMed rows.
|
|
6
|
-
* @breaks NCBI E-utilities envelope drift, weak PMID validation, or silent empty rows hide literature lookup failures.
|
|
3
|
+
* @does Register agent-facing PubMed search, normalized paper metadata, field/value article detail, PMC full-text read, author, citation, and related-article commands.
|
|
4
|
+
* @needs NCBI E-utilities PubMed/PMC APIs, TypeScript adapter loader, PMID/PMCID/query validation.
|
|
5
|
+
* @feeds surface coverage ledger, biomedical literature command surface, agent-readable PubMed rows, scholar full-text workflow.
|
|
6
|
+
* @breaks NCBI E-utilities envelope drift, weak PMID/PMCID validation, missing PMC full text, or silent empty rows hide literature lookup failures.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import { DOMParser, type Document, type Element } from "@xmldom/xmldom";
|
|
@@ -12,6 +12,7 @@ import { cli, Strategy } from "../../registry.js";
|
|
|
12
12
|
const EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
|
|
13
13
|
const SUMMARY_COLUMNS = [
|
|
14
14
|
"rank",
|
|
15
|
+
"id",
|
|
15
16
|
"pmid",
|
|
16
17
|
"title",
|
|
17
18
|
"authors",
|
|
@@ -19,9 +20,23 @@ const SUMMARY_COLUMNS = [
|
|
|
19
20
|
"year",
|
|
20
21
|
"article_type",
|
|
21
22
|
"doi",
|
|
23
|
+
"pmc_id",
|
|
22
24
|
"url",
|
|
23
25
|
];
|
|
24
|
-
const RELATED_COLUMNS = [
|
|
26
|
+
const RELATED_COLUMNS = [
|
|
27
|
+
"rank",
|
|
28
|
+
"id",
|
|
29
|
+
"pmid",
|
|
30
|
+
"title",
|
|
31
|
+
"authors",
|
|
32
|
+
"journal",
|
|
33
|
+
"year",
|
|
34
|
+
"score",
|
|
35
|
+
"doi",
|
|
36
|
+
"pmc_id",
|
|
37
|
+
"url",
|
|
38
|
+
];
|
|
39
|
+
const PMC_BASE = "https://pmc.ncbi.nlm.nih.gov/articles";
|
|
25
40
|
|
|
26
41
|
interface PubMedSummary {
|
|
27
42
|
uid?: unknown;
|
|
@@ -70,6 +85,14 @@ export function requirePmid(value: unknown, label = "pmid"): string {
|
|
|
70
85
|
return pmid;
|
|
71
86
|
}
|
|
72
87
|
|
|
88
|
+
export function normalizePmcId(value: unknown): string {
|
|
89
|
+
const raw = String(value ?? "").trim();
|
|
90
|
+
if (!raw) return "";
|
|
91
|
+
const match = raw.match(/^(?:PMC)?(\d+)$/i);
|
|
92
|
+
if (!match) throw new Error(`pubmed pmc id "${raw}" is not valid.`);
|
|
93
|
+
return `PMC${match[1]}`;
|
|
94
|
+
}
|
|
95
|
+
|
|
73
96
|
export function requirePubMedLimit(
|
|
74
97
|
value: unknown,
|
|
75
98
|
fallback = 20,
|
|
@@ -85,6 +108,20 @@ export function requirePubMedLimit(
|
|
|
85
108
|
return n;
|
|
86
109
|
}
|
|
87
110
|
|
|
111
|
+
export function requirePubMedMaxChars(
|
|
112
|
+
value: unknown,
|
|
113
|
+
fallback = 40_000,
|
|
114
|
+
): number {
|
|
115
|
+
if (value === undefined || value === null || value === "") return fallback;
|
|
116
|
+
const n = Number(value);
|
|
117
|
+
if (!Number.isInteger(n) || n < 1_000 || n > 1_000_000) {
|
|
118
|
+
throw new Error(
|
|
119
|
+
`pubmed max-chars must be an integer in [1000, 1000000]. Got: ${String(value)}`,
|
|
120
|
+
);
|
|
121
|
+
}
|
|
122
|
+
return n;
|
|
123
|
+
}
|
|
124
|
+
|
|
88
125
|
function requireChoice(
|
|
89
126
|
value: unknown,
|
|
90
127
|
choices: string[],
|
|
@@ -105,9 +142,10 @@ function buildUrl(
|
|
|
105
142
|
tool: string,
|
|
106
143
|
params: Record<string, unknown>,
|
|
107
144
|
retmode = "json",
|
|
145
|
+
db = "pubmed",
|
|
108
146
|
): string {
|
|
109
147
|
const search = new URLSearchParams();
|
|
110
|
-
search.set("db", "pubmed");
|
|
148
|
+
search.set("db", db);
|
|
111
149
|
search.set("retmode", retmode);
|
|
112
150
|
if (process.env.NCBI_API_KEY) search.set("api_key", process.env.NCBI_API_KEY);
|
|
113
151
|
if (process.env.NCBI_EMAIL) search.set("email", process.env.NCBI_EMAIL);
|
|
@@ -122,8 +160,9 @@ async function eutilsFetch(
|
|
|
122
160
|
tool: string,
|
|
123
161
|
params: Record<string, unknown>,
|
|
124
162
|
retmode = "json",
|
|
163
|
+
db = "pubmed",
|
|
125
164
|
): Promise<unknown> {
|
|
126
|
-
const response = await fetch(buildUrl(tool, params, retmode), {
|
|
165
|
+
const response = await fetch(buildUrl(tool, params, retmode, db), {
|
|
127
166
|
headers: { "User-Agent": "unicli (https://github.com/olo-dot-io/Uni-CLI)" },
|
|
128
167
|
});
|
|
129
168
|
if (!response.ok)
|
|
@@ -154,14 +193,26 @@ function authorNames(authors: PubMedSummary["authors"], max = 3): string {
|
|
|
154
193
|
}
|
|
155
194
|
|
|
156
195
|
function doi(articleIds: PubMedSummary["articleids"]): string {
|
|
196
|
+
return articleId(articleIds, "doi");
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
function articleId(
|
|
200
|
+
articleIds: PubMedSummary["articleids"],
|
|
201
|
+
type: string,
|
|
202
|
+
): string {
|
|
157
203
|
return stringField(
|
|
158
204
|
Array.isArray(articleIds)
|
|
159
|
-
? articleIds.find(
|
|
160
|
-
|
|
205
|
+
? articleIds.find(
|
|
206
|
+
(id) => stringField(id.idtype).toLowerCase() === type.toLowerCase(),
|
|
207
|
+
)?.value
|
|
161
208
|
: "",
|
|
162
209
|
);
|
|
163
210
|
}
|
|
164
211
|
|
|
212
|
+
function pmcUrl(pmcId: string): string {
|
|
213
|
+
return pmcId ? `${PMC_BASE}/${pmcId}/` : "";
|
|
214
|
+
}
|
|
215
|
+
|
|
165
216
|
function articleType(types: unknown[]): string {
|
|
166
217
|
const values = Array.isArray(types)
|
|
167
218
|
? types.map(stringField).filter(Boolean)
|
|
@@ -178,17 +229,27 @@ export function mapPubMedSummaryRows(
|
|
|
178
229
|
return pmids.flatMap((pmid, index) => {
|
|
179
230
|
const summary = summaries.find((item) => stringField(item.uid) === pmid);
|
|
180
231
|
if (!summary) return [];
|
|
232
|
+
const pmcId = articleId(summary.articleids, "pmc");
|
|
233
|
+
const url = `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
|
|
181
234
|
return [
|
|
182
235
|
{
|
|
183
236
|
rank: index + 1,
|
|
237
|
+
id: pmid,
|
|
184
238
|
pmid,
|
|
185
239
|
title: cleanText(summary.title),
|
|
186
240
|
authors: authorNames(summary.authors),
|
|
187
241
|
journal: stringField(summary.source),
|
|
242
|
+
venue: stringField(summary.source),
|
|
188
243
|
year: year(summary.pubdate),
|
|
189
244
|
article_type: articleType(summary.pubtype ?? []),
|
|
245
|
+
type: articleType(summary.pubtype ?? []),
|
|
190
246
|
doi: doi(summary.articleids),
|
|
191
|
-
|
|
247
|
+
pmc_id: pmcId || undefined,
|
|
248
|
+
pmc_url: pmcUrl(pmcId),
|
|
249
|
+
source_adapter: "pubmed",
|
|
250
|
+
source_url: url,
|
|
251
|
+
retrieved_at: new Date().toISOString(),
|
|
252
|
+
url,
|
|
192
253
|
},
|
|
193
254
|
];
|
|
194
255
|
});
|
|
@@ -212,30 +273,42 @@ function firstElement(
|
|
|
212
273
|
return root.getElementsByTagName(tagName)[0] ?? null;
|
|
213
274
|
}
|
|
214
275
|
|
|
215
|
-
|
|
276
|
+
function elements(root: Document | Element, tagName: string): Element[] {
|
|
277
|
+
const nodes = root.getElementsByTagName(tagName);
|
|
278
|
+
return Array.from({ length: nodes.length }, (_, index) =>
|
|
279
|
+
nodes.item(index),
|
|
280
|
+
).filter((node): node is Element => node !== null);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
function articleIdText(
|
|
284
|
+
root: Document | Element,
|
|
285
|
+
attrName: "IdType" | "pub-id-type",
|
|
286
|
+
attrValue: string,
|
|
287
|
+
): string {
|
|
288
|
+
return (
|
|
289
|
+
elements(root, attrName === "IdType" ? "ArticleId" : "article-id")
|
|
290
|
+
.find(
|
|
291
|
+
(node) =>
|
|
292
|
+
node.getAttribute(attrName)?.toLowerCase() ===
|
|
293
|
+
attrValue.toLowerCase(),
|
|
294
|
+
)
|
|
295
|
+
?.textContent?.trim() ?? ""
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
export function mapPubMedArticleRecord(
|
|
216
300
|
xml: string,
|
|
217
301
|
pmid: string,
|
|
218
|
-
|
|
219
|
-
): Array<Record<string, unknown>> {
|
|
302
|
+
): Record<string, unknown> {
|
|
220
303
|
const document = new DOMParser().parseFromString(xml, "text/xml");
|
|
221
304
|
const title = childText(document, "ArticleTitle");
|
|
222
305
|
if (!title)
|
|
223
306
|
throw new Error(`pubmed article ${pmid} did not include a title.`);
|
|
307
|
+
const doiValue = articleIdText(document, "IdType", "doi");
|
|
308
|
+
const pmcId = articleIdText(document, "IdType", "pmc");
|
|
224
309
|
const abstract = elementTexts(document, "AbstractText").join(" ");
|
|
225
|
-
const shownAbstract =
|
|
226
|
-
fullAbstract || abstract.length <= 500
|
|
227
|
-
? abstract
|
|
228
|
-
: `${abstract.slice(0, 497)}...`;
|
|
229
|
-
const doiValue =
|
|
230
|
-
Array.from(
|
|
231
|
-
{ length: document.getElementsByTagName("ArticleId").length },
|
|
232
|
-
(_, index) => document.getElementsByTagName("ArticleId").item(index),
|
|
233
|
-
)
|
|
234
|
-
.filter((node): node is Element => node !== null)
|
|
235
|
-
.find((node) => node.getAttribute("IdType")?.toLowerCase() === "doi")
|
|
236
|
-
?.textContent?.trim() ?? "";
|
|
237
310
|
const authorNodes = document.getElementsByTagName("Author");
|
|
238
|
-
const
|
|
311
|
+
const authorList = Array.from({ length: authorNodes.length }, (_, index) =>
|
|
239
312
|
authorNodes.item(index),
|
|
240
313
|
)
|
|
241
314
|
.filter((author): author is Element => author !== null)
|
|
@@ -244,31 +317,171 @@ export function mapPubMedArticleRows(
|
|
|
244
317
|
.filter(Boolean)
|
|
245
318
|
.join(" "),
|
|
246
319
|
)
|
|
247
|
-
.filter(Boolean)
|
|
248
|
-
.join(", ");
|
|
320
|
+
.filter(Boolean);
|
|
249
321
|
const journal = firstElement(document, "Journal");
|
|
250
322
|
const pubDate = firstElement(document, "PubDate");
|
|
323
|
+
const yearValue = pubDate ? childText(pubDate, "Year") : "";
|
|
324
|
+
const sourceUrl = `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`;
|
|
325
|
+
return {
|
|
326
|
+
id: pmid,
|
|
327
|
+
pmid,
|
|
328
|
+
title,
|
|
329
|
+
authors: authorList,
|
|
330
|
+
journal: journal ? childText(journal, "Title") : "",
|
|
331
|
+
venue: journal ? childText(journal, "Title") : "",
|
|
332
|
+
year: yearValue ? Number(yearValue) : undefined,
|
|
333
|
+
date: pubDate ? cleanText(pubDate.textContent ?? "") : "",
|
|
334
|
+
article_type: elementTexts(document, "PublicationType")[0] ?? "",
|
|
335
|
+
type: elementTexts(document, "PublicationType")[0] ?? "",
|
|
336
|
+
language: childText(document, "Language"),
|
|
337
|
+
doi: doiValue || undefined,
|
|
338
|
+
pmc_id: pmcId || undefined,
|
|
339
|
+
pmc_url: pmcUrl(pmcId),
|
|
340
|
+
abstract: abstract || undefined,
|
|
341
|
+
source_adapter: "pubmed",
|
|
342
|
+
source_url: sourceUrl,
|
|
343
|
+
retrieved_at: new Date().toISOString(),
|
|
344
|
+
url: sourceUrl,
|
|
345
|
+
};
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
export function mapPubMedArticleRows(
|
|
349
|
+
xml: string,
|
|
350
|
+
pmid: string,
|
|
351
|
+
fullAbstract = false,
|
|
352
|
+
): Array<Record<string, unknown>> {
|
|
353
|
+
const record = mapPubMedArticleRecord(xml, pmid);
|
|
354
|
+
const abstract = stringField(record.abstract);
|
|
355
|
+
const shownAbstract =
|
|
356
|
+
fullAbstract || abstract.length <= 500
|
|
357
|
+
? abstract
|
|
358
|
+
: `${abstract.slice(0, 497)}...`;
|
|
251
359
|
return [
|
|
252
360
|
{ field: "PMID", value: pmid },
|
|
253
|
-
{ field: "
|
|
254
|
-
{ field: "
|
|
255
|
-
{ field: "
|
|
256
|
-
{ field: "
|
|
257
|
-
{
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
},
|
|
261
|
-
{
|
|
262
|
-
field: "Article Type",
|
|
263
|
-
value: elementTexts(document, "PublicationType")[0] ?? null,
|
|
264
|
-
},
|
|
265
|
-
{ field: "Language", value: childText(document, "Language") },
|
|
266
|
-
{ field: "DOI", value: doiValue || null },
|
|
361
|
+
{ field: "PMCID", value: record.pmc_id || null },
|
|
362
|
+
{ field: "Title", value: record.title },
|
|
363
|
+
{ field: "Authors", value: (record.authors as string[]).join(", ") },
|
|
364
|
+
{ field: "Journal", value: record.journal },
|
|
365
|
+
{ field: "Year", value: record.year ? String(record.year) : "" },
|
|
366
|
+
{ field: "Date", value: record.date },
|
|
367
|
+
{ field: "Article Type", value: record.article_type || null },
|
|
368
|
+
{ field: "Language", value: record.language },
|
|
369
|
+
{ field: "DOI", value: record.doi || null },
|
|
267
370
|
{ field: "Abstract", value: shownAbstract || null },
|
|
268
|
-
{ field: "URL", value:
|
|
371
|
+
{ field: "URL", value: record.source_url },
|
|
372
|
+
{ field: "PMC URL", value: record.pmc_url || null },
|
|
269
373
|
];
|
|
270
374
|
}
|
|
271
375
|
|
|
376
|
+
function directChildElements(root: Element, tagName: string): Element[] {
|
|
377
|
+
const out: Element[] = [];
|
|
378
|
+
for (let index = 0; index < root.childNodes.length; index += 1) {
|
|
379
|
+
const node = root.childNodes.item(index);
|
|
380
|
+
if (node?.nodeType === 1 && node.nodeName === tagName) {
|
|
381
|
+
out.push(node as Element);
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
return out;
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
function directChildText(root: Element, tagName: string): string {
|
|
388
|
+
return cleanText(directChildElements(root, tagName)[0]?.textContent ?? "");
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
function sectionText(section: Element): string {
|
|
392
|
+
const title = directChildText(section, "title");
|
|
393
|
+
const paragraphs = directChildElements(section, "p")
|
|
394
|
+
.map((paragraph) => cleanText(paragraph.textContent ?? ""))
|
|
395
|
+
.filter(Boolean);
|
|
396
|
+
const nested = directChildElements(section, "sec")
|
|
397
|
+
.map(sectionText)
|
|
398
|
+
.filter(Boolean);
|
|
399
|
+
return [title ? `## ${title}` : "", ...paragraphs, ...nested]
|
|
400
|
+
.filter(Boolean)
|
|
401
|
+
.join("\n\n");
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
function truncateText(
|
|
405
|
+
text: string,
|
|
406
|
+
maxChars: number,
|
|
407
|
+
): {
|
|
408
|
+
text: string;
|
|
409
|
+
truncated: boolean;
|
|
410
|
+
} {
|
|
411
|
+
if (text.length <= maxChars) return { text, truncated: false };
|
|
412
|
+
return {
|
|
413
|
+
text: `${text.slice(0, maxChars).trimEnd()}\n\n[truncated at ${maxChars} characters]`,
|
|
414
|
+
truncated: true,
|
|
415
|
+
};
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
export function mapPmcFullTextRow(
|
|
419
|
+
xml: string,
|
|
420
|
+
ref: string,
|
|
421
|
+
maxChars = 40_000,
|
|
422
|
+
): Record<string, unknown> {
|
|
423
|
+
const document = new DOMParser().parseFromString(xml, "text/xml");
|
|
424
|
+
const title = childText(document, "article-title");
|
|
425
|
+
if (!title) {
|
|
426
|
+
throw new Error(`PMC full text ${ref} did not include an article title.`);
|
|
427
|
+
}
|
|
428
|
+
const pmcId = normalizePmcId(
|
|
429
|
+
articleIdText(document, "pub-id-type", "pmcid") || ref,
|
|
430
|
+
);
|
|
431
|
+
const pmid = articleIdText(document, "pub-id-type", "pmid");
|
|
432
|
+
const doiValue = articleIdText(document, "pub-id-type", "doi");
|
|
433
|
+
const abstract = cleanText(
|
|
434
|
+
firstElement(document, "abstract")?.textContent ?? "",
|
|
435
|
+
);
|
|
436
|
+
const body = firstElement(document, "body");
|
|
437
|
+
const bodyText = body
|
|
438
|
+
? directChildElements(body, "sec")
|
|
439
|
+
.map(sectionText)
|
|
440
|
+
.filter(Boolean)
|
|
441
|
+
.join("\n\n")
|
|
442
|
+
: "";
|
|
443
|
+
const text = [abstract ? `## Abstract\n\n${abstract}` : "", bodyText]
|
|
444
|
+
.filter(Boolean)
|
|
445
|
+
.join("\n\n");
|
|
446
|
+
if (!text) {
|
|
447
|
+
throw new Error(`PMC full text ${pmcId} did not include readable text.`);
|
|
448
|
+
}
|
|
449
|
+
const truncated = truncateText(text, maxChars);
|
|
450
|
+
return {
|
|
451
|
+
id: pmid || pmcId,
|
|
452
|
+
title,
|
|
453
|
+
pmid: pmid || undefined,
|
|
454
|
+
pmc_id: pmcId,
|
|
455
|
+
doi: doiValue || undefined,
|
|
456
|
+
source_adapter: "pubmed",
|
|
457
|
+
source_url: pmcUrl(pmcId),
|
|
458
|
+
text: truncated.text,
|
|
459
|
+
text_truncated: truncated.truncated,
|
|
460
|
+
text_source: "pmc_xml",
|
|
461
|
+
retrieved_at: new Date().toISOString(),
|
|
462
|
+
};
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
async function pmcIdFromPubMedRef(ref: string): Promise<string> {
|
|
466
|
+
if (/^(?:PMC)?\d+$/i.test(ref) && /^PMC/i.test(ref)) {
|
|
467
|
+
return normalizePmcId(ref);
|
|
468
|
+
}
|
|
469
|
+
const pmid = requirePmid(ref, "pmid");
|
|
470
|
+
const json = (await eutilsFetch(
|
|
471
|
+
"esearch",
|
|
472
|
+
{ term: `${pmid}[PMID]`, retmax: 1 },
|
|
473
|
+
"json",
|
|
474
|
+
"pmc",
|
|
475
|
+
)) as { esearchresult?: { idlist?: string[] } };
|
|
476
|
+
const numericPmc = json.esearchresult?.idlist?.[0];
|
|
477
|
+
if (!numericPmc) {
|
|
478
|
+
throw new Error(
|
|
479
|
+
`PubMed PMID ${pmid} has no PubMed Central full text record.`,
|
|
480
|
+
);
|
|
481
|
+
}
|
|
482
|
+
return normalizePmcId(numericPmc);
|
|
483
|
+
}
|
|
484
|
+
|
|
272
485
|
async function fetchSummaryRows(
|
|
273
486
|
pmids: string[],
|
|
274
487
|
label: string,
|
|
@@ -346,7 +559,7 @@ cli({
|
|
|
346
559
|
},
|
|
347
560
|
],
|
|
348
561
|
columns: ["field", "value"],
|
|
349
|
-
capabilities: ["http.fetch"
|
|
562
|
+
capabilities: ["http.fetch"],
|
|
350
563
|
func: async (_page, kwargs) => {
|
|
351
564
|
const pmid = requirePmid(kwargs.pmid);
|
|
352
565
|
const xml = String(
|
|
@@ -356,6 +569,92 @@ cli({
|
|
|
356
569
|
},
|
|
357
570
|
});
|
|
358
571
|
|
|
572
|
+
cli({
|
|
573
|
+
site: "pubmed",
|
|
574
|
+
name: "paper",
|
|
575
|
+
description: "Fetch normalized PubMed article metadata by PMID",
|
|
576
|
+
domain: "pubmed.ncbi.nlm.nih.gov",
|
|
577
|
+
strategy: Strategy.PUBLIC,
|
|
578
|
+
args: [
|
|
579
|
+
{
|
|
580
|
+
name: "pmid",
|
|
581
|
+
type: "str",
|
|
582
|
+
required: true,
|
|
583
|
+
positional: true,
|
|
584
|
+
description: "PubMed ID",
|
|
585
|
+
},
|
|
586
|
+
],
|
|
587
|
+
columns: [
|
|
588
|
+
"id",
|
|
589
|
+
"title",
|
|
590
|
+
"authors",
|
|
591
|
+
"year",
|
|
592
|
+
"journal",
|
|
593
|
+
"doi",
|
|
594
|
+
"pmc_id",
|
|
595
|
+
"source_url",
|
|
596
|
+
],
|
|
597
|
+
capabilities: ["http.fetch", "scholar.get"],
|
|
598
|
+
func: async (_page, kwargs) => {
|
|
599
|
+
const pmid = requirePmid(kwargs.pmid ?? kwargs.id ?? kwargs.ref);
|
|
600
|
+
const xml = String(
|
|
601
|
+
await eutilsFetch("efetch", { id: pmid, rettype: "abstract" }, "xml"),
|
|
602
|
+
);
|
|
603
|
+
return [mapPubMedArticleRecord(xml, pmid)];
|
|
604
|
+
},
|
|
605
|
+
});
|
|
606
|
+
|
|
607
|
+
cli({
|
|
608
|
+
site: "pubmed",
|
|
609
|
+
name: "read",
|
|
610
|
+
description: "Read PubMed Central full text for a PMID or PMCID",
|
|
611
|
+
domain: "eutils.ncbi.nlm.nih.gov",
|
|
612
|
+
strategy: Strategy.PUBLIC,
|
|
613
|
+
args: [
|
|
614
|
+
{
|
|
615
|
+
name: "ref",
|
|
616
|
+
type: "str",
|
|
617
|
+
required: true,
|
|
618
|
+
positional: true,
|
|
619
|
+
description: "PubMed PMID or PubMed Central PMCID",
|
|
620
|
+
},
|
|
621
|
+
{
|
|
622
|
+
name: "max-chars",
|
|
623
|
+
type: "int",
|
|
624
|
+
default: 40000,
|
|
625
|
+
description: "Maximum extracted text characters",
|
|
626
|
+
},
|
|
627
|
+
],
|
|
628
|
+
columns: [
|
|
629
|
+
"id",
|
|
630
|
+
"title",
|
|
631
|
+
"pmid",
|
|
632
|
+
"pmc_id",
|
|
633
|
+
"doi",
|
|
634
|
+
"source_url",
|
|
635
|
+
"text",
|
|
636
|
+
"text_truncated",
|
|
637
|
+
],
|
|
638
|
+
capabilities: ["http.fetch", "scholar.fulltext"],
|
|
639
|
+
func: async (_page, kwargs) => {
|
|
640
|
+
const ref = requirePubMedText(
|
|
641
|
+
kwargs.ref ?? kwargs.id ?? kwargs.pmid,
|
|
642
|
+
"ref",
|
|
643
|
+
);
|
|
644
|
+
const maxChars = requirePubMedMaxChars(kwargs["max-chars"]);
|
|
645
|
+
const pmcId = await pmcIdFromPubMedRef(ref);
|
|
646
|
+
const xml = String(
|
|
647
|
+
await eutilsFetch(
|
|
648
|
+
"efetch",
|
|
649
|
+
{ id: pmcId.replace(/^PMC/i, "") },
|
|
650
|
+
"xml",
|
|
651
|
+
"pmc",
|
|
652
|
+
),
|
|
653
|
+
);
|
|
654
|
+
return [mapPmcFullTextRow(xml, pmcId, maxChars)];
|
|
655
|
+
},
|
|
656
|
+
});
|
|
657
|
+
|
|
359
658
|
cli({
|
|
360
659
|
site: "pubmed",
|
|
361
660
|
name: "author",
|