@yangfei_93sky/biocli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +197 -0
- package/dist/batch.d.ts +20 -0
- package/dist/batch.js +69 -0
- package/dist/build-manifest.d.ts +38 -0
- package/dist/build-manifest.js +186 -0
- package/dist/cache.d.ts +28 -0
- package/dist/cache.js +126 -0
- package/dist/cli-manifest.json +1500 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.js +336 -0
- package/dist/clis/_shared/common.d.ts +8 -0
- package/dist/clis/_shared/common.js +13 -0
- package/dist/clis/_shared/eutils.d.ts +9 -0
- package/dist/clis/_shared/eutils.js +9 -0
- package/dist/clis/_shared/organism-db.d.ts +23 -0
- package/dist/clis/_shared/organism-db.js +58 -0
- package/dist/clis/_shared/xml-helpers.d.ts +58 -0
- package/dist/clis/_shared/xml-helpers.js +266 -0
- package/dist/clis/aggregate/enrichment.d.ts +7 -0
- package/dist/clis/aggregate/enrichment.js +105 -0
- package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
- package/dist/clis/aggregate/gene-dossier.js +248 -0
- package/dist/clis/aggregate/gene-profile.d.ts +16 -0
- package/dist/clis/aggregate/gene-profile.js +305 -0
- package/dist/clis/aggregate/literature-brief.d.ts +7 -0
- package/dist/clis/aggregate/literature-brief.js +79 -0
- package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
- package/dist/clis/aggregate/variant-dossier.js +161 -0
- package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
- package/dist/clis/aggregate/variant-interpret.js +210 -0
- package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
- package/dist/clis/aggregate/workflow-prepare.js +228 -0
- package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
- package/dist/clis/aggregate/workflow-scout.js +175 -0
- package/dist/clis/clinvar/search.d.ts +8 -0
- package/dist/clis/clinvar/search.js +61 -0
- package/dist/clis/clinvar/variant.d.ts +7 -0
- package/dist/clis/clinvar/variant.js +53 -0
- package/dist/clis/enrichr/analyze.d.ts +7 -0
- package/dist/clis/enrichr/analyze.js +48 -0
- package/dist/clis/ensembl/lookup.d.ts +6 -0
- package/dist/clis/ensembl/lookup.js +38 -0
- package/dist/clis/ensembl/vep.d.ts +7 -0
- package/dist/clis/ensembl/vep.js +86 -0
- package/dist/clis/ensembl/xrefs.d.ts +6 -0
- package/dist/clis/ensembl/xrefs.js +36 -0
- package/dist/clis/gene/fetch.d.ts +10 -0
- package/dist/clis/gene/fetch.js +96 -0
- package/dist/clis/gene/info.d.ts +7 -0
- package/dist/clis/gene/info.js +37 -0
- package/dist/clis/gene/search.d.ts +7 -0
- package/dist/clis/gene/search.js +71 -0
- package/dist/clis/geo/dataset.d.ts +7 -0
- package/dist/clis/geo/dataset.js +55 -0
- package/dist/clis/geo/download.d.ts +17 -0
- package/dist/clis/geo/download.js +115 -0
- package/dist/clis/geo/samples.d.ts +7 -0
- package/dist/clis/geo/samples.js +57 -0
- package/dist/clis/geo/search.d.ts +8 -0
- package/dist/clis/geo/search.js +66 -0
- package/dist/clis/kegg/convert.d.ts +7 -0
- package/dist/clis/kegg/convert.js +37 -0
- package/dist/clis/kegg/disease.d.ts +6 -0
- package/dist/clis/kegg/disease.js +57 -0
- package/dist/clis/kegg/link.d.ts +7 -0
- package/dist/clis/kegg/link.js +36 -0
- package/dist/clis/kegg/pathway.d.ts +6 -0
- package/dist/clis/kegg/pathway.js +37 -0
- package/dist/clis/pubmed/abstract.d.ts +7 -0
- package/dist/clis/pubmed/abstract.js +42 -0
- package/dist/clis/pubmed/cited-by.d.ts +7 -0
- package/dist/clis/pubmed/cited-by.js +77 -0
- package/dist/clis/pubmed/fetch.d.ts +6 -0
- package/dist/clis/pubmed/fetch.js +36 -0
- package/dist/clis/pubmed/info.yaml +22 -0
- package/dist/clis/pubmed/related.d.ts +7 -0
- package/dist/clis/pubmed/related.js +81 -0
- package/dist/clis/pubmed/search.d.ts +8 -0
- package/dist/clis/pubmed/search.js +63 -0
- package/dist/clis/snp/lookup.d.ts +7 -0
- package/dist/clis/snp/lookup.js +57 -0
- package/dist/clis/sra/download.d.ts +18 -0
- package/dist/clis/sra/download.js +217 -0
- package/dist/clis/sra/run.d.ts +8 -0
- package/dist/clis/sra/run.js +77 -0
- package/dist/clis/sra/search.d.ts +8 -0
- package/dist/clis/sra/search.js +83 -0
- package/dist/clis/string/enrichment.d.ts +7 -0
- package/dist/clis/string/enrichment.js +50 -0
- package/dist/clis/string/network.d.ts +7 -0
- package/dist/clis/string/network.js +47 -0
- package/dist/clis/string/partners.d.ts +4 -0
- package/dist/clis/string/partners.js +44 -0
- package/dist/clis/taxonomy/lookup.d.ts +8 -0
- package/dist/clis/taxonomy/lookup.js +54 -0
- package/dist/clis/uniprot/fetch.d.ts +7 -0
- package/dist/clis/uniprot/fetch.js +82 -0
- package/dist/clis/uniprot/search.d.ts +6 -0
- package/dist/clis/uniprot/search.js +65 -0
- package/dist/clis/uniprot/sequence.d.ts +7 -0
- package/dist/clis/uniprot/sequence.js +51 -0
- package/dist/commander-adapter.d.ts +27 -0
- package/dist/commander-adapter.js +286 -0
- package/dist/completion.d.ts +19 -0
- package/dist/completion.js +117 -0
- package/dist/config.d.ts +57 -0
- package/dist/config.js +94 -0
- package/dist/databases/enrichr.d.ts +28 -0
- package/dist/databases/enrichr.js +131 -0
- package/dist/databases/ensembl.d.ts +14 -0
- package/dist/databases/ensembl.js +106 -0
- package/dist/databases/index.d.ts +45 -0
- package/dist/databases/index.js +49 -0
- package/dist/databases/kegg.d.ts +26 -0
- package/dist/databases/kegg.js +136 -0
- package/dist/databases/ncbi.d.ts +28 -0
- package/dist/databases/ncbi.js +144 -0
- package/dist/databases/string-db.d.ts +19 -0
- package/dist/databases/string-db.js +105 -0
- package/dist/databases/uniprot.d.ts +13 -0
- package/dist/databases/uniprot.js +110 -0
- package/dist/discovery.d.ts +32 -0
- package/dist/discovery.js +235 -0
- package/dist/doctor.d.ts +19 -0
- package/dist/doctor.js +151 -0
- package/dist/errors.d.ts +68 -0
- package/dist/errors.js +105 -0
- package/dist/execution.d.ts +15 -0
- package/dist/execution.js +178 -0
- package/dist/hooks.d.ts +48 -0
- package/dist/hooks.js +58 -0
- package/dist/main.d.ts +13 -0
- package/dist/main.js +31 -0
- package/dist/ncbi-fetch.d.ts +10 -0
- package/dist/ncbi-fetch.js +10 -0
- package/dist/output.d.ts +18 -0
- package/dist/output.js +394 -0
- package/dist/pipeline/executor.d.ts +22 -0
- package/dist/pipeline/executor.js +40 -0
- package/dist/pipeline/index.d.ts +6 -0
- package/dist/pipeline/index.js +6 -0
- package/dist/pipeline/registry.d.ts +16 -0
- package/dist/pipeline/registry.js +31 -0
- package/dist/pipeline/steps/fetch.d.ts +21 -0
- package/dist/pipeline/steps/fetch.js +160 -0
- package/dist/pipeline/steps/transform.d.ts +26 -0
- package/dist/pipeline/steps/transform.js +92 -0
- package/dist/pipeline/steps/xml-parse.d.ts +12 -0
- package/dist/pipeline/steps/xml-parse.js +27 -0
- package/dist/pipeline/template.d.ts +35 -0
- package/dist/pipeline/template.js +312 -0
- package/dist/rate-limiter.d.ts +56 -0
- package/dist/rate-limiter.js +120 -0
- package/dist/registry-api.d.ts +15 -0
- package/dist/registry-api.js +13 -0
- package/dist/registry.d.ts +90 -0
- package/dist/registry.js +100 -0
- package/dist/schema.d.ts +80 -0
- package/dist/schema.js +72 -0
- package/dist/spinner.d.ts +19 -0
- package/dist/spinner.js +37 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +27 -0
- package/dist/utils.d.ts +16 -0
- package/dist/utils.js +40 -0
- package/dist/validate.d.ts +29 -0
- package/dist/validate.js +136 -0
- package/dist/verify.d.ts +20 -0
- package/dist/verify.js +131 -0
- package/dist/version.d.ts +13 -0
- package/dist/version.js +36 -0
- package/dist/xml-parser.d.ts +19 -0
- package/dist/xml-parser.js +119 -0
- package/dist/yaml-schema.d.ts +40 -0
- package/dist/yaml-schema.js +62 -0
- package/package.json +68 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PubMed & Gene XML parsing helpers.
|
|
3
|
+
*
|
|
4
|
+
* After fast-xml-parser processes NCBI XML (see xml-parser.ts), the
|
|
5
|
+
* result is a deeply nested JS object. These helpers navigate that
|
|
6
|
+
* structure and return flat, typed records suitable for CLI table output.
|
|
7
|
+
*
|
|
8
|
+
* NOTE: The xml-parser config uses:
|
|
9
|
+
* - '@_' prefix for attributes (e.g. @_EIdType)
|
|
10
|
+
* - '#text' for text nodes
|
|
11
|
+
* - Tags listed in ARRAY_TAGS are always arrays (Author, PubmedArticle, etc.)
|
|
12
|
+
*/
|
|
13
|
+
import { isRecord } from '../../utils.js';
|
|
14
|
+
import { truncate } from './common.js';
|
|
15
|
+
/**
 * Walk into a nested object one key at a time.
 *
 * The path is passed as separate arguments (`dig(obj, 'a', 'b', 'c')`), not
 * as a single dotted string. As soon as the current value is not a record
 * (per `isRecord`), the walk yields `undefined` for the rest of the path.
 * With no keys, the input itself is returned.
 *
 * @param {unknown} obj - Root value to start from.
 * @param {...string} keys - Property names to follow, in order.
 * @returns {unknown} The value found at the end of the path, or `undefined`.
 */
function dig(obj, ...keys) {
    // Once `node` stops being a record every later step maps to undefined,
    // which matches bailing out early.
    return keys.reduce((node, key) => (isRecord(node) ? node[key] : undefined), obj);
}
|
|
28
|
+
/**
 * Coerce a parsed XML/JSON value to a display string.
 *
 * - `null` / `undefined` become `''`.
 * - Strings are returned unchanged.
 * - Records carrying a `'#text'` key (the text-node shape described in this
 *   file's header) are unwrapped, recursively, so `{ '#text': null }`
 *   yields `''` rather than `'null'`.
 * - Records without `'#text'` yield `''` instead of the useless
 *   `'[object Object]'` that `String()` would produce.
 * - Everything else (numbers, booleans, arrays) goes through `String()`.
 *
 * @param {unknown} v - Value to stringify.
 * @returns {string} String form of `v`, never `'[object Object]'` for a record.
 */
function str(v) {
    if (v === undefined || v === null)
        return '';
    if (typeof v === 'string')
        return v;
    // Arrays keep their historical String() rendering regardless of how
    // isRecord classifies them.
    if (isRecord(v) && !Array.isArray(v)) {
        return '#text' in v ? str(v['#text']) : '';
    }
    return String(v);
}
|
|
41
|
+
/**
 * Build a short author string from a parsed Author array.
 *
 * Each usable entry contributes one label:
 *   - a truthy `CollectiveName` is used as-is (via `str`);
 *   - otherwise `LastName`, followed by the first character of `ForeName`
 *     when a fore name is present (e.g. "Smith J");
 *   - entries that are not records, or that have no last name, are dropped.
 *
 * At most three labels are shown, comma-separated; " et al." is appended
 * when more than three labels were collected.
 *
 * @param {unknown} authorList - Expected to be an array of Author records.
 * @returns {string} Formatted author list, or '' when nothing usable exists.
 */
function formatAuthors(authorList) {
    if (!Array.isArray(authorList))
        return '';
    const labels = authorList
        .filter((entry) => isRecord(entry))
        .flatMap((entry) => {
            if (entry.CollectiveName)
                return [str(entry.CollectiveName)];
            const surname = str(entry.LastName);
            if (!surname)
                return [];
            const given = str(entry.ForeName);
            return [given ? `${surname} ${given.charAt(0)}` : surname];
        });
    const shown = labels.slice(0, 3).join(', ');
    return labels.length > 3 ? shown + ' et al.' : shown;
}
|
|
75
|
+
/**
 * Extract the DOI from an Article's ELocationID node(s).
 *
 * ELocationID may be a single object or an array, so it is normalised to an
 * array first. The first entry whose `@_EIdType` is 'doi' AND which carries
 * non-empty text wins.
 *
 * A doi-typed entry without a `#text` value is skipped rather than being
 * stringified as a whole (which could surface "[object Object]" as the DOI
 * and hide a later, valid entry).
 *
 * @param {Record<string, unknown>} article - Parsed Article record.
 * @returns {string} The DOI text, or '' when none is present.
 */
function extractDoi(article) {
    const eloc = article.ELocationID;
    if (!eloc)
        return '';
    const candidates = Array.isArray(eloc) ? eloc : [eloc];
    for (const entry of candidates) {
        if (!isRecord(entry) || entry['@_EIdType'] !== 'doi')
            continue;
        const doi = str(entry['#text']);
        if (doi)
            return doi;
    }
    return '';
}
|
|
94
|
+
/**
 * Extract the publication year from Journal > JournalIssue > PubDate.
 *
 * Prefers the explicit `Year` child. When that is empty, falls back to the
 * first run of four digits found in `MedlineDate` (e.g. "2024 Jan-Feb").
 *
 * @param {Record<string, unknown>} article - Parsed Article record.
 * @returns {string} Year as a string, or '' when neither field yields one.
 */
function extractYear(article) {
    const pubDate = dig(article, 'Journal', 'JournalIssue', 'PubDate');
    const explicitYear = str(dig(pubDate, 'Year'));
    if (explicitYear)
        return explicitYear;
    // An empty MedlineDate simply fails to match and falls through to ''.
    const found = str(dig(pubDate, 'MedlineDate')).match(/\d{4}/);
    return found ? found[0] : '';
}
|
|
111
|
+
/**
 * Extract the abstract text of an Article as a single string.
 *
 * `Abstract.AbstractText` is normally an array whose elements are either
 * plain strings or records with `@_Label` and `#text` (structured
 * abstracts). Labelled sections are rendered as "LABEL: text" and all
 * sections are joined with a single space.
 *
 * Sections that carry no text are skipped. This avoids stringifying a
 * record that lacks `#text` (which could surface "[object Object]") and
 * avoids dangling "LABEL: " fragments or doubled spaces.
 *
 * @param {Record<string, unknown>} article - Parsed Article record.
 * @returns {string} Flattened abstract, or '' when there is none.
 */
function extractAbstract(article) {
    const abstractNode = article.Abstract;
    if (!isRecord(abstractNode))
        return '';
    const textList = abstractNode.AbstractText;
    // Defensive: tolerate a non-array value even though the parser is
    // expected to force AbstractText into an array.
    if (!Array.isArray(textList))
        return str(textList);
    const parts = [];
    for (const part of textList) {
        const labelled = isRecord(part);
        const text = labelled ? str(part['#text']) : str(part);
        if (!text)
            continue;
        const label = labelled ? str(part['@_Label']) : '';
        parts.push(label ? `${label}: ${text}` : text);
    }
    return parts.join(' ');
}
|
|
138
|
+
/**
 * Convert a parsed PubMed efetch response into flat article records.
 *
 * Expects the shape `PubmedArticleSet > PubmedArticle[] > MedlineCitation >
 * Article`. Entries missing any of those levels are skipped; an unexpected
 * top-level shape yields an empty array.
 *
 * @param {unknown} parsed - Output of the XML parser for an efetch response.
 * @returns {Array<{pmid: string, title: string, authors: string,
 *   journal: string, year: string, doi: string, abstract: string}>}
 */
export function parsePubmedArticles(parsed) {
    const entries = isRecord(parsed) && isRecord(parsed.PubmedArticleSet)
        ? parsed.PubmedArticleSet.PubmedArticle
        : undefined;
    if (!Array.isArray(entries))
        return [];
    const records = [];
    entries.forEach((entry) => {
        const citation = isRecord(entry) ? entry.MedlineCitation : undefined;
        const article = isRecord(citation) ? citation.Article : undefined;
        if (!isRecord(article))
            return;
        // PMID may be a bare value or a record holding the id under '#text'.
        const rawPmid = citation.PMID;
        const authorList = article.AuthorList;
        records.push({
            pmid: str(isRecord(rawPmid) ? rawPmid['#text'] : rawPmid),
            // Drop anything that looks like an inline tag from the title.
            title: str(article.ArticleTitle).replace(/<[^>]+>/g, ''),
            authors: formatAuthors(isRecord(authorList) ? authorList.Author : undefined),
            journal: str(dig(article, 'Journal', 'Title')),
            year: extractYear(article),
            doi: extractDoi(article),
            abstract: extractAbstract(article),
        });
    });
    return records;
}
|
|
187
|
+
/**
 * Convert a Gene esummary JSON response into flat gene records.
 *
 * The response is expected to look like:
 *   { result: { uids: ["7157", ...], "7157": { uid, name, description, ... } } }
 *
 * Each uid is looked up (as a string key) on `result`; uids with no record
 * behind them are skipped. `organism` prefers `organism.scientificname` and
 * falls back to `orgname`. `summary` is passed through `truncate(..., 300)`.
 *
 * @param {unknown} parsed - Decoded esummary JSON.
 * @returns {Array<{geneId: string, symbol: string, name: string,
 *   organism: string, summary: string, chromosome: string, location: string}>}
 */
export function parseGeneSummaries(parsed) {
    const table = isRecord(parsed) ? parsed.result : undefined;
    if (!isRecord(table) || !Array.isArray(table.uids))
        return [];
    return table.uids
        .map((uid) => table[String(uid)])
        .filter((entry) => isRecord(entry))
        .map((entry) => ({
            geneId: str(entry.uid),
            symbol: str(entry.name),
            name: str(entry.description),
            organism: str(dig(entry, 'organism', 'scientificname') ?? entry.orgname),
            summary: truncate(str(entry.summary), 300),
            chromosome: str(entry.chromosome),
            location: str(entry.maplocation),
        }));
}
|
|
225
|
+
/**
 * Convert a parsed Gene efetch response (Entrezgene-Set) into gene records.
 *
 * Paths read for each `Entrezgene-Set > Entrezgene[]` element:
 *   - geneId:     Entrezgene_track-info > Gene-track > Gene-track_geneid
 *   - symbol:     Entrezgene_gene > Gene-ref > Gene-ref_locus
 *   - name:       Entrezgene_gene > Gene-ref > Gene-ref_desc
 *   - organism:   Entrezgene_source > BioSource > BioSource_org > Org-ref > Org-ref_taxname
 *   - summary:    Entrezgene_summary, passed through truncate(..., 300)
 *   - chromosome: Entrezgene_gene > Gene-ref > Gene-ref_maploc
 *   - location:   Entrezgene_location
 *
 * NOTE(review): `chromosome` is filled from the map-location field, and
 * `location` stringifies whatever Entrezgene_location parses to; if that is
 * a nested object, `str()` decides how it renders. Confirm both against a
 * real efetch response.
 *
 * @param {unknown} parsed - Output of the XML parser for a gene efetch call.
 * @returns {Array<{geneId: string, symbol: string, name: string,
 *   organism: string, summary: string, chromosome: string, location: string}>}
 */
export function parseGeneEntries(parsed) {
    const geneSet = isRecord(parsed) ? parsed['Entrezgene-Set'] : undefined;
    const entries = isRecord(geneSet) ? geneSet.Entrezgene : undefined;
    if (!Array.isArray(entries))
        return [];
    return entries
        .filter((entry) => isRecord(entry))
        .map((entry) => {
            const ref = dig(entry, 'Entrezgene_gene', 'Gene-ref');
            // Gene-ref fields default to '' when the Gene-ref node is absent.
            const refField = (key) => (isRecord(ref) ? str(ref[key]) : '');
            return {
                geneId: str(dig(entry, 'Entrezgene_track-info', 'Gene-track', 'Gene-track_geneid')),
                symbol: refField('Gene-ref_locus'),
                name: refField('Gene-ref_desc'),
                organism: str(dig(entry, 'Entrezgene_source', 'BioSource', 'BioSource_org', 'Org-ref', 'Org-ref_taxname')),
                summary: truncate(str(entry['Entrezgene_summary']), 300),
                chromosome: refField('Gene-ref_maploc'),
                location: str(dig(entry, 'Entrezgene_location')),
            };
        });
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/enrichment — Combined enrichment analysis from Enrichr + STRING.
|
|
3
|
+
*
|
|
4
|
+
* Queries both Enrichr and STRING functional enrichment in parallel,
|
|
5
|
+
* merges the rows from both sources into one report sorted by p-value
* (rows are concatenated as-is; no deduplication is performed).
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { submitGeneList, getEnrichment } from '../../databases/enrichr.js';
|
|
10
|
+
import { buildStringUrl, encodeStringIds } from '../../databases/string-db.js';
|
|
11
|
+
import { createHttpContextForDatabase } from '../../databases/index.js';
|
|
12
|
+
import { wrapResult } from '../../types.js';
|
|
13
|
+
// Registers `aggregate enrichment`: runs Enrichr and STRING enrichment in
// parallel for a gene list and returns the combined rows sorted by p-value.
cli({
    site: 'aggregate',
    name: 'enrichment',
    description: 'Combined pathway enrichment from Enrichr + STRING',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'genes', positional: true, required: true, help: 'Comma-separated gene symbols (e.g. TP53,BRCA1,EGFR,MYC,CDK2)' },
        { name: 'library', default: 'KEGG_2021_Human', help: 'Enrichr library (e.g. GO_Biological_Process_2023, Reactome_2022)' },
        { name: 'limit', type: 'int', default: 20, help: 'Max results per source (1-50)' },
        { name: 'species', type: 'int', default: 9606, help: 'NCBI taxonomy ID for STRING (default: 9606)' },
    ],
    columns: ['term', 'category', 'source', 'pValue', 'genes'],
    /**
     * @param _ctx - Unused; each source call below obtains what it needs itself.
     * @param args - Parsed CLI arguments (`genes`, `library`, `limit`, `species`).
     * @returns The combined rows wrapped by `wrapResult`, with the contributing
     *   sources, per-source failure messages (as warnings) and the query.
     * @throws {CliError} 'ARGUMENT' when fewer than two genes are given;
     *   'NOT_FOUND' when neither source produced any row.
     */
    func: async (_ctx, args) => {
        const geneList = String(args.genes).split(',').map(s => s.trim()).filter(Boolean);
        if (geneList.length < 2) {
            throw new CliError('ARGUMENT', 'At least 2 genes required', 'Example: biocli aggregate enrichment TP53,BRCA1,EGFR,MYC,CDK2');
        }
        const library = String(args.library);
        // Math.min/Math.max propagate NaN, and slice(0, NaN) is always empty,
        // so a non-numeric limit would silently discard every result and end
        // in a misleading NOT_FOUND. Fall back to the declared default (20).
        const requestedLimit = Number(args.limit);
        const limit = Number.isNaN(requestedLimit)
            ? 20
            : Math.max(1, Math.min(requestedLimit, 50));
        const species = String(args.species);
        const errors = [];
        // Run both sources in parallel; allSettled lets one fail without
        // losing the other's rows.
        const [enrichrResult, stringResult] = await Promise.allSettled([
            // Enrichr: 2-step workflow (submit the list, then query a library)
            (async () => {
                const userListId = await submitGeneList(geneList);
                const results = await getEnrichment(userListId, library);
                return results.slice(0, limit).map(r => ({
                    term: String(r.term),
                    category: library,
                    source: 'Enrichr',
                    pValue: Number(r.adjustedPValue).toExponential(2),
                    genes: String(r.genes),
                }));
            })(),
            // STRING functional enrichment
            (async () => {
                const stringCtx = createHttpContextForDatabase('string');
                const data = await stringCtx.fetchJson(buildStringUrl('enrichment', {
                    identifiers: encodeStringIds(geneList),
                    species,
                }));
                if (!Array.isArray(data))
                    return [];
                return data
                    .filter(item => {
                        // Only keep KEGG/GO/Reactome categories
                        const cat = String(item.category ?? '');
                        return ['Process', 'Function', 'Component', 'KEGG', 'Reactome'].some(c => cat.includes(c));
                    })
                    .slice(0, limit)
                    .map(item => {
                        const inputGenes = item.inputGenes;
                        const geneStr = Array.isArray(inputGenes) ? inputGenes.join(',') : String(inputGenes ?? '');
                        return {
                            term: String(item.description ?? item.term ?? ''),
                            category: String(item.category ?? ''),
                            source: 'STRING',
                            pValue: Number(item.fdr ?? 1).toExponential(2),
                            genes: geneStr,
                        };
                    });
            })(),
        ]);
        const rows = [];
        if (enrichrResult.status === 'fulfilled') {
            rows.push(...enrichrResult.value);
        }
        else {
            errors.push(`Enrichr: ${enrichrResult.reason}`);
        }
        if (stringResult.status === 'fulfilled') {
            rows.push(...stringResult.value);
        }
        else {
            errors.push(`STRING: ${stringResult.reason}`);
        }
        if (!rows.length) {
            throw new CliError('NOT_FOUND', 'No enrichment results from any source', errors.length ? `Errors: ${errors.join('; ')}` : 'Try adding more genes');
        }
        // Sort by p-value, ascending. A row whose p-value did not parse
        // ("NaN") is ranked last; subtracting NaN would make the comparator
        // inconsistent and the resulting order implementation-defined.
        const sortKey = (row) => {
            const p = parseFloat(row.pValue);
            return Number.isNaN(p) ? Number.POSITIVE_INFINITY : p;
        };
        rows.sort((a, b) => {
            const pa = sortKey(a);
            const pb = sortKey(b);
            if (pa === pb)
                return 0;
            return pa < pb ? -1 : 1;
        });
        const activeSources = [...new Set(rows.map(r => r.source))];
        return wrapResult(rows, {
            sources: activeSources,
            warnings: errors,
            query: geneList.join(','),
        });
    },
});
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/gene-dossier — Comprehensive gene intelligence report.
|
|
3
|
+
*
|
|
4
|
+
* Builds on gene-profile and adds:
|
|
5
|
+
* - Recent PubMed literature (top papers)
|
|
6
|
+
* - ClinVar clinical significance
|
|
7
|
+
* - Summary assessment for agent consumption
|
|
8
|
+
*
|
|
9
|
+
* This is the highest-level gene command — a complete "dossier" that
|
|
10
|
+
* an AI agent can use to understand a gene's biological role, clinical
|
|
11
|
+
* relevance, and research landscape in one call.
|
|
12
|
+
*/
|
|
13
|
+
// Empty export list: this declaration file exposes no bindings of its own.
export {};
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/gene-dossier — Comprehensive gene intelligence report.
|
|
3
|
+
*
|
|
4
|
+
* Builds on gene-profile and adds:
|
|
5
|
+
* - Recent PubMed literature (top papers)
|
|
6
|
+
* - ClinVar clinical significance
|
|
7
|
+
* - Summary assessment for agent consumption
|
|
8
|
+
*
|
|
9
|
+
* This is the highest-level gene command — a complete "dossier" that
|
|
10
|
+
* an AI agent can use to understand a gene's biological role, clinical
|
|
11
|
+
* relevance, and research landscape in one call.
|
|
12
|
+
*/
|
|
13
|
+
import { cli, Strategy } from '../../registry.js';
|
|
14
|
+
import { CliError } from '../../errors.js';
|
|
15
|
+
import { wrapResult } from '../../types.js';
|
|
16
|
+
import { createHttpContextForDatabase } from '../../databases/index.js';
|
|
17
|
+
import { buildEutilsUrl } from '../../databases/ncbi.js';
|
|
18
|
+
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
|
|
19
|
+
import { buildUniprotUrl } from '../../databases/uniprot.js';
|
|
20
|
+
import { buildKeggUrl, parseKeggTsv } from '../../databases/kegg.js';
|
|
21
|
+
import { buildStringUrl } from '../../databases/string-db.js';
|
|
22
|
+
import { parseGeneSummaries } from '../_shared/xml-helpers.js';
|
|
23
|
+
import { resolveOrganism } from '../_shared/organism-db.js';
|
|
24
|
+
// Reuse the gene-profile building blocks but add literature + clinical layers
|
|
25
|
+
/**
 * Look up recent PubMed papers mentioning a gene symbol.
 *
 * Two requests are made through `ctx`: an esearch (JSON) restricted to
 * `"last 5 years"[PDat]` and sorted by relevance, then an efetch (XML) for
 * the returned PMIDs. The efetch step is skipped when the search returns
 * no ids.
 *
 * @param ctx - HTTP context exposing `fetchJson(url)` and `fetchXml(url)`.
 * @param {string} symbol - Gene symbol used as the search term.
 * @param {number} limit - Maximum number of papers to request (`retmax`).
 * @returns {Promise<Array<{pmid, title, authors, journal, year, doi}>>}
 *   One record per parsed article, without the abstract.
 */
async function fetchRecentLiterature(ctx, symbol, limit) {
    const searchUrl = buildEutilsUrl('esearch.fcgi', {
        db: 'pubmed',
        term: `${symbol} AND "last 5 years"[PDat]`,
        retmax: String(limit),
        sort: 'relevance',
        retmode: 'json',
    });
    const { idlist } = (await ctx.fetchJson(searchUrl))?.esearchresult ?? {};
    const pmids = idlist ?? [];
    if (!pmids.length)
        return [];
    const fetchUrl = buildEutilsUrl('efetch.fcgi', {
        db: 'pubmed',
        id: pmids.join(','),
        rettype: 'xml',
    });
    const parsedXml = await ctx.fetchXml(fetchUrl);
    // Keep only the citation fields; the abstract is intentionally left out.
    return parsePubmedArticles(parsedXml).map(({ pmid, title, authors, journal, year, doi }) => ({
        pmid,
        title,
        authors,
        journal,
        year,
        doi,
    }));
}
|
|
53
|
+
/**
 * Fetch up to ten ClinVar records for a gene and summarise their clinical
 * classification.
 *
 * Two requests are made through `ctx`: an esearch on `<symbol>[Gene Name]`
 * and, when ids come back, an esummary for those ids.
 *
 * Robustness over a plain field read:
 *   - a non-array `result.uids` is treated as "no records" instead of
 *     throwing on `.map`;
 *   - null entries inside `trait_set` no longer throw;
 *   - when `clinical_significance` is absent the classification (and its
 *     traits) are read from `germline_classification`.
 *     NOTE(review): newer ClinVar esummary payloads appear to use that field
 *     name — confirm against a live response. The fallback is inert when the
 *     field does not exist.
 *
 * @param ctx - HTTP context exposing `fetchJson(url)`.
 * @param {string} symbol - Gene symbol to search for.
 * @returns {Promise<Array<{title: string, significance: string,
 *   condition: string, accession: string}>>} One row per uid in the summary.
 */
async function fetchClinvarSignificance(ctx, symbol) {
    const searchResult = await ctx.fetchJson(buildEutilsUrl('esearch.fcgi', {
        db: 'clinvar',
        term: `${symbol}[Gene Name]`,
        retmax: '10',
        retmode: 'json',
    }));
    const ids = searchResult?.esearchresult?.idlist ?? [];
    if (!ids.length)
        return [];
    const summary = await ctx.fetchJson(buildEutilsUrl('esummary.fcgi', {
        db: 'clinvar',
        id: ids.join(','),
        retmode: 'json',
    }));
    const resultObj = summary?.result;
    const uids = Array.isArray(resultObj?.uids) ? resultObj.uids : [];
    return uids.map(uid => {
        const item = (resultObj[uid] ?? {});
        const classification = item.clinical_significance ?? item.germline_classification;
        // typeof null is 'object', hence the explicit null check.
        const isObject = typeof classification === 'object' && classification !== null;
        const sig = isObject
            ? classification.description ?? ''
            : String(classification ?? '');
        const traitSet = item.trait_set ?? (isObject ? classification.trait_set : undefined);
        const traits = Array.isArray(traitSet)
            ? traitSet.map(t => String(t?.trait_name ?? '')).join('; ')
            : '';
        return {
            title: String(item.title ?? ''),
            significance: String(sig),
            condition: traits,
            accession: String(item.accession ?? ''),
        };
    });
}
|
|
86
|
+
cli({
|
|
87
|
+
site: 'aggregate',
|
|
88
|
+
name: 'gene-dossier',
|
|
89
|
+
description: 'Complete gene intelligence report (profile + literature + clinical)',
|
|
90
|
+
database: 'aggregate',
|
|
91
|
+
strategy: Strategy.PUBLIC,
|
|
92
|
+
defaultFormat: 'json',
|
|
93
|
+
timeoutSeconds: 90,
|
|
94
|
+
args: [
|
|
95
|
+
{ name: 'gene', positional: true, required: true, help: 'Gene symbol (e.g. TP53)' },
|
|
96
|
+
{ name: 'organism', default: 'human', help: 'Organism (e.g. human, mouse)' },
|
|
97
|
+
{ name: 'papers', type: 'int', default: 5, help: 'Number of recent papers to include' },
|
|
98
|
+
],
|
|
99
|
+
columns: ['symbol', 'name', 'pathways', 'interactions', 'literature', 'clinvar'],
|
|
100
|
+
func: async (_ctx, args) => {
|
|
101
|
+
const symbol = String(args.gene).trim();
|
|
102
|
+
if (!symbol)
|
|
103
|
+
throw new CliError('ARGUMENT', 'Gene symbol is required');
|
|
104
|
+
const org = resolveOrganism(String(args.organism));
|
|
105
|
+
const paperCount = Math.max(1, Math.min(Number(args.papers), 20));
|
|
106
|
+
const sources = [];
|
|
107
|
+
const warnings = [];
|
|
108
|
+
const ids = {};
|
|
109
|
+
const ncbiCtx = createHttpContextForDatabase('ncbi');
|
|
110
|
+
const uniprotCtx = createHttpContextForDatabase('uniprot');
|
|
111
|
+
const keggCtx = createHttpContextForDatabase('kegg');
|
|
112
|
+
const stringCtx = createHttpContextForDatabase('string');
|
|
113
|
+
// Phase 1: Core profile (parallel)
|
|
114
|
+
const [ncbiResult, uniprotResult, stringResult, litResult, clinvarResult] = await Promise.allSettled([
|
|
115
|
+
// NCBI Gene
|
|
116
|
+
(async () => {
|
|
117
|
+
const sr = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
|
|
118
|
+
db: 'gene', term: `${symbol}[Gene Name] AND ${org.name}[Organism]`,
|
|
119
|
+
retmax: '5', retmode: 'json',
|
|
120
|
+
}));
|
|
121
|
+
const gids = sr?.esearchresult?.idlist ?? [];
|
|
122
|
+
if (!gids.length)
|
|
123
|
+
return null;
|
|
124
|
+
const summ = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
|
|
125
|
+
db: 'gene', id: gids.join(','), retmode: 'json',
|
|
126
|
+
}));
|
|
127
|
+
const genes = parseGeneSummaries(summ);
|
|
128
|
+
const best = genes.find(g => g.symbol.toUpperCase() === symbol.toUpperCase()) ?? genes[0];
|
|
129
|
+
return best ?? null;
|
|
130
|
+
})(),
|
|
131
|
+
// UniProt
|
|
132
|
+
(async () => {
|
|
133
|
+
const data = await uniprotCtx.fetchJson(buildUniprotUrl('/uniprotkb/search', {
|
|
134
|
+
query: `gene:${symbol} AND organism_id:${org.taxId} AND reviewed:true`,
|
|
135
|
+
format: 'json', size: '1',
|
|
136
|
+
}));
|
|
137
|
+
const results = (data?.results ?? []);
|
|
138
|
+
return results[0] ?? null;
|
|
139
|
+
})(),
|
|
140
|
+
// STRING partners
|
|
141
|
+
(async () => {
|
|
142
|
+
const data = await stringCtx.fetchJson(buildStringUrl('interaction_partners', {
|
|
143
|
+
identifiers: symbol, species: String(org.taxId), limit: '10', required_score: '400',
|
|
144
|
+
}));
|
|
145
|
+
return Array.isArray(data) ? data.map(i => ({
|
|
146
|
+
partner: String(i.preferredName_B ?? ''),
|
|
147
|
+
score: Number(i.score ?? 0),
|
|
148
|
+
})) : [];
|
|
149
|
+
})(),
|
|
150
|
+
// Literature
|
|
151
|
+
fetchRecentLiterature(ncbiCtx, symbol, paperCount),
|
|
152
|
+
// ClinVar
|
|
153
|
+
fetchClinvarSignificance(ncbiCtx, symbol),
|
|
154
|
+
]);
|
|
155
|
+
// Extract NCBI
|
|
156
|
+
let ncbiGene = null;
|
|
157
|
+
if (ncbiResult.status === 'fulfilled' && ncbiResult.value) {
|
|
158
|
+
ncbiGene = ncbiResult.value;
|
|
159
|
+
sources.push('NCBI Gene');
|
|
160
|
+
ids.ncbiGeneId = String(ncbiGene.geneId);
|
|
161
|
+
}
|
|
162
|
+
else {
|
|
163
|
+
warnings.push(`NCBI Gene: ${ncbiResult.status === 'rejected' ? ncbiResult.reason : 'not found'}`);
|
|
164
|
+
}
|
|
165
|
+
// Extract UniProt (function + GO terms)
|
|
166
|
+
let uniprotFunc = '';
|
|
167
|
+
let goTerms = [];
|
|
168
|
+
if (uniprotResult.status === 'fulfilled' && uniprotResult.value) {
|
|
169
|
+
const entry = uniprotResult.value;
|
|
170
|
+
ids.uniprotAccession = String(entry.primaryAccession ?? '');
|
|
171
|
+
const comments = (entry.comments ?? []);
|
|
172
|
+
const fc = comments.find(c => c.commentType === 'FUNCTION');
|
|
173
|
+
const texts = (fc?.texts ?? []);
|
|
174
|
+
uniprotFunc = texts.map(t => String(t.value ?? '')).join(' ');
|
|
175
|
+
// Extract GO terms from cross-references
|
|
176
|
+
const xrefs = (entry.uniProtKBCrossReferences ?? []);
|
|
177
|
+
goTerms = xrefs
|
|
178
|
+
.filter(x => x.database === 'GO')
|
|
179
|
+
.map(x => {
|
|
180
|
+
const id = String(x.id ?? '');
|
|
181
|
+
const props = (x.properties ?? []);
|
|
182
|
+
const termProp = props.find(p => p.key === 'GoTerm');
|
|
183
|
+
const term = String(termProp?.value ?? '');
|
|
184
|
+
const aspectMap = { C: 'CC', F: 'MF', P: 'BP' };
|
|
185
|
+
const [aspect, ...nameParts] = term.split(':');
|
|
186
|
+
return { id, name: nameParts.join(':'), aspect: aspectMap[aspect] ?? aspect };
|
|
187
|
+
});
|
|
188
|
+
sources.push('UniProt');
|
|
189
|
+
}
|
|
190
|
+
else {
|
|
191
|
+
warnings.push(`UniProt: ${uniprotResult.status === 'rejected' ? uniprotResult.reason : 'not found'}`);
|
|
192
|
+
}
|
|
193
|
+
// Extract STRING
|
|
194
|
+
const interactions = stringResult.status === 'fulfilled' ? stringResult.value : [];
|
|
195
|
+
if (interactions.length)
|
|
196
|
+
sources.push('STRING');
|
|
197
|
+
// Extract literature
|
|
198
|
+
const literature = litResult.status === 'fulfilled' ? litResult.value : [];
|
|
199
|
+
if (literature.length)
|
|
200
|
+
sources.push('PubMed');
|
|
201
|
+
else
|
|
202
|
+
warnings.push(`PubMed: ${litResult.status === 'rejected' ? litResult.reason : 'no recent papers'}`);
|
|
203
|
+
// Extract ClinVar
|
|
204
|
+
const clinvar = clinvarResult.status === 'fulfilled' ? clinvarResult.value : [];
|
|
205
|
+
if (clinvar.length)
|
|
206
|
+
sources.push('ClinVar');
|
|
207
|
+
// KEGG pathways (sequential, needs gene ID)
|
|
208
|
+
let pathways = [];
|
|
209
|
+
if (ncbiGene?.geneId) {
|
|
210
|
+
try {
|
|
211
|
+
const keggId = `${org.keggOrg}:${ncbiGene.geneId}`;
|
|
212
|
+
const pathText = await keggCtx.fetchText(buildKeggUrl(`/link/pathway/${keggId}`));
|
|
213
|
+
if (pathText.trim()) {
|
|
214
|
+
const links = parseKeggTsv(pathText);
|
|
215
|
+
const pathIds = links.map(l => l.value.replace(/^path:/, '')).filter(Boolean);
|
|
216
|
+
const listText = await keggCtx.fetchText(buildKeggUrl(`/list/pathway/${org.keggOrg}`));
|
|
217
|
+
const pathMap = new Map(parseKeggTsv(listText).map(p => [p.key, p.value.replace(/ - .*$/, '')]));
|
|
218
|
+
pathways = pathIds.map(id => ({ id, name: pathMap.get(id) ?? id }));
|
|
219
|
+
ids.keggId = keggId;
|
|
220
|
+
sources.push('KEGG');
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
catch (err) {
|
|
224
|
+
warnings.push(`KEGG: ${err instanceof Error ? err.message : String(err)}`);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
const dossier = {
|
|
228
|
+
symbol,
|
|
229
|
+
name: String(ncbiGene?.name ?? ''),
|
|
230
|
+
summary: String(ncbiGene?.summary ?? ''),
|
|
231
|
+
function: uniprotFunc,
|
|
232
|
+
chromosome: String(ncbiGene?.chromosome ?? ''),
|
|
233
|
+
location: String(ncbiGene?.location ?? ''),
|
|
234
|
+
pathways,
|
|
235
|
+
goTerms,
|
|
236
|
+
interactions,
|
|
237
|
+
recentLiterature: literature,
|
|
238
|
+
clinicalVariants: clinvar,
|
|
239
|
+
};
|
|
240
|
+
return wrapResult(dossier, {
|
|
241
|
+
ids,
|
|
242
|
+
sources,
|
|
243
|
+
warnings,
|
|
244
|
+
organism: org.name,
|
|
245
|
+
query: symbol,
|
|
246
|
+
});
|
|
247
|
+
},
|
|
248
|
+
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/gene-profile — Complete gene profile from multiple databases.
|
|
3
|
+
*
|
|
4
|
+
* THE KILLER FEATURE: one command queries NCBI Gene, UniProt, KEGG, and
|
|
5
|
+
* STRING in parallel and returns a unified, agent-friendly JSON object.
|
|
6
|
+
*
|
|
7
|
+
* Supports:
|
|
8
|
+
* - Single gene: biocli aggregate gene-profile TP53
|
|
9
|
+
* - Batch: biocli aggregate gene-profile TP53,BRCA1,EGFR
|
|
10
|
+
*
|
|
11
|
+
* Design:
|
|
12
|
+
* - Promise.allSettled for partial failure tolerance
|
|
13
|
+
* - _meta.sources tracks which databases contributed
|
|
14
|
+
* - _meta.errors reports partial failures without crashing
|
|
15
|
+
*/
|
|
16
|
+
// Empty export list: this declaration file exposes no bindings of its own;
// the statement only makes the file a module rather than a global script.
export {};
|