@yangfei_93sky/biocli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +197 -0
- package/dist/batch.d.ts +20 -0
- package/dist/batch.js +69 -0
- package/dist/build-manifest.d.ts +38 -0
- package/dist/build-manifest.js +186 -0
- package/dist/cache.d.ts +28 -0
- package/dist/cache.js +126 -0
- package/dist/cli-manifest.json +1500 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.js +336 -0
- package/dist/clis/_shared/common.d.ts +8 -0
- package/dist/clis/_shared/common.js +13 -0
- package/dist/clis/_shared/eutils.d.ts +9 -0
- package/dist/clis/_shared/eutils.js +9 -0
- package/dist/clis/_shared/organism-db.d.ts +23 -0
- package/dist/clis/_shared/organism-db.js +58 -0
- package/dist/clis/_shared/xml-helpers.d.ts +58 -0
- package/dist/clis/_shared/xml-helpers.js +266 -0
- package/dist/clis/aggregate/enrichment.d.ts +7 -0
- package/dist/clis/aggregate/enrichment.js +105 -0
- package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
- package/dist/clis/aggregate/gene-dossier.js +248 -0
- package/dist/clis/aggregate/gene-profile.d.ts +16 -0
- package/dist/clis/aggregate/gene-profile.js +305 -0
- package/dist/clis/aggregate/literature-brief.d.ts +7 -0
- package/dist/clis/aggregate/literature-brief.js +79 -0
- package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
- package/dist/clis/aggregate/variant-dossier.js +161 -0
- package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
- package/dist/clis/aggregate/variant-interpret.js +210 -0
- package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
- package/dist/clis/aggregate/workflow-prepare.js +228 -0
- package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
- package/dist/clis/aggregate/workflow-scout.js +175 -0
- package/dist/clis/clinvar/search.d.ts +8 -0
- package/dist/clis/clinvar/search.js +61 -0
- package/dist/clis/clinvar/variant.d.ts +7 -0
- package/dist/clis/clinvar/variant.js +53 -0
- package/dist/clis/enrichr/analyze.d.ts +7 -0
- package/dist/clis/enrichr/analyze.js +48 -0
- package/dist/clis/ensembl/lookup.d.ts +6 -0
- package/dist/clis/ensembl/lookup.js +38 -0
- package/dist/clis/ensembl/vep.d.ts +7 -0
- package/dist/clis/ensembl/vep.js +86 -0
- package/dist/clis/ensembl/xrefs.d.ts +6 -0
- package/dist/clis/ensembl/xrefs.js +36 -0
- package/dist/clis/gene/fetch.d.ts +10 -0
- package/dist/clis/gene/fetch.js +96 -0
- package/dist/clis/gene/info.d.ts +7 -0
- package/dist/clis/gene/info.js +37 -0
- package/dist/clis/gene/search.d.ts +7 -0
- package/dist/clis/gene/search.js +71 -0
- package/dist/clis/geo/dataset.d.ts +7 -0
- package/dist/clis/geo/dataset.js +55 -0
- package/dist/clis/geo/download.d.ts +17 -0
- package/dist/clis/geo/download.js +115 -0
- package/dist/clis/geo/samples.d.ts +7 -0
- package/dist/clis/geo/samples.js +57 -0
- package/dist/clis/geo/search.d.ts +8 -0
- package/dist/clis/geo/search.js +66 -0
- package/dist/clis/kegg/convert.d.ts +7 -0
- package/dist/clis/kegg/convert.js +37 -0
- package/dist/clis/kegg/disease.d.ts +6 -0
- package/dist/clis/kegg/disease.js +57 -0
- package/dist/clis/kegg/link.d.ts +7 -0
- package/dist/clis/kegg/link.js +36 -0
- package/dist/clis/kegg/pathway.d.ts +6 -0
- package/dist/clis/kegg/pathway.js +37 -0
- package/dist/clis/pubmed/abstract.d.ts +7 -0
- package/dist/clis/pubmed/abstract.js +42 -0
- package/dist/clis/pubmed/cited-by.d.ts +7 -0
- package/dist/clis/pubmed/cited-by.js +77 -0
- package/dist/clis/pubmed/fetch.d.ts +6 -0
- package/dist/clis/pubmed/fetch.js +36 -0
- package/dist/clis/pubmed/info.yaml +22 -0
- package/dist/clis/pubmed/related.d.ts +7 -0
- package/dist/clis/pubmed/related.js +81 -0
- package/dist/clis/pubmed/search.d.ts +8 -0
- package/dist/clis/pubmed/search.js +63 -0
- package/dist/clis/snp/lookup.d.ts +7 -0
- package/dist/clis/snp/lookup.js +57 -0
- package/dist/clis/sra/download.d.ts +18 -0
- package/dist/clis/sra/download.js +217 -0
- package/dist/clis/sra/run.d.ts +8 -0
- package/dist/clis/sra/run.js +77 -0
- package/dist/clis/sra/search.d.ts +8 -0
- package/dist/clis/sra/search.js +83 -0
- package/dist/clis/string/enrichment.d.ts +7 -0
- package/dist/clis/string/enrichment.js +50 -0
- package/dist/clis/string/network.d.ts +7 -0
- package/dist/clis/string/network.js +47 -0
- package/dist/clis/string/partners.d.ts +4 -0
- package/dist/clis/string/partners.js +44 -0
- package/dist/clis/taxonomy/lookup.d.ts +8 -0
- package/dist/clis/taxonomy/lookup.js +54 -0
- package/dist/clis/uniprot/fetch.d.ts +7 -0
- package/dist/clis/uniprot/fetch.js +82 -0
- package/dist/clis/uniprot/search.d.ts +6 -0
- package/dist/clis/uniprot/search.js +65 -0
- package/dist/clis/uniprot/sequence.d.ts +7 -0
- package/dist/clis/uniprot/sequence.js +51 -0
- package/dist/commander-adapter.d.ts +27 -0
- package/dist/commander-adapter.js +286 -0
- package/dist/completion.d.ts +19 -0
- package/dist/completion.js +117 -0
- package/dist/config.d.ts +57 -0
- package/dist/config.js +94 -0
- package/dist/databases/enrichr.d.ts +28 -0
- package/dist/databases/enrichr.js +131 -0
- package/dist/databases/ensembl.d.ts +14 -0
- package/dist/databases/ensembl.js +106 -0
- package/dist/databases/index.d.ts +45 -0
- package/dist/databases/index.js +49 -0
- package/dist/databases/kegg.d.ts +26 -0
- package/dist/databases/kegg.js +136 -0
- package/dist/databases/ncbi.d.ts +28 -0
- package/dist/databases/ncbi.js +144 -0
- package/dist/databases/string-db.d.ts +19 -0
- package/dist/databases/string-db.js +105 -0
- package/dist/databases/uniprot.d.ts +13 -0
- package/dist/databases/uniprot.js +110 -0
- package/dist/discovery.d.ts +32 -0
- package/dist/discovery.js +235 -0
- package/dist/doctor.d.ts +19 -0
- package/dist/doctor.js +151 -0
- package/dist/errors.d.ts +68 -0
- package/dist/errors.js +105 -0
- package/dist/execution.d.ts +15 -0
- package/dist/execution.js +178 -0
- package/dist/hooks.d.ts +48 -0
- package/dist/hooks.js +58 -0
- package/dist/main.d.ts +13 -0
- package/dist/main.js +31 -0
- package/dist/ncbi-fetch.d.ts +10 -0
- package/dist/ncbi-fetch.js +10 -0
- package/dist/output.d.ts +18 -0
- package/dist/output.js +394 -0
- package/dist/pipeline/executor.d.ts +22 -0
- package/dist/pipeline/executor.js +40 -0
- package/dist/pipeline/index.d.ts +6 -0
- package/dist/pipeline/index.js +6 -0
- package/dist/pipeline/registry.d.ts +16 -0
- package/dist/pipeline/registry.js +31 -0
- package/dist/pipeline/steps/fetch.d.ts +21 -0
- package/dist/pipeline/steps/fetch.js +160 -0
- package/dist/pipeline/steps/transform.d.ts +26 -0
- package/dist/pipeline/steps/transform.js +92 -0
- package/dist/pipeline/steps/xml-parse.d.ts +12 -0
- package/dist/pipeline/steps/xml-parse.js +27 -0
- package/dist/pipeline/template.d.ts +35 -0
- package/dist/pipeline/template.js +312 -0
- package/dist/rate-limiter.d.ts +56 -0
- package/dist/rate-limiter.js +120 -0
- package/dist/registry-api.d.ts +15 -0
- package/dist/registry-api.js +13 -0
- package/dist/registry.d.ts +90 -0
- package/dist/registry.js +100 -0
- package/dist/schema.d.ts +80 -0
- package/dist/schema.js +72 -0
- package/dist/spinner.d.ts +19 -0
- package/dist/spinner.js +37 -0
- package/dist/types.d.ts +101 -0
- package/dist/types.js +27 -0
- package/dist/utils.d.ts +16 -0
- package/dist/utils.js +40 -0
- package/dist/validate.d.ts +29 -0
- package/dist/validate.js +136 -0
- package/dist/verify.d.ts +20 -0
- package/dist/verify.js +131 -0
- package/dist/version.d.ts +13 -0
- package/dist/version.js +36 -0
- package/dist/xml-parser.d.ts +19 -0
- package/dist/xml-parser.js +119 -0
- package/dist/yaml-schema.d.ts +40 -0
- package/dist/yaml-schema.js +62 -0
- package/package.json +68 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/gene-profile — Complete gene profile from multiple databases.
|
|
3
|
+
*
|
|
4
|
+
* THE KILLER FEATURE: one command queries NCBI Gene, UniProt, KEGG, and
|
|
5
|
+
* STRING in parallel and returns a unified, agent-friendly JSON object.
|
|
6
|
+
*
|
|
7
|
+
* Supports:
|
|
8
|
+
* - Single gene: biocli aggregate gene-profile TP53
|
|
9
|
+
* - Batch: biocli aggregate gene-profile TP53,BRCA1,EGFR
|
|
10
|
+
*
|
|
11
|
+
* Design:
|
|
12
|
+
* - Promise.allSettled for partial failure tolerance
|
|
13
|
+
* - _meta.sources tracks which databases contributed
|
|
14
|
+
* - _meta.errors reports partial failures without crashing
|
|
15
|
+
*/
|
|
16
|
+
import { cli, Strategy } from '../../registry.js';
|
|
17
|
+
import { CliError } from '../../errors.js';
|
|
18
|
+
import { wrapResult } from '../../types.js';
|
|
19
|
+
import { createHttpContextForDatabase } from '../../databases/index.js';
|
|
20
|
+
import { buildEutilsUrl } from '../../databases/ncbi.js';
|
|
21
|
+
import { buildUniprotUrl } from '../../databases/uniprot.js';
|
|
22
|
+
import { buildKeggUrl, parseKeggTsv, parseKeggEntry } from '../../databases/kegg.js';
|
|
23
|
+
import { buildStringUrl } from '../../databases/string-db.js';
|
|
24
|
+
import { parseGeneSummaries } from '../_shared/xml-helpers.js';
|
|
25
|
+
import { resolveOrganism } from '../_shared/organism-db.js';
|
|
26
|
+
// ── NCBI Gene fetch ───────────────────────────────────────────────────────────
|
|
27
|
+
/**
 * Look up a gene in NCBI Gene via E-utilities (esearch followed by esummary).
 *
 * Up to five candidate IDs are requested so that an exact, case-insensitive
 * symbol match can be preferred over whatever NCBI happens to rank first.
 *
 * @param {object} ctx - HTTP context exposing `fetchJson(url)`.
 * @param {string} symbol - Gene symbol to search for.
 * @param {string} organism - Organism name used in the `[Organism]` filter.
 * @returns {Promise<object|null>} `{ geneId, name, summary, chromosome, location }`
 *   for the chosen candidate, or `null` when the search or the summary
 *   parse yields nothing.
 */
async function fetchNcbiGene(ctx, symbol, organism) {
    const searchUrl = buildEutilsUrl('esearch.fcgi', {
        db: 'gene',
        term: `${symbol}[Gene Name] AND ${organism}[Organism]`,
        retmax: '5',
        retmode: 'json',
    });
    const searchPayload = await ctx.fetchJson(searchUrl);
    const candidateIds = searchPayload?.esearchresult?.idlist ?? [];
    if (!candidateIds.length) {
        return null;
    }

    const summaryUrl = buildEutilsUrl('esummary.fcgi', {
        db: 'gene',
        id: candidateIds.join(','),
        retmode: 'json',
    });
    const candidates = parseGeneSummaries(await ctx.fetchJson(summaryUrl));
    if (!candidates.length) {
        return null;
    }

    // An exact symbol hit wins; otherwise fall back to the first candidate.
    const wanted = symbol.toUpperCase();
    const chosen = candidates.find((candidate) => candidate.symbol.toUpperCase() === wanted) ?? candidates[0];
    const { geneId, name, summary, chromosome, location } = chosen;
    return { geneId, name, summary, chromosome, location };
}
|
|
53
|
+
// ── UniProt fetch ─────────────────────────────────────────────────────────────
|
|
54
|
+
/**
 * Fetch protein-level annotation for a gene from UniProtKB.
 *
 * Searches reviewed entries for the gene/taxon pair (up to five), prefers the
 * entry whose primary gene name equals `symbol` (case-insensitive), and
 * extracts function text, subcellular locations, GO terms and the Ensembl
 * gene ID from that entry.
 *
 * @param {object} ctx - HTTP context exposing `fetchJson(url)`.
 * @param {string} symbol - Gene symbol.
 * @param {string|number} taxId - Taxonomy ID used in the `organism_id` filter.
 * @returns {Promise<object|null>} `{ accession, function, subcellularLocation,
 *   goTerms, ensemblGeneId }`, or `null` when the search returns no results.
 */
async function fetchUniprotData(ctx, symbol, taxId) {
    const searchUrl = buildUniprotUrl('/uniprotkb/search', {
        query: `gene:${symbol} AND organism_id:${taxId} AND reviewed:true`,
        format: 'json',
        size: '5',
    });
    const payload = await ctx.fetchJson(searchUrl);
    const candidates = payload?.results ?? [];
    if (!candidates.length) {
        return null;
    }

    // Choose the candidate whose primary gene name matches, else the first hit.
    const wanted = symbol.toUpperCase();
    const primaryGeneName = (candidate) => String(candidate.genes?.[0]?.geneName?.value ?? '');
    const entry = candidates.find((candidate) => primaryGeneName(candidate).toUpperCase() === wanted) ?? candidates[0];

    const annotations = entry.comments ?? [];
    const commentOfType = (type) => annotations.find((comment) => comment.commentType === type);

    // Free-text function description: all text fragments joined by a space.
    const functionText = (commentOfType('FUNCTION')?.texts ?? [])
        .map((fragment) => String(fragment.value ?? ''))
        .join(' ');

    // Subcellular locations, skipping entries without a location value.
    const locationNames = [];
    for (const place of commentOfType('SUBCELLULAR LOCATION')?.subcellularLocations ?? []) {
        const label = String(place.location?.value ?? '');
        if (label) {
            locationNames.push(label);
        }
    }

    // GO cross-references carry "<aspect letter>:<term name>" in the GoTerm property.
    const GO_ASPECT_CODES = { C: 'CC', F: 'MF', P: 'BP' };
    const crossRefs = entry.uniProtKBCrossReferences ?? [];
    const goTerms = [];
    for (const xref of crossRefs) {
        if (xref.database !== 'GO') {
            continue;
        }
        const goTermProp = (xref.properties ?? []).find((prop) => prop.key === 'GoTerm');
        const [aspectCode, ...labelParts] = String(goTermProp?.value ?? '').split(':');
        goTerms.push({
            id: String(xref.id ?? ''),
            name: labelParts.join(':'),
            aspect: GO_ASPECT_CODES[aspectCode] ?? aspectCode,
        });
    }

    // Ensembl gene ID comes from the first Ensembl cross-reference, if any.
    const ensemblRef = crossRefs.find((xref) => xref.database === 'Ensembl');
    const geneIdProp = (ensemblRef?.properties ?? []).find((prop) => prop.key === 'GeneId');

    return {
        accession: String(entry.primaryAccession ?? ''),
        function: functionText,
        subcellularLocation: locationNames.join(', '),
        goTerms,
        ensemblGeneId: geneIdProp ? String(geneIdProp.value) : undefined,
    };
}
|
|
108
|
+
// ── KEGG fetch ────────────────────────────────────────────────────────────────
|
|
109
|
+
/**
 * Drop a leading "path:" namespace from a KEGG pathway identifier.
 *
 * /link/pathway yields IDs such as "path:hsa04115" whereas /list/pathway
 * yields "hsa04115"; removing the prefix lets the two be compared directly.
 * Only a prefix at the very start of the string is removed, and only once.
 *
 * @param {string} id - KEGG pathway identifier, with or without the prefix.
 * @returns {string} The identifier without a leading "path:".
 */
function normalizeKeggId(id) {
    const prefix = 'path:';
    return id.startsWith(prefix) ? id.slice(prefix.length) : id;
}
|
|
116
|
+
/**
 * Collect KEGG pathway and disease annotations for one gene.
 *
 * Failures never propagate: each one is appended to `errors` as a readable
 * string and the affected part of the result degrades (missing names fall
 * back to the ID for pathways and to '' for diseases).
 *
 * @param {object} ctx - HTTP context exposing `fetchText(url)`.
 * @param {string} keggOrg - KEGG organism code (e.g. "hsa").
 * @param {string|number} geneId - Gene ID appended to the organism code.
 * @param {string[]} errors - Caller-owned array that receives failure messages.
 * @returns {Promise<{keggId: string, pathways: Array<{id: string, name: string}>,
 *   diseases: Array<{id: string, name: string}>}>}
 */
async function fetchKeggData(ctx, keggOrg, geneId, errors) {
    const keggId = `${keggOrg}:${geneId}`;
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    // /get accepts up to 10 IDs joined with '+'.
    const GET_BATCH_SIZE = 10;

    // ── Pathways ──
    let pathways = [];
    try {
        const pathText = await ctx.fetchText(buildKeggUrl(`/link/pathway/${keggId}`));
        if (pathText.trim()) {
            const pathIds = parseKeggTsv(pathText).map((link) => link.value).filter(Boolean);
            if (pathIds.length) {
                const pathNames = new Map();
                // Name resolution is best-effort: if the list request fails we
                // still report the pathway IDs we already have.
                try {
                    const listText = await ctx.fetchText(buildKeggUrl(`/list/pathway/${keggOrg}`));
                    for (const row of parseKeggTsv(listText)) {
                        // Normalize the list key too, so the join works whether or
                        // not the list rows carry the "path:" prefix. The value looks
                        // like "Pathway name - Homo sapiens (human)"; keep the name.
                        pathNames.set(normalizeKeggId(String(row.key ?? '')), String(row.value ?? '').replace(/ - .*$/, ''));
                    }
                }
                catch (err) {
                    errors.push(`KEGG pathway names: ${describe(err)}`);
                }
                pathways = pathIds.map((rawId) => {
                    const id = normalizeKeggId(rawId);
                    return { id, name: pathNames.get(id) ?? id };
                });
            }
        }
    }
    catch (err) {
        errors.push(`KEGG pathways: ${describe(err)}`);
    }

    // ── Diseases ──
    let diseases = [];
    try {
        const diseaseText = await ctx.fetchText(buildKeggUrl(`/link/disease/${keggId}`));
        if (diseaseText.trim()) {
            const diseaseIds = parseKeggTsv(diseaseText).map((link) => link.value).filter(Boolean);
            const diseaseNames = new Map();
            for (let start = 0; start < diseaseIds.length; start += GET_BATCH_SIZE) {
                const batch = diseaseIds.slice(start, start + GET_BATCH_SIZE);
                try {
                    const text = await ctx.fetchText(buildKeggUrl(`/get/${batch.join('+')}`));
                    // Flat-file entries are separated by "///".
                    for (const entryText of text.split('///').filter((chunk) => chunk.trim())) {
                        const parsed = parseKeggEntry(entryText);
                        if (parsed.ENTRY && parsed.NAME) {
                            // ENTRY is "<id> <type>"; re-add the "ds:" prefix so the
                            // key matches the IDs returned by /link/disease.
                            diseaseNames.set('ds:' + parsed.ENTRY.split(/\s+/)[0], parsed.NAME);
                        }
                    }
                }
                catch (err) {
                    errors.push(`KEGG disease names (batch ${start / GET_BATCH_SIZE + 1}): ${describe(err)}`);
                }
            }
            diseases = diseaseIds.map((id) => ({ id, name: diseaseNames.get(id) ?? '' }));
        }
    }
    catch (err) {
        errors.push(`KEGG diseases: ${describe(err)}`);
    }

    return { keggId, pathways, diseases };
}
|
|
173
|
+
// ── STRING fetch ──────────────────────────────────────────────────────────────
|
|
174
|
+
/**
 * Fetch interaction partners for a gene from STRING.
 *
 * Requests at most 10 partners with a required score of 400. Request errors
 * are not caught here; they reject the returned promise.
 *
 * @param {object} ctx - HTTP context exposing `fetchJson(url)`.
 * @param {string} symbol - Gene symbol passed as the STRING identifier.
 * @param {string|number} taxId - Taxonomy ID passed as the species.
 * @returns {Promise<Array<{partner: string, score: number}>>} One item per
 *   response row, or an empty array when the response is not an array.
 */
async function fetchStringPartners(ctx, symbol, taxId) {
    const requestUrl = buildStringUrl('interaction_partners', {
        identifiers: symbol,
        species: String(taxId),
        limit: '10',
        required_score: '400',
    });
    const payload = await ctx.fetchJson(requestUrl);
    if (!Array.isArray(payload)) {
        return [];
    }
    return payload.map(({ preferredName_B: partnerName, score: rawScore }) => ({
        partner: String(partnerName ?? ''),
        score: Number(rawScore ?? 0),
    }));
}
|
|
189
|
+
// ── Main command ──────────────────────────────────────────────────────────────
|
|
190
|
+
/**
 * Build a unified gene profile from NCBI Gene, UniProt, STRING and KEGG.
 *
 * NCBI, UniProt and STRING are queried concurrently with Promise.allSettled,
 * so one failing source does not abort the others. KEGG runs afterwards
 * because it is keyed on the NCBI Gene ID; without that ID it is skipped.
 * Every failure or empty result is reported as a warning string rather than
 * thrown.
 *
 * @param {string} symbol - Gene symbol as given by the caller.
 * @param {string} organismName - Organism name (used for NCBI and in the metadata).
 * @param {string|number} taxId - Taxonomy ID (used for UniProt and STRING).
 * @param {string} keggOrg - KEGG organism code.
 * @returns {Promise<*>} Whatever `wrapResult(profileData, meta)` returns.
 */
async function buildGeneProfile(symbol, organismName, taxId, keggOrg) {
    const sources = [];
    const warnings = [];
    // Same rendering as the KEGG error paths: message for Error objects,
    // String() for anything else, so all warnings share one format.
    const describeFailure = (reason) => (reason instanceof Error ? reason.message : String(reason));

    const ncbiCtx = createHttpContextForDatabase('ncbi');
    const uniprotCtx = createHttpContextForDatabase('uniprot');
    const keggCtx = createHttpContextForDatabase('kegg');
    const stringCtx = createHttpContextForDatabase('string');

    // Parallel queries with partial failure tolerance.
    const [ncbiResult, uniprotResult, stringResult] = await Promise.allSettled([
        fetchNcbiGene(ncbiCtx, symbol, organismName),
        fetchUniprotData(uniprotCtx, symbol, taxId),
        fetchStringPartners(stringCtx, symbol, taxId),
    ]);

    // Record a source when it produced data; otherwise record why it did not.
    const unwrap = (label, sourceName, settled, hasData) => {
        if (settled.status === 'fulfilled' && hasData(settled.value)) {
            sources.push(sourceName);
            return settled.value;
        }
        const why = settled.status === 'rejected' ? describeFailure(settled.reason) : 'no data';
        warnings.push(`${label}: ${why}`);
        return null;
    };
    const ncbiData = unwrap('NCBI', 'NCBI Gene', ncbiResult, (value) => Boolean(value));
    const uniprotData = unwrap('UniProt', 'UniProt', uniprotResult, (value) => Boolean(value));
    const interactions = unwrap('STRING', 'STRING', stringResult, (value) => Boolean(value?.length)) ?? [];

    // KEGG needs the NCBI Gene ID, so it can only run after NCBI has answered.
    // fetchKeggData appends its own failures to `warnings`.
    let keggData = null;
    if (ncbiData?.geneId) {
        try {
            keggData = await fetchKeggData(keggCtx, keggOrg, ncbiData.geneId, warnings);
            if (keggData.pathways.length || keggData.diseases.length) {
                sources.push('KEGG');
            }
        }
        catch (err) {
            warnings.push(`KEGG: ${describeFailure(err)}`);
        }
    }
    else {
        warnings.push('KEGG: skipped (no NCBI Gene ID to map from)');
    }

    const profileData = {
        symbol,
        name: ncbiData?.name ?? '',
        summary: ncbiData?.summary ?? '',
        chromosome: ncbiData?.chromosome ?? '',
        location: ncbiData?.location ?? '',
        function: uniprotData?.function ?? '',
        subcellularLocation: uniprotData?.subcellularLocation ?? '',
        pathways: (keggData?.pathways ?? []).map((pathway) => ({ ...pathway, source: 'KEGG' })),
        goTerms: uniprotData?.goTerms ?? [],
        interactions,
        diseases: (keggData?.diseases ?? []).map((disease) => ({ ...disease, source: 'KEGG' })),
    };

    // Only include identifiers that were actually resolved.
    const ids = {};
    if (ncbiData?.geneId) {
        ids.ncbiGeneId = ncbiData.geneId;
    }
    if (uniprotData?.accession) {
        ids.uniprotAccession = uniprotData.accession;
    }
    if (keggData?.keggId) {
        ids.keggId = keggData.keggId;
    }
    if (uniprotData?.ensemblGeneId) {
        ids.ensemblGeneId = uniprotData.ensemblGeneId;
    }

    return wrapResult(profileData, {
        ids,
        sources,
        warnings,
        organism: organismName,
        query: symbol,
    });
}
|
|
276
|
+
// Registers the `aggregate gene-profile` command.
// One symbol yields a single profile; a comma-separated list yields an array
// of profiles in input order.
cli({
    site: 'aggregate',
    name: 'gene-profile',
    description: 'Complete gene profile from NCBI + UniProt + KEGG + STRING',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'genes', positional: true, required: true, help: 'Gene symbol(s), comma-separated (e.g. TP53 or TP53,BRCA1,EGFR)' },
        { name: 'organism', default: 'human', help: 'Organism (e.g. human, mouse, 9606)' },
    ],
    columns: ['symbol', 'name', 'organism', 'pathways', 'goTerms', 'interactions'],
    func: async (_ctx, args) => {
        // Split on commas, trim each token, and discard blanks.
        const symbols = [];
        for (const token of String(args.genes).split(',')) {
            const cleaned = token.trim();
            if (cleaned) {
                symbols.push(cleaned);
            }
        }
        if (symbols.length === 0) {
            throw new CliError('ARGUMENT', 'At least one gene symbol is required');
        }

        const organism = resolveOrganism(String(args.organism));
        const profileFor = (geneSymbol) => buildGeneProfile(geneSymbol, organism.name, organism.taxId, organism.keggOrg);

        if (symbols.length === 1) {
            return await profileFor(symbols[0]);
        }

        // Batch mode: one gene at a time, deliberately not parallel, to respect rate limits.
        const collected = [];
        for (const geneSymbol of symbols) {
            collected.push(await profileFor(geneSymbol));
        }
        return collected;
    },
});
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/literature-brief — PubMed literature summary for a topic.
|
|
3
|
+
*
|
|
4
|
+
* Fetches recent papers and returns structured data with abstracts,
|
|
5
|
+
* optimized for AI agent consumption and literature review tasks.
|
|
6
|
+
*/
|
|
7
|
+
import { cli, Strategy } from '../../registry.js';
|
|
8
|
+
import { CliError } from '../../errors.js';
|
|
9
|
+
import { wrapResult } from '../../types.js';
|
|
10
|
+
import { createHttpContextForDatabase } from '../../databases/index.js';
|
|
11
|
+
import { buildEutilsUrl } from '../../databases/ncbi.js';
|
|
12
|
+
import { parsePubmedArticles } from '../_shared/xml-helpers.js';
|
|
13
|
+
// Registers the `aggregate literature-brief` command: a PubMed esearch for the
// query restricted to the last N years, followed by an efetch of the matching
// records, returned as structured papers with abstracts.
cli({
    site: 'aggregate',
    name: 'literature-brief',
    description: 'Literature summary with abstracts for a research topic',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'query', positional: true, required: true, help: 'Search query (e.g. "TP53 immunotherapy", "CRISPR cancer")' },
        { name: 'limit', type: 'int', default: 10, help: 'Number of papers (1-50)' },
        { name: 'sort', default: 'relevance', choices: ['relevance', 'date'], help: 'Sort order' },
        { name: 'years', type: 'int', default: 5, help: 'Limit to last N years' },
    ],
    columns: ['pmid', 'title', 'journal', 'year', 'abstract'],
    func: async (_ctx, args) => {
        const query = String(args.query).trim();
        if (!query) {
            throw new CliError('ARGUMENT', 'Search query is required');
        }

        // Clamp a numeric option into [min, max] as a whole number. A value that
        // does not parse as a number falls back to the documented default instead
        // of leaking "NaN" into the request.
        const clampInt = (raw, fallback, min, max) => {
            const parsed = Number(raw);
            if (Number.isNaN(parsed)) {
                return fallback;
            }
            return Math.trunc(Math.max(min, Math.min(parsed, max)));
        };
        const limit = clampInt(args.limit, 10, 1, 50);
        const years = clampInt(args.years, 5, 1, 20);
        const sort = String(args.sort) === 'date' ? 'pub_date' : 'relevance';

        const ncbiCtx = createHttpContextForDatabase('ncbi');
        const warnings = [];

        // Build date-restricted query.
        const dateQuery = `${query} AND "last ${years} years"[PDat]`;

        // Step 1: esearch for matching PMIDs and the total hit count.
        const searchResult = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
            db: 'pubmed',
            term: dateQuery,
            retmax: String(limit),
            sort,
            retmode: 'json',
        }));
        const esearch = searchResult?.esearchresult;
        const pmids = esearch?.idlist ?? [];
        const totalCount = Number(esearch?.count ?? 0);
        if (!pmids.length) {
            throw new CliError('NOT_FOUND', `No papers found for "${query}"`, 'Try broader terms or increase --years');
        }

        // Step 2: efetch the full records (including abstracts) as XML.
        const xmlData = await ncbiCtx.fetchXml(buildEutilsUrl('efetch.fcgi', {
            db: 'pubmed',
            id: pmids.join(','),
            rettype: 'xml',
        }));
        const articles = parsePubmedArticles(xmlData);
        if (!articles.length) {
            throw new CliError('PARSE_ERROR', 'Failed to parse PubMed response');
        }
        // Surface partial parses instead of silently returning fewer papers.
        if (articles.length < pmids.length) {
            warnings.push(`PubMed: parsed ${articles.length} of ${pmids.length} requested records`);
        }

        const papers = articles.map(({ pmid, title, authors, journal, year, doi, abstract }) => ({
            pmid,
            title,
            authors,
            journal,
            year,
            doi,
            abstract,
        }));
        return wrapResult({ papers, totalAvailable: totalCount }, {
            sources: ['PubMed'],
            warnings,
            query,
            ids: { totalPmids: String(totalCount) },
        });
    },
});
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/variant-dossier — Comprehensive variant interpretation report.
|
|
3
|
+
*
|
|
4
|
+
* Cross-queries:
|
|
5
|
+
* - NCBI dbSNP (basic variant info)
|
|
6
|
+
* - ClinVar (clinical significance)
|
|
7
|
+
* - Ensembl VEP (functional consequence prediction)
|
|
8
|
+
*
|
|
9
|
+
* Accepts rsID (rs334), HGVS notation, or gene:variant format.
|
|
10
|
+
*/
|
|
11
|
+
export {};
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/variant-dossier — Comprehensive variant interpretation report.
|
|
3
|
+
*
|
|
4
|
+
* Cross-queries:
|
|
5
|
+
* - NCBI dbSNP (basic variant info)
|
|
6
|
+
* - ClinVar (clinical significance)
|
|
7
|
+
* - Ensembl VEP (functional consequence prediction)
|
|
8
|
+
*
|
|
9
|
+
* Accepts rsID (rs334), HGVS notation, or gene:variant format.
|
|
10
|
+
*/
|
|
11
|
+
import { cli, Strategy } from '../../registry.js';
|
|
12
|
+
import { CliError } from '../../errors.js';
|
|
13
|
+
import { wrapResult } from '../../types.js';
|
|
14
|
+
import { createHttpContextForDatabase } from '../../databases/index.js';
|
|
15
|
+
import { buildEutilsUrl } from '../../databases/ncbi.js';
|
|
16
|
+
import { buildEnsemblUrl } from '../../databases/ensembl.js';
|
|
17
|
+
// Registers the `aggregate variant-dossier` command.
// For an rsID, dbSNP, ClinVar and Ensembl VEP are queried concurrently; for any
// other input only VEP is queried (via its HGVS endpoint). Individual source
// failures become warnings; the command fails only when no source has data.
cli({
    site: 'aggregate',
    name: 'variant-dossier',
    description: 'Comprehensive variant interpretation (dbSNP + ClinVar + VEP)',
    database: 'aggregate',
    strategy: Strategy.PUBLIC,
    defaultFormat: 'json',
    timeoutSeconds: 60,
    args: [
        { name: 'variant', positional: true, required: true, help: 'Variant ID: rsID (rs334), HGVS, or genomic coordinate' },
    ],
    columns: ['variant', 'gene', 'consequence', 'clinicalSignificance', 'condition'],
    func: async (_ctx, args) => {
        const variant = String(args.variant).trim();
        if (!variant) {
            throw new CliError('ARGUMENT', 'Variant ID is required');
        }
        const sources = [];
        const warnings = [];
        const ids = {};
        const ncbiCtx = createHttpContextForDatabase('ncbi');
        const ensemblCtx = createHttpContextForDatabase('ensembl');

        // Determine if input is an rsID ("rs" + digits, any case).
        const isRsId = /^rs\d+$/i.test(variant);
        if (isRsId) {
            ids.rsId = variant;
        }

        // dbSNP summary for the numeric part of the rsID; null when absent.
        const fetchDbSnp = async () => {
            const snpId = variant.replace(/^rs/i, '');
            const data = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
                db: 'snp', id: snpId, retmode: 'json',
            }));
            const entry = data?.result?.[snpId];
            if (!entry) {
                return null;
            }
            const firstGene = Array.isArray(entry.genes) && entry.genes.length > 0 ? entry.genes[0] : null;
            return {
                rsid: `rs${snpId}`,
                gene: String(firstGene?.name ?? ''),
                chromosome: String(entry.chr ?? ''),
                position: String(entry.chrpos ?? ''),
                alleles: String(entry.docsum ?? ''),
                maf: String(entry.global_maf ?? ''),
            };
        };

        // ClinVar: search up to five records, then summarize each.
        const fetchClinvar = async () => {
            const searchResult = await ncbiCtx.fetchJson(buildEutilsUrl('esearch.fcgi', {
                db: 'clinvar', term: `${variant}[Variant ID]`, retmax: '5', retmode: 'json',
            }));
            const clinvarIds = searchResult?.esearchresult?.idlist ?? [];
            if (!clinvarIds.length) {
                return [];
            }
            const summary = await ncbiCtx.fetchJson(buildEutilsUrl('esummary.fcgi', {
                db: 'clinvar', id: clinvarIds.join(','), retmode: 'json',
            }));
            const resultObj = summary?.result;
            const uids = resultObj?.uids ?? [];
            return uids.map((uid) => {
                const item = resultObj?.[uid] ?? {};
                // clinical_significance may be an object with a description or a plain value.
                const significance = typeof item.clinical_significance === 'object'
                    ? String(item.clinical_significance?.description ?? '')
                    : String(item.clinical_significance ?? '');
                const condition = Array.isArray(item.trait_set)
                    ? item.trait_set.map((trait) => String(trait.trait_name ?? '')).join('; ')
                    : '';
                return {
                    title: String(item.title ?? ''),
                    significance,
                    condition,
                    accession: String(item.accession ?? ''),
                };
            });
        };

        // Ensembl VEP: up to five transcript consequences, canonical ones first.
        const fetchVep = async () => {
            const vepPath = isRsId
                ? `/vep/human/id/${variant}`
                : `/vep/human/hgvs/${encodeURIComponent(variant)}`;
            const data = await ensemblCtx.fetchJson(buildEnsemblUrl(vepPath, { canonical: '1', hgvs: '1', protein: '1' }));
            if (!Array.isArray(data) || !data.length) {
                return [];
            }
            const consequences = data[0].transcript_consequences ?? [];
            const canonicalFirst = [...consequences].sort((a, b) => Number(Boolean(b.canonical)) - Number(Boolean(a.canonical)));
            return canonicalFirst.slice(0, 5).map((t) => ({
                gene: String(t.gene_symbol ?? ''),
                transcript: String(t.transcript_id ?? ''),
                consequence: (t.consequence_terms ?? []).join(', '),
                impact: String(t.impact ?? ''),
                aminoAcids: String(t.amino_acids ?? ''),
                codons: String(t.codons ?? ''),
                biotype: String(t.biotype ?? ''),
                canonical: Boolean(t.canonical),
            }));
        };

        // Parallel queries; the NCBI lookups are skipped for non-rsID input.
        const [snpResult, clinvarResult, vepResult] = await Promise.allSettled([
            isRsId ? fetchDbSnp() : Promise.resolve(null),
            isRsId ? fetchClinvar() : Promise.resolve([]),
            fetchVep(),
        ]);

        // Assemble.
        let snpData = null;
        if (snpResult.status === 'fulfilled' && snpResult.value) {
            snpData = snpResult.value;
            sources.push('dbSNP');
        }
        else if (snpResult.status === 'rejected') {
            warnings.push(`dbSNP: ${snpResult.reason}`);
        }
        const clinvar = clinvarResult.status === 'fulfilled' ? clinvarResult.value : [];
        if (clinvar.length) {
            sources.push('ClinVar');
        }
        else if (clinvarResult.status === 'rejected') {
            warnings.push(`ClinVar: ${clinvarResult.reason}`);
        }
        const vep = vepResult.status === 'fulfilled' ? vepResult.value : [];
        if (vep.length) {
            sources.push('Ensembl VEP');
        }
        else if (vepResult.status === 'rejected') {
            warnings.push(`Ensembl VEP: ${vepResult.reason}`);
        }
        if (!snpData && !clinvar.length && !vep.length) {
            throw new CliError('NOT_FOUND', `No data found for variant "${variant}"`, 'Check the variant ID format (e.g. rs334, NM_000518.5:c.20A>T)');
        }

        // snpData.gene is always a string, so an empty value must fall through
        // with `||` (not `??`) for the VEP gene symbol to be used as fallback.
        const gene = snpData?.gene || vep[0]?.gene || '';
        if (gene) {
            ids.gene = gene;
        }

        const dossier = {
            variant,
            gene,
            chromosome: snpData?.chromosome ?? '',
            position: snpData?.position ?? '',
            vepConsequences: vep,
            clinicalVariants: clinvar,
            dbsnp: snpData ? {
                alleles: snpData.alleles,
                maf: snpData.maf,
            } : null,
        };
        return wrapResult(dossier, {
            ids,
            sources,
            warnings,
            query: variant,
        });
    },
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aggregate/variant-interpret — Variant interpretation with clinical context.
|
|
3
|
+
*
|
|
4
|
+
* Builds on variant-dossier by adding:
|
|
5
|
+
* - UniProt protein function context for the affected gene
|
|
6
|
+
* - Structured interpretation summary (pathogenicity, impact, recommendation)
|
|
7
|
+
*
|
|
8
|
+
* Cross-queries: dbSNP + ClinVar + Ensembl VEP + UniProt
|
|
9
|
+
*/
|
|
10
|
+
export {};
|