@kernel.chat/kbot 3.41.0 → 3.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/agent-teams.d.ts +1 -1
- package/dist/agent-teams.d.ts.map +1 -1
- package/dist/agent-teams.js +36 -3
- package/dist/agent-teams.js.map +1 -1
- package/dist/agents/specialists.d.ts.map +1 -1
- package/dist/agents/specialists.js +20 -0
- package/dist/agents/specialists.js.map +1 -1
- package/dist/channels/kbot-channel.js +8 -31
- package/dist/channels/kbot-channel.js.map +1 -1
- package/dist/cli.js +8 -8
- package/dist/digest.js +1 -1
- package/dist/digest.js.map +1 -1
- package/dist/email-service.d.ts.map +1 -1
- package/dist/email-service.js +1 -2
- package/dist/email-service.js.map +1 -1
- package/dist/episodic-memory.d.ts.map +1 -1
- package/dist/episodic-memory.js +14 -0
- package/dist/episodic-memory.js.map +1 -1
- package/dist/learned-router.d.ts.map +1 -1
- package/dist/learned-router.js +29 -0
- package/dist/learned-router.js.map +1 -1
- package/dist/tools/email.d.ts.map +1 -1
- package/dist/tools/email.js +2 -3
- package/dist/tools/email.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +7 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/lab-bio.d.ts +2 -0
- package/dist/tools/lab-bio.d.ts.map +1 -0
- package/dist/tools/lab-bio.js +1392 -0
- package/dist/tools/lab-bio.js.map +1 -0
- package/dist/tools/lab-chem.d.ts +2 -0
- package/dist/tools/lab-chem.d.ts.map +1 -0
- package/dist/tools/lab-chem.js +1257 -0
- package/dist/tools/lab-chem.js.map +1 -0
- package/dist/tools/lab-core.d.ts +2 -0
- package/dist/tools/lab-core.d.ts.map +1 -0
- package/dist/tools/lab-core.js +2452 -0
- package/dist/tools/lab-core.js.map +1 -0
- package/dist/tools/lab-data.d.ts +2 -0
- package/dist/tools/lab-data.d.ts.map +1 -0
- package/dist/tools/lab-data.js +2464 -0
- package/dist/tools/lab-data.js.map +1 -0
- package/dist/tools/lab-earth.d.ts +2 -0
- package/dist/tools/lab-earth.d.ts.map +1 -0
- package/dist/tools/lab-earth.js +1124 -0
- package/dist/tools/lab-earth.js.map +1 -0
- package/dist/tools/lab-math.d.ts +2 -0
- package/dist/tools/lab-math.d.ts.map +1 -0
- package/dist/tools/lab-math.js +3021 -0
- package/dist/tools/lab-math.js.map +1 -0
- package/dist/tools/lab-physics.d.ts +2 -0
- package/dist/tools/lab-physics.d.ts.map +1 -0
- package/dist/tools/lab-physics.js +2423 -0
- package/dist/tools/lab-physics.js.map +1 -0
- package/package.json +2 -3
|
@@ -0,0 +1,1392 @@
|
|
|
1
|
+
// kbot Life Sciences Tools — Bioinformatics, genomics, proteomics, clinical research
|
|
2
|
+
// Real API integrations with NCBI, UniProt, PDB, ChEMBL, Reactome, GBIF, Open Targets, ClinicalTrials.gov
|
|
3
|
+
// No external dependencies — all built on native fetch + regex XML parsing.
|
|
4
|
+
import { registerTool } from './index.js';
|
|
5
|
+
const UA = 'KBot/3.0 (Lab Tools)';
|
|
6
|
+
// ── NCBI rate limiter (max 3 requests/sec without API key) ──────────────
|
|
7
|
+
// Timestamp (ms since epoch) of the most recently reserved NCBI request slot.
let lastNCBICall = 0;
/**
 * Wait until the caller may issue its next NCBI request.
 *
 * Each call reserves the next free slot synchronously (before any await), so
 * several callers started at the same moment are spaced 334 ms apart instead
 * of all sleeping for the same duration and then firing together.
 *
 * @returns {Promise<void>} Resolves once the reserved slot has been reached.
 */
async function ncbiThrottle() {
    // ~3 requests per second, the limit stated for unauthenticated use.
    const minIntervalMs = 334;
    const now = Date.now();
    const slot = Math.max(now, lastNCBICall + minIntervalMs);
    // Claim the slot immediately so concurrent callers queue behind it.
    lastNCBICall = slot;
    const delay = slot - now;
    if (delay > 0) {
        await new Promise(resolve => setTimeout(resolve, delay));
    }
}
|
|
16
|
+
// ── XML helpers (regex-based, no external parser) ───────────────────────
|
|
17
|
+
/**
 * Return the trimmed inner content of the first `<tag>…</tag>` element.
 *
 * Matching is case-insensitive and regex-based (no real XML parsing), so
 * nested elements of the same name are not balanced. The tag name must match
 * exactly: `Title` does not match `<TitleAbbrev>`.
 *
 * @param {string} xml - XML text to search; null/undefined is treated as empty.
 * @param {string} tag - Element name, matched literally.
 * @returns {string} Inner content of the first match, or '' when absent.
 */
function xmlTag(xml, tag) {
    // Escape regex metacharacters so names such as "a.b" are matched literally.
    const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // After the name allow only attributes (introduced by whitespace) or '>'.
    const re = new RegExp(`<${name}(?:\\s[^>]*)?>([\\s\\S]*?)</${name}\\s*>`, 'i');
    const m = String(xml ?? '').match(re);
    return m ? m[1].trim() : '';
}
|
|
22
|
+
/**
 * Return the trimmed inner content of every `<tag>…</tag>` element, in
 * document order.
 *
 * Matching is case-insensitive and regex-based (no real XML parsing), so
 * nested elements of the same name are not balanced. The tag name must match
 * exactly: `Author` does not match `<AuthorList>`.
 *
 * @param {string} xml - XML text to search; null/undefined is treated as empty.
 * @param {string} tag - Element name, matched literally.
 * @returns {string[]} Inner contents of all matches (empty array when none).
 */
function xmlTagAll(xml, tag) {
    // Escape regex metacharacters so the name is matched literally.
    const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // After the name allow only attributes (introduced by whitespace) or '>'.
    const re = new RegExp(`<${name}(?:\\s[^>]*)?>([\\s\\S]*?)</${name}\\s*>`, 'gi');
    return Array.from(String(xml ?? '').matchAll(re), m => m[1].trim());
}
|
|
31
|
+
// ── Standard genetic code codon table ───────────────────────────────────
|
|
32
|
+
// Maps every three-letter DNA codon to its one-letter amino acid; '*' marks a stop codon.
const CODON_TABLE = (() => {
    const bases = 'TCAG';
    // One letter per codon. Codons are enumerated with the second base varying
    // slowest, then the first base, then the third base.
    const residues = 'FFLLLLLLIIIMVVVV' // second base T
        + 'SSSSPPPPTTTTAAAA' // second base C
        + 'YY**HHQQNNKKDDEE' // second base A
        + 'CC*WRRRRSSRRGGGG'; // second base G
    const table = {};
    let index = 0;
    for (const second of bases) {
        for (const first of bases) {
            for (const third of bases) {
                table[first + second + third] = residues[index];
                index += 1;
            }
        }
    }
    return table;
})();
|
|
50
|
+
// Average molecular weights of the 20 standard amino acids, keyed by
// one-letter code. Values match the free (unbound) amino acids in g/mol.
// Glycine was previously 75.03, its monoisotopic mass; every other entry is
// an average weight, so it is now 75.07 for consistency.
const AA_WEIGHTS = {
    A: 89.09, R: 174.20, N: 132.12, D: 133.10, C: 121.16,
    E: 147.13, Q: 146.15, G: 75.07, H: 155.16, I: 131.17,
    L: 131.17, K: 146.19, M: 149.21, F: 165.19, P: 115.13,
    S: 105.09, T: 119.12, W: 204.23, Y: 181.19, V: 117.15,
};
|
|
56
|
+
// Per-base weights for DNA, keyed by nucleotide letter.
// NOTE(review): values look like deoxynucleoside monophosphate weights in g/mol — confirm.
const DNA_WEIGHTS = Object.fromEntries([
    ['A', 331.2],
    ['T', 322.2],
    ['G', 347.2],
    ['C', 307.2],
]);
|
|
59
|
+
// ── Registration ────────────────────────────────────────────────────────
|
|
60
|
+
export function registerLabBioTools() {
|
|
61
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
62
|
+
// 1. PubMed Search
|
|
63
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
64
|
+
registerTool({
    name: 'pubmed_search',
    description: 'Search PubMed for biomedical literature via NCBI E-utilities. Returns titles, authors, journal, year, abstract, DOI, and PMID. Use MeSH terms for precise filtering.',
    parameters: {
        query: { type: 'string', description: 'Search query (e.g., "CRISPR cancer therapy")', required: true },
        mesh_terms: { type: 'string', description: 'Optional MeSH terms to AND with query (e.g., "Neoplasms[MeSH]")' },
        limit: { type: 'number', description: 'Max results (default 10, max 50)' },
        sort: { type: 'string', description: 'Sort order: "relevance" (default) or "date"' },
    },
    tier: 'free',
    /**
     * Search PubMed (esearch for PMIDs, then efetch for records) and format
     * the hits as Markdown.
     *
     * @param {object} args - Tool arguments: `query` (required), `mesh_terms`, `limit`, `sort`.
     * @returns {Promise<string>} Markdown report, or a plain error/no-result message.
     *   Network and parsing failures are caught and reported as text.
     */
    async execute(args) {
        const query = String(args.query);
        const meshTerms = args.mesh_terms ? String(args.mesh_terms) : '';
        // Clamp to a whole number in [1, 50]; negative, fractional or NaN
        // values would otherwise be sent to NCBI as retmax.
        const requestedLimit = Number.isFinite(args.limit) ? Math.trunc(args.limit) : 10;
        const limit = Math.min(Math.max(requestedLimit, 1), 50);
        const sort = String(args.sort || 'relevance');
        const fullQuery = meshTerms ? `${query} AND ${meshTerms}` : query;
        const sortParam = sort === 'date' ? '&sort=pub_date' : '&sort=relevance';
        // Drops inline markup (<i>, <sup>, ...) embedded in titles and abstracts.
        const stripTags = (text) => text.replace(/<[^>]+>/g, '');
        try {
            // Step 1: esearch to get PMIDs
            await ncbiThrottle();
            const searchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=${limit}${sortParam}&term=${encodeURIComponent(fullQuery)}`;
            const searchRes = await fetch(searchUrl, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!searchRes.ok)
                return `PubMed search error: HTTP ${searchRes.status}`;
            const searchData = await searchRes.json();
            const idList = searchData?.esearchresult?.idlist || [];
            const totalCount = searchData?.esearchresult?.count || '0';
            if (idList.length === 0)
                return `No PubMed results for "${query}". Try broader terms or check MeSH vocabulary.`;
            // Step 2: efetch to get article details
            await ncbiThrottle();
            const fetchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=${idList.join(',')}`;
            const fetchRes = await fetch(fetchUrl, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!fetchRes.ok)
                return `PubMed fetch error: HTTP ${fetchRes.status}`;
            const xml = await fetchRes.text();
            // Parse articles from XML
            const articles = xmlTagAll(xml, 'PubmedArticle');
            const results = [`## PubMed Results (${totalCount} total, showing ${articles.length})\n`];
            for (const article of articles) {
                const pmid = xmlTag(article, 'PMID');
                const title = stripTags(xmlTag(article, 'ArticleTitle'));
                // Structured abstracts have several <AbstractText> sections; keep them all.
                const abstractSections = xmlTagAll(xmlTag(article, 'Abstract'), 'AbstractText');
                const abstractText = stripTags(abstractSections.join(' ')).trim();
                const journal = xmlTag(article, 'Title');
                // The first <Year> in a record belongs to DateCompleted/DateRevised,
                // so read the publication year from <PubDate> specifically.
                const pubDate = xmlTag(article, 'PubDate');
                const year = xmlTag(pubDate, 'Year')
                    || (xmlTag(pubDate, 'MedlineDate').match(/\d{4}/) || [''])[0]
                    || xmlTag(xmlTag(article, 'ArticleDate'), 'Year');
                // Authors
                const authorNodes = xmlTagAll(article, 'Author');
                const authors = authorNodes.slice(0, 5).map(a => {
                    const last = xmlTag(a, 'LastName');
                    const initials = xmlTag(a, 'Initials');
                    return last ? `${last} ${initials}` : xmlTag(a, 'CollectiveName');
                }).filter(Boolean);
                const authorStr = authors.join(', ') + (authorNodes.length > 5 ? ' et al.' : '');
                // DOI: identified by its type attribute, not by the element text.
                // Only the part before <ReferenceList> is searched for ArticleId
                // so a cited paper's DOI is never reported as this article's.
                const ownRecord = article.split(/<ReferenceList[\s>]/i)[0];
                const doiMatch = article.match(/<ELocationID[^>]*\bEIdType="doi"[^>]*>([^<]+)<\/ELocationID>/i)
                    || ownRecord.match(/<ArticleId[^>]*\bIdType="doi"[^>]*>([^<]+)<\/ArticleId>/i);
                const doi = doiMatch ? doiMatch[1].trim() : '';
                let entry = `### ${title}\n`;
                entry += `**Authors:** ${authorStr || 'N/A'}\n`;
                entry += `**Journal:** ${journal || 'N/A'} (${year || 'N/A'})\n`;
                entry += `**PMID:** [${pmid}](https://pubmed.ncbi.nlm.nih.gov/${pmid}/)`;
                if (doi)
                    entry += ` | **DOI:** [${doi}](https://doi.org/${doi})`;
                entry += '\n';
                if (abstractText) {
                    const truncated = abstractText.length > 500 ? abstractText.slice(0, 500) + '...' : abstractText;
                    entry += `\n> ${truncated}\n`;
                }
                results.push(entry);
            }
            return results.join('\n---\n');
        }
        catch (e) {
            return `PubMed search failed: ${e?.message || e}`;
        }
    },
});
|
|
157
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
158
|
+
// 2. Gene Lookup
|
|
159
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
160
|
+
registerTool({
    name: 'gene_lookup',
    // Description lists only what the output actually contains (no disease data is fetched).
    description: 'Look up gene information by symbol or name using MyGene.info. Returns gene type, chromosome location, aliases, summary, and KEGG pathways.',
    parameters: {
        gene: { type: 'string', description: 'Gene symbol or name (e.g., "TP53", "BRCA1", "tumor protein p53")', required: true },
        organism: { type: 'string', description: 'Organism (default: "human"). Also: "mouse", "rat", "zebrafish", etc.' },
    },
    tier: 'free',
    /**
     * Query MyGene.info and format up to three matching genes as Markdown.
     *
     * @param {object} args - Tool arguments: `gene` (required), `organism`.
     * @returns {Promise<string>} Markdown report, or a plain error/no-result message.
     *   Network and parsing failures are caught and reported as text.
     */
    async execute(args) {
        const gene = String(args.gene);
        const organism = String(args.organism || 'human');
        // Fixed locale keeps coordinates identical on every host; missing values show as '?'.
        const formatCoord = (value) => (typeof value === 'number' ? value.toLocaleString('en-US') : '?');
        try {
            // Only fields that are rendered below are requested.
            const fields = 'symbol,name,summary,genomic_pos,alias,type_of_gene,entrezgene,ensembl.gene,pathway.kegg';
            const url = `https://mygene.info/v3/query?q=${encodeURIComponent(gene)}&species=${encodeURIComponent(organism)}&fields=${fields}&size=5`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok)
                return `MyGene.info error: HTTP ${res.status}`;
            const data = await res.json();
            const hits = data.hits || [];
            if (hits.length === 0)
                return `No gene found for "${gene}" in ${organism}. Try the official HGNC symbol.`;
            const results = [`## Gene Lookup: "${gene}" (${organism})\n`];
            for (const hit of hits.slice(0, 3)) {
                let entry = `### ${hit.symbol || gene} — ${hit.name || 'Unknown'}\n`;
                entry += `**Type:** ${hit.type_of_gene || 'N/A'}\n`;
                if (hit.entrezgene)
                    entry += `**Entrez ID:** [${hit.entrezgene}](https://www.ncbi.nlm.nih.gov/gene/${hit.entrezgene})\n`;
                if (hit.ensembl?.gene)
                    entry += `**Ensembl:** ${hit.ensembl.gene}\n`;
                // Genomic position (the API may return one object or a list; use the first)
                if (hit.genomic_pos) {
                    const pos = Array.isArray(hit.genomic_pos) ? hit.genomic_pos[0] : hit.genomic_pos;
                    if (pos) {
                        // An absent strand is shown as '?' rather than being reported as '-'.
                        let strand = '?';
                        if (pos.strand > 0)
                            strand = '+';
                        else if (pos.strand < 0)
                            strand = '-';
                        entry += `**Location:** Chr${pos.chr}:${formatCoord(pos.start)}-${formatCoord(pos.end)} (${strand} strand)\n`;
                    }
                }
                // Aliases
                if (hit.alias) {
                    const aliases = Array.isArray(hit.alias) ? hit.alias : [hit.alias];
                    entry += `**Aliases:** ${aliases.slice(0, 10).join(', ')}\n`;
                }
                // Summary
                if (hit.summary) {
                    const truncated = hit.summary.length > 600 ? hit.summary.slice(0, 600) + '...' : hit.summary;
                    entry += `\n**Summary:** ${truncated}\n`;
                }
                // Pathways
                if (hit.pathway?.kegg) {
                    const pathways = Array.isArray(hit.pathway.kegg) ? hit.pathway.kegg : [hit.pathway.kegg];
                    entry += `\n**KEGG Pathways:**\n`;
                    for (const p of pathways.slice(0, 5)) {
                        entry += `- ${p.name || p.id}\n`;
                    }
                }
                results.push(entry);
            }
            return results.join('\n---\n');
        }
        catch (e) {
            return `Gene lookup failed: ${e?.message || e}`;
        }
    },
});
|
|
225
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
226
|
+
// 3. Protein Search
|
|
227
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
228
|
+
registerTool({
    name: 'protein_search',
    description: 'Search UniProt for protein information. Returns sequence length, function, subcellular location, and GO terms.',
    parameters: {
        query: { type: 'string', description: 'Protein name, gene symbol, or keyword (e.g., "insulin", "P53_HUMAN")', required: true },
        organism: { type: 'string', description: 'Organism filter (e.g., "Homo sapiens", "Mus musculus")' },
        reviewed: { type: 'boolean', description: 'Only reviewed (Swiss-Prot) entries (default: true)' },
    },
    tier: 'free',
    /**
     * Search UniProtKB and format up to five entries as Markdown.
     *
     * @param {object} args - Tool arguments: `query` (required), `organism`, `reviewed`.
     * @returns {Promise<string>} Markdown report, or a plain error/no-result message.
     *   Network and parsing failures are caught and reported as text.
     */
    async execute(args) {
        const query = String(args.query);
        const organism = args.organism ? String(args.organism) : '';
        // Anything other than an explicit `false` keeps the reviewed-only filter on.
        const reviewed = args.reviewed !== false;
        try {
            const filters = [];
            if (organism) {
                // Escape characters that would end the quoted phrase early.
                const safeOrganism = organism.replace(/["\\]/g, '\\$&');
                filters.push(`organism_name:"${safeOrganism}"`);
            }
            if (reviewed)
                filters.push('reviewed:true');
            // Group the user query so the filters apply to all of it, even
            // when it contains its own OR/AND operators.
            const fullQuery = filters.length > 0 ? `(${query}) AND ${filters.join(' AND ')}` : query;
            const url = `https://rest.uniprot.org/uniprotkb/search?query=${encodeURIComponent(fullQuery)}&format=json&size=5`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok)
                return `UniProt error: HTTP ${res.status}`;
            const data = await res.json();
            const results_arr = data.results || [];
            if (results_arr.length === 0)
                return `No UniProt results for "${query}". Try a broader query or set reviewed=false.`;
            const output = [`## UniProt Search: "${query}"\n`];
            for (const entry of results_arr) {
                const accession = entry.primaryAccession || 'N/A';
                const entryName = entry.uniProtkbId || '';
                const proteinName = entry.proteinDescription?.recommendedName?.fullName?.value
                    || entry.proteinDescription?.submittedName?.[0]?.fullName?.value
                    || 'Unknown';
                const organism_name = entry.organism?.scientificName || '';
                const seqLen = entry.sequence?.length || 0;
                const seqMW = entry.sequence?.molWeight || 0;
                const comments = entry.comments || [];
                let block = `### ${proteinName}\n`;
                block += `**Accession:** [${accession}](https://www.uniprot.org/uniprot/${accession}) (${entryName})\n`;
                block += `**Organism:** ${organism_name}\n`;
                // molWeight is divided by 1000 to display kDa.
                block += `**Sequence:** ${seqLen} aa | ${(seqMW / 1000).toFixed(1)} kDa\n`;
                // Function
                const funcComment = comments.find((c) => c.commentType === 'FUNCTION');
                const funcText = funcComment?.texts?.[0]?.value || '';
                if (funcText) {
                    const truncated = funcText.length > 400 ? funcText.slice(0, 400) + '...' : funcText;
                    block += `\n**Function:** ${truncated}\n`;
                }
                // Subcellular location
                const locComment = comments.find((c) => c.commentType === 'SUBCELLULAR LOCATION');
                const locations = locComment?.subcellularLocations?.map((sl) => sl.location?.value).filter(Boolean) || [];
                if (locations.length > 0)
                    block += `**Subcellular Location:** ${locations.join(', ')}\n`;
                // GO terms: the first 20 cross-references are grouped by the
                // "F:", "P:" or "C:" prefix of the term name.
                const goTerms = (entry.uniProtKBCrossReferences || []).filter((x) => x.database === 'GO');
                if (goTerms.length > 0) {
                    const goGroups = { F: [], P: [], C: [] };
                    for (const go of goTerms.slice(0, 20)) {
                        const name = String(go.properties?.find((p) => p.key === 'GoTerm')?.value || go.id || '');
                        const group = name[1] === ':' ? goGroups[name[0]] : undefined;
                        if (Array.isArray(group))
                            group.push(name.slice(2));
                    }
                    if (goGroups.F.length > 0)
                        block += `**Molecular Function:** ${goGroups.F.slice(0, 5).join('; ')}\n`;
                    if (goGroups.P.length > 0)
                        block += `**Biological Process:** ${goGroups.P.slice(0, 5).join('; ')}\n`;
                    if (goGroups.C.length > 0)
                        block += `**Cellular Component:** ${goGroups.C.slice(0, 5).join('; ')}\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Protein search failed: ${e?.message || e}`;
        }
    },
});
|
|
317
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
318
|
+
// 4. Protein Structure (PDB)
|
|
319
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
320
|
+
registerTool({
    name: 'protein_structure',
    description: 'Fetch protein 3D structure info from RCSB PDB. Get resolution, experimental method, ligands, and chain details by PDB ID or text search.',
    parameters: {
        pdb_id: { type: 'string', description: 'PDB ID (e.g., "1TUP", "6LU7"). If provided, fetches directly.' },
        query: { type: 'string', description: 'Text search query (e.g., "p53 DNA binding domain"). Used if pdb_id not given.' },
    },
    tier: 'free',
    /**
     * Look up one PDB entry by id, or the top five full-text search hits, and
     * format each entry as Markdown.
     *
     * @param {object} args - Tool arguments: `pdb_id` and/or `query` (at least one).
     * @returns {Promise<string>} Markdown report, or a plain error/no-result message.
     *   Network and parsing failures are caught and reported as text.
     */
    async execute(args) {
        const pdbId = args.pdb_id ? String(args.pdb_id).trim().toUpperCase() : '';
        const query = args.query ? String(args.query) : '';
        if (!pdbId && !query)
            return 'Provide either pdb_id or query to search PDB structures.';
        try {
            let ids = [];
            if (pdbId) {
                ids = [pdbId];
            }
            else {
                // Text search via RCSB search API
                const searchBody = JSON.stringify({
                    query: {
                        type: 'terminal',
                        service: 'full_text',
                        parameters: { value: query },
                    },
                    return_type: 'entry',
                    request_options: { paginate: { start: 0, rows: 5 } },
                });
                const searchRes = await fetch('https://search.rcsb.org/rcsbsearch/v2/query', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json', 'User-Agent': UA },
                    body: searchBody,
                    signal: AbortSignal.timeout(10000),
                });
                if (!searchRes.ok)
                    return `PDB search error: HTTP ${searchRes.status}`;
                const searchData = await searchRes.json();
                ids = (searchData.result_set || []).map((r) => r.identifier).slice(0, 5);
                if (ids.length === 0)
                    return `No PDB structures found for "${query}".`;
            }
            // Entries are independent, so fetch them concurrently. Promise.all
            // keeps the blocks in the same order as `ids`.
            const blocks = await Promise.all(ids.map(async (id) => {
                // The id can come straight from the caller; encode it so it
                // cannot alter the request path.
                const safeId = encodeURIComponent(id);
                const res = await fetch(`https://data.rcsb.org/rest/v1/core/entry/${safeId}`, {
                    headers: { 'User-Agent': UA },
                    signal: AbortSignal.timeout(10000),
                });
                if (!res.ok)
                    return `**${id}**: Not found (HTTP ${res.status})`;
                const data = await res.json();
                const info = data.rcsb_entry_info;
                const title = data.struct?.title || 'No title';
                const method = data.exptl?.[0]?.method || 'N/A';
                const resolution = info?.resolution_combined?.[0];
                const deposited = data.rcsb_accession_info?.deposit_date || '';
                const polymerCount = info?.polymer_entity_count || 0;
                const nonPolymerCount = info?.nonpolymer_entity_count || 0;
                const citation = data.rcsb_primary_citation;
                let block = `### [${id}](https://www.rcsb.org/structure/${safeId}) — ${title}\n`;
                block += `**Method:** ${method}`;
                if (resolution)
                    block += ` | **Resolution:** ${resolution} A`;
                block += '\n';
                block += `**Deposited:** ${deposited}\n`;
                block += `**Entities:** ${polymerCount} polymer, ${nonPolymerCount} non-polymer (ligands/ions)\n`;
                // Polymer entities (chains)
                if (info?.polymer_entity_count_protein) {
                    block += `**Protein chains:** ${info.polymer_entity_count_protein}\n`;
                }
                if (info?.polymer_entity_count_nucleic_acid) {
                    block += `**Nucleic acid chains:** ${info.polymer_entity_count_nucleic_acid}\n`;
                }
                // Primary citation
                if (citation) {
                    block += `\n**Citation:** ${citation.title || ''}\n`;
                    const authors = citation.rcsb_authors?.slice(0, 3).join(', ') || '';
                    if (authors)
                        block += `*${authors}${citation.rcsb_authors?.length > 3 ? ' et al.' : ''}* `;
                    // The journal name is published under the *_journal_abbrev
                    // fields; the previously used key is kept as a last resort.
                    const journal = citation.rcsb_journal_abbrev
                        || citation.journal_abbrev
                        || citation.pdbx_database_id_journal;
                    if (journal)
                        block += `${journal} `;
                    if (citation.year)
                        block += `(${citation.year})`;
                    if (citation.pdbx_database_id_DOI)
                        block += ` DOI: ${citation.pdbx_database_id_DOI}`;
                    block += '\n';
                }
                return block;
            }));
            const output = [`## PDB Structure${ids.length > 1 ? 's' : ''}\n`, ...blocks];
            return output.join('\n---\n');
        }
        catch (e) {
            return `PDB lookup failed: ${e?.message || e}`;
        }
    },
});
|
|
417
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
418
|
+
// 5. BLAST Search
|
|
419
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
420
|
+
registerTool({
    name: 'blast_search',
    description: 'Submit a sequence for NCBI BLAST homology search. Supports blastn (nucleotide), blastp (protein), blastx (translated). Async — submits job, polls for results.',
    parameters: {
        sequence: { type: 'string', description: 'Nucleotide or protein sequence (FASTA or raw)', required: true },
        program: { type: 'string', description: 'BLAST program: "blastn", "blastp", or "blastx"', required: true },
        database: { type: 'string', description: 'Database: "nr" (non-redundant protein), "nt" (nucleotide), "swissprot"', required: true },
    },
    tier: 'free',
    timeout: 180_000,
    /**
     * Submit a BLAST job, poll until it finishes (up to two minutes), and
     * format the top hits as Markdown.
     *
     * @param {object} args - Tool arguments: `sequence`, `program`, `database`.
     * @returns {Promise<string>} Markdown report, or a plain error/timeout message.
     *   Network and parsing failures are caught and reported as text.
     */
    async execute(args) {
        const program = String(args.program || 'blastn').trim().toLowerCase();
        const knownPrograms = ['blastn', 'blastp', 'blastx', 'tblastn', 'tblastx'];
        if (!knownPrograms.includes(program))
            return `Unsupported BLAST program "${program}". Use one of: ${knownPrograms.join(', ')}.`;
        // Programs that search a nucleotide database default to "nt"; the rest to "nr".
        const defaultDatabase = ['blastn', 'tblastn', 'tblastx'].includes(program) ? 'nt' : 'nr';
        const database = String(args.database || defaultDatabase);
        // Clean sequence: drop FASTA header lines and all whitespace, then
        // validate the length of what will actually be submitted.
        const cleanSeq = String(args.sequence ?? '')
            .split(/\r?\n/)
            .map(line => line.trim())
            .filter(line => line !== '' && !line.startsWith('>'))
            .join('')
            .replace(/\s+/g, '');
        if (cleanSeq.length < 10)
            return 'Sequence too short for BLAST. Provide at least 10 residues/bases.';
        const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));
        try {
            // Step 1: Submit BLAST job
            await ncbiThrottle();
            const submitParams = new URLSearchParams({
                CMD: 'Put',
                PROGRAM: program,
                DATABASE: database,
                QUERY: cleanSeq,
                FORMAT_TYPE: 'XML',
                HITLIST_SIZE: '10',
            });
            const submitRes = await fetch('https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi', {
                method: 'POST',
                headers: { 'User-Agent': UA, 'Content-Type': 'application/x-www-form-urlencoded' },
                body: submitParams.toString(),
                signal: AbortSignal.timeout(30000),
            });
            if (!submitRes.ok)
                return `BLAST submission failed: HTTP ${submitRes.status}`;
            const submitText = await submitRes.text();
            // Extract RID from response
            const ridMatch = submitText.match(/RID\s*=\s*(\S+)/);
            if (!ridMatch)
                return `BLAST submission failed: could not get Request ID.\n\nResponse excerpt:\n${submitText.slice(0, 500)}`;
            const rid = ridMatch[1];
            // Extract estimated wait time (seconds); assume 15 when absent
            const rtoeMatch = submitText.match(/RTOE\s*=\s*(\d+)/);
            const rtoe = rtoeMatch ? parseInt(rtoeMatch[1], 10) : 15;
            // Step 2: Poll for results
            const maxWait = 120_000; // 2 minutes max polling
            const deadline = Date.now() + maxWait;
            // Initial wait before first poll: the estimate, capped at 10s
            await pause(Math.min(rtoe * 1000, 10000));
            const pollUrl = `https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=XML&RID=${encodeURIComponent(rid)}`;
            let resultXml = '';
            while (Date.now() < deadline) {
                await ncbiThrottle();
                // Bound each request by the remaining polling budget (at least
                // 5s) so submit + polling stays inside the 180s tool timeout.
                const requestBudget = Math.max(deadline - Date.now(), 5000);
                const pollRes = await fetch(pollUrl, {
                    headers: { 'User-Agent': UA },
                    signal: AbortSignal.timeout(requestBudget),
                });
                if (!pollRes.ok) {
                    await pause(5000);
                    continue;
                }
                const pollText = await pollRes.text();
                // Check status
                if (pollText.includes('Status=WAITING')) {
                    await pause(5000);
                    continue;
                }
                if (pollText.includes('Status=FAILED'))
                    return `BLAST job failed (RID: ${rid}). The sequence may be invalid for ${program}.`;
                if (pollText.includes('Status=UNKNOWN'))
                    return `BLAST job expired or unknown (RID: ${rid}).`;
                // If we have actual results (XML with hits)
                if (pollText.includes('<BlastOutput>') || pollText.includes('<Hit>')) {
                    resultXml = pollText;
                    break;
                }
                // Still processing
                await pause(5000);
            }
            if (!resultXml)
                return `BLAST search timed out after ${Math.round(maxWait / 1000)}s. RID: ${rid} — check manually at https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&RID=${rid}`;
            // Step 3: Parse results
            const hits = xmlTagAll(resultXml, 'Hit');
            if (hits.length === 0)
                return `BLAST completed but found no significant hits for your ${program} search against ${database}.`;
            const output = [`## BLAST Results (${program} vs ${database})\n**RID:** ${rid} | **Hits:** ${hits.length}\n`];
            for (const hit of hits.slice(0, 10)) {
                const hitNum = xmlTag(hit, 'Hit_num');
                const hitDef = xmlTag(hit, 'Hit_def').slice(0, 120);
                const hitAccession = xmlTag(hit, 'Hit_accession');
                const hitLen = xmlTag(hit, 'Hit_len');
                // Only the first HSP listed for the hit is reported
                const hsp = xmlTagAll(hit, 'Hsp')[0] || '';
                const evalue = xmlTag(hsp, 'Hsp_evalue');
                const bitScore = xmlTag(hsp, 'Hsp_bit-score');
                const identity = xmlTag(hsp, 'Hsp_identity');
                const alignLen = xmlTag(hsp, 'Hsp_align-len');
                // Guard against missing or zero counts so the report never shows NaN.
                const identCount = parseInt(identity, 10);
                const alignCount = parseInt(alignLen, 10);
                const identityText = Number.isFinite(identCount) && alignCount > 0
                    ? `${((identCount / alignCount) * 100).toFixed(1)}% (${identity}/${alignLen})`
                    : 'N/A';
                let block = `**${hitNum}. ${hitDef}**\n`;
                block += `Accession: ${hitAccession} | Length: ${hitLen}\n`;
                block += `E-value: ${evalue} | Bit score: ${bitScore} | Identity: ${identityText}\n`;
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `BLAST search failed: ${e?.message || e}`;
        }
    },
});
|
|
534
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
535
|
+
// 6. Drug Lookup (ChEMBL)
|
|
536
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
537
|
+
registerTool({
    name: 'drug_lookup',
    description: 'Search ChEMBL for drugs and compounds. Returns targets, mechanism of action, clinical phase, and molecular properties.',
    parameters: {
        query: { type: 'string', description: 'Drug name, target, or mechanism (e.g., "imatinib", "EGFR inhibitor")', required: true },
        search_type: { type: 'string', description: 'Search type: "name" (default), "target", or "mechanism"' },
    },
    tier: 'free',
    /**
     * Query the ChEMBL search endpoint and format the hits as Markdown.
     *
     * @param {object} args - Tool arguments.
     * @param {string} args.query - Free-text search term.
     * @param {string} [args.search_type] - "target" or "mechanism"; any other value searches molecules.
     * @returns {Promise<string>} Markdown report, or a human-readable error/empty-result message.
     *   Never throws: network and parsing failures are returned as text.
     */
    async execute(args) {
        const query = String(args.query);
        const searchType = String(args.search_type || 'name');
        // Only "target" and "mechanism" select their own resource; everything else is a molecule search.
        const resource = searchType === 'target' || searchType === 'mechanism' ? searchType : 'molecule';
        try {
            const url = `https://www.ebi.ac.uk/chembl/api/data/${resource}/search?q=${encodeURIComponent(query)}&format=json&limit=10`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok) {
                return `ChEMBL error: HTTP ${res.status}`;
            }
            const data = await res.json();
            if (resource === 'target') {
                const targets = data.targets || [];
                if (targets.length === 0) {
                    return `No targets found for "${query}" in ChEMBL.`;
                }
                const output = [`## ChEMBL Targets for "${query}"\n`];
                for (const t of targets.slice(0, 5)) {
                    let block = `### ${t.pref_name || 'Unknown'}\n`;
                    block += `**ChEMBL ID:** ${t.target_chembl_id || 'N/A'}\n`;
                    block += `**Type:** ${t.target_type || 'N/A'}\n`;
                    block += `**Organism:** ${t.organism || 'N/A'}\n`;
                    const accession = t.target_components?.[0]?.accession;
                    if (accession) {
                        block += `**UniProt:** ${accession}\n`;
                    }
                    output.push(block);
                }
                return output.join('\n---\n');
            }
            if (resource === 'mechanism') {
                const mechanisms = data.mechanisms || [];
                if (mechanisms.length === 0) {
                    return `No mechanisms found for "${query}" in ChEMBL.`;
                }
                const output = [`## ChEMBL Mechanisms: "${query}"\n`];
                for (const m of mechanisms.slice(0, 10)) {
                    let block = `**${m.molecule_chembl_id || 'N/A'}** → ${m.target_chembl_id || 'N/A'}\n`;
                    block += `Mechanism: ${m.mechanism_of_action || 'N/A'}\n`;
                    block += `Action type: ${m.action_type || 'N/A'}\n`;
                    // `!= null` skips both undefined and JSON null, so "Max phase: null" is never printed.
                    if (m.max_phase != null) {
                        block += `Max phase: ${m.max_phase}\n`;
                    }
                    output.push(block);
                }
                return output.join('\n---\n');
            }
            // Default: molecule search
            const molecules = data.molecules || [];
            if (molecules.length === 0) {
                return `No molecules found for "${query}" in ChEMBL.`;
            }
            const output = [`## ChEMBL Molecules: "${query}"\n`];
            for (const mol of molecules.slice(0, 5)) {
                const name = mol.pref_name || mol.molecule_chembl_id || 'Unknown';
                const chemblId = mol.molecule_chembl_id || 'N/A';
                const maxPhase = mol.max_phase ?? 'N/A';
                const type = mol.molecule_type || 'N/A';
                const props = mol.molecule_properties || {};
                let block = `### ${name}\n`;
                block += `**ChEMBL ID:** [${chemblId}](https://www.ebi.ac.uk/chembl/compound_report_card/${chemblId}/)\n`;
                block += `**Type:** ${type} | **Max Clinical Phase:** ${maxPhase}\n`;
                if (mol.first_approval) {
                    block += `**First Approval:** ${mol.first_approval}\n`;
                }
                // Molecular properties. Presence is tested with a null check rather than
                // truthiness so that genuine zero values (e.g. 0 H-bond donors) are shown,
                // and the line is assembled from whichever fields exist so it never starts
                // with a dangling separator.
                const propertyFields = [
                    ['MW', props.full_mwt, ' Da'],
                    ['ALogP', props.alogp, ''],
                    ['HBA', props.hba, ''],
                    ['HBD', props.hbd, ''],
                    ['PSA', props.psa, ''],
                ];
                const propertyParts = propertyFields
                    .filter(([, value]) => value != null && value !== '')
                    .map(([label, value, unit]) => `**${label}:** ${value}${unit}`);
                if (propertyParts.length > 0) {
                    block += `${propertyParts.join(' | ')}\n`;
                }
                // ChEMBL names this field `num_ro5_violations`; the legacy key is kept as a fallback.
                const ro5Violations = props.num_ro5_violations ?? props.ro5_violations;
                if (ro5Violations != null) {
                    block += `**Lipinski violations:** ${ro5Violations}\n`;
                }
                const smiles = mol.molecule_structures?.canonical_smiles;
                if (smiles) {
                    // Long SMILES strings are cut to 100 characters to keep the report compact.
                    block += `**SMILES:** \`${smiles.slice(0, 100)}\`\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Drug lookup failed: ${e?.message || e}`;
        }
    },
});
|
|
641
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
642
|
+
// 7. Pathway Search (Reactome)
|
|
643
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
644
|
+
registerTool({
    name: 'pathway_search',
    description: 'Search Reactome for biological pathways. Returns pathway names, species, summaries, and hierarchical relationships.',
    parameters: {
        query: { type: 'string', description: 'Pathway name or keyword (e.g., "apoptosis", "glycolysis", "MAPK signaling")', required: true },
        organism: { type: 'string', description: 'Species name (default: "Homo sapiens")' },
    },
    tier: 'free',
    /**
     * Search the Reactome ContentService for pathways and format up to 10 hits as Markdown.
     *
     * @param {object} args - Tool arguments.
     * @param {string} args.query - Pathway name or keyword.
     * @param {string} [args.organism] - Species filter; defaults to "Homo sapiens".
     * @returns {Promise<string>} Markdown report, or a human-readable error/empty-result message.
     *   Never throws: failures are returned as text.
     */
    async execute(args) {
        const query = String(args.query);
        const organism = String(args.organism || 'Homo sapiens');
        const maxEntries = 10;
        // Search results embed HTML (e.g. highlight spans around matched terms); remove any tags.
        const stripTags = (text) => String(text).replace(/<[^>]+>/g, '');
        try {
            const url = `https://reactome.org/ContentService/search/query?query=${encodeURIComponent(query)}&species=${encodeURIComponent(organism)}&types=Pathway&cluster=true`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA, 'Accept': 'application/json' },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok) {
                return `Reactome error: HTTP ${res.status}`;
            }
            const data = await res.json();
            const groups = data.results || [];
            // Results arrive clustered into groups; flatten them and keep the first few entries overall.
            const entries = groups.flatMap((group) => group.entries || []).slice(0, maxEntries);
            // Covers both "no groups" and "groups present but every one is empty".
            if (entries.length === 0) {
                return `No pathways found for "${query}" in Reactome.`;
            }
            const output = [`## Reactome Pathways: "${query}"\n`];
            for (const entry of entries) {
                const title = stripTags(entry.name || '') || 'Unknown';
                let block = `### ${title}\n`;
                block += `**ID:** [${entry.stId}](https://reactome.org/content/detail/${entry.stId})\n`;
                block += `**Species:** ${entry.species?.[0] || organism}\n`;
                if (entry.summation) {
                    const summary = stripTags(entry.summation);
                    const truncated = summary.length > 300 ? `${summary.slice(0, 300)}...` : summary;
                    block += `\n${truncated}\n`;
                }
                if (entry.compartmentNames?.length > 0) {
                    block += `**Compartments:** ${entry.compartmentNames.join(', ')}\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Pathway search failed: ${e?.message || e}`;
        }
    },
});
|
|
698
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
699
|
+
// 8. Taxonomy Lookup (GBIF)
|
|
700
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
701
|
+
registerTool({
    name: 'taxonomy_lookup',
    description: 'Look up taxonomic classification of any organism via GBIF. Returns full lineage from kingdom to species with taxonomic status.',
    parameters: {
        name: { type: 'string', description: 'Organism name (e.g., "Homo sapiens", "E. coli", "giant panda")', required: true },
        rank: { type: 'string', description: 'Expected rank filter: "species", "genus", "family", etc.' },
    },
    tier: 'free',
    /**
     * Search the GBIF species API and format the first three matches as Markdown.
     *
     * @param {object} args - Tool arguments.
     * @param {string} args.name - Organism name to search for.
     * @param {string} [args.rank] - Optional rank filter; upper-cased before being sent.
     * @returns {Promise<string>} Markdown report, or a human-readable error/empty-result message.
     *   Never throws: failures are returned as text.
     */
    async execute(args) {
        const name = String(args.name);
        const rank = args.rank ? String(args.rank).trim().toUpperCase() : '';
        try {
            let url = `https://api.gbif.org/v1/species/search?q=${encodeURIComponent(name)}&limit=5`;
            if (rank) {
                // The rank is caller-supplied text: encode it so characters such as "&" or
                // spaces cannot corrupt the query string or inject extra parameters.
                url += `&rank=${encodeURIComponent(rank)}`;
            }
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok) {
                return `GBIF error: HTTP ${res.status}`;
            }
            const data = await res.json();
            const records = data.results || [];
            if (records.length === 0) {
                return `No taxonomic records for "${name}" in GBIF.`;
            }
            const output = [`## Taxonomy: "${name}"\n`];
            for (const sp of records.slice(0, 3)) {
                let block = `### ${sp.canonicalName || sp.scientificName || name}\n`;
                if (sp.authorship) {
                    block += `*${sp.authorship}*\n`;
                }
                block += `**Rank:** ${sp.rank || 'N/A'}\n`;
                block += `**Status:** ${sp.taxonomicStatus || 'N/A'}\n`;
                block += `**GBIF Key:** [${sp.key}](https://www.gbif.org/species/${sp.key})\n`;
                // Full lineage, highest rank first; genus and species are italicised.
                const lineageFields = [
                    ['Kingdom', sp.kingdom, false],
                    ['Phylum', sp.phylum, false],
                    ['Class', sp.class, false],
                    ['Order', sp.order, false],
                    ['Family', sp.family, false],
                    ['Genus', sp.genus, true],
                    ['Species', sp.species, true],
                ];
                const lineage = lineageFields
                    .filter(([, value]) => value)
                    .map(([label, value, italic]) => `${label}: ${italic ? `*${value}*` : value}`);
                if (lineage.length > 0) {
                    block += `\n**Lineage:**\n${lineage.map((l) => `- ${l}`).join('\n')}\n`;
                }
                if (sp.vernacularNames?.length > 0) {
                    const common = sp.vernacularNames
                        .slice(0, 5)
                        .map((v) => v.vernacularName)
                        .filter(Boolean);
                    if (common.length > 0) {
                        block += `\n**Common Names:** ${common.join(', ')}\n`;
                    }
                }
                if (sp.descriptions?.length > 0) {
                    const desc = sp.descriptions[0].description || '';
                    if (desc) {
                        block += `\n${desc.slice(0, 300)}${desc.length > 300 ? '...' : ''}\n`;
                    }
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Taxonomy lookup failed: ${e?.message || e}`;
        }
    },
});
|
|
772
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
773
|
+
// 9. Clinical Trials
|
|
774
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
775
|
+
registerTool({
    name: 'clinical_trials',
    description: 'Search ClinicalTrials.gov for clinical studies. Filter by condition, drug, status, and phase.',
    parameters: {
        query: { type: 'string', description: 'Search term (e.g., "pembrolizumab melanoma", "COVID-19 vaccine")', required: true },
        status: { type: 'string', description: 'Filter: "recruiting", "completed", "active" (active, not recruiting), "enrolling" (enrolling by invitation)' },
        phase: { type: 'string', description: 'Phase filter: "EARLY_PHASE1", "PHASE1", "PHASE2", "PHASE3", "PHASE4"' },
    },
    tier: 'free',
    /**
     * Search the ClinicalTrials.gov v2 API and format up to 10 studies as Markdown.
     *
     * @param {object} args - Tool arguments.
     * @param {string} args.query - Free-text search term.
     * @param {string} [args.status] - Friendly status alias (see parameter description) or a raw API status value.
     * @param {string} [args.phase] - Phase value such as "PHASE2"; upper-cased before being sent.
     * @returns {Promise<string>} Markdown report, or a human-readable error/empty-result message.
     *   Never throws: failures are returned as text.
     */
    async execute(args) {
        const query = String(args.query);
        const status = args.status ? String(args.status).trim() : '';
        const phase = args.phase ? String(args.phase).trim() : '';
        try {
            // countTotal=true asks the API to include `totalCount`, so the header can report
            // the real number of matches rather than the size of the returned page.
            let url = `https://clinicaltrials.gov/api/v2/studies?query.term=${encodeURIComponent(query)}&pageSize=10&countTotal=true`;
            // Map user-friendly status to API values
            if (status) {
                // A Map avoids accidental hits on Object.prototype keys such as "constructor".
                const statusMap = new Map([
                    ['recruiting', 'RECRUITING'],
                    ['completed', 'COMPLETED'],
                    ['active', 'ACTIVE_NOT_RECRUITING'],
                    ['enrolling', 'ENROLLING_BY_INVITATION'],
                ]);
                const mapped = statusMap.get(status.toLowerCase()) ?? status.toUpperCase();
                url += `&filter.overallStatus=${encodeURIComponent(mapped)}`;
            }
            if (phase) {
                // The v2 API has no dedicated phase parameter; phases are filtered with an
                // Essie expression passed through filter.advanced.
                const phaseExpression = `AREA[Phase]${phase.toUpperCase()}`;
                url += `&filter.advanced=${encodeURIComponent(phaseExpression)}`;
            }
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok) {
                return `ClinicalTrials.gov error: HTTP ${res.status}`;
            }
            const data = await res.json();
            const studies = data.studies || [];
            if (studies.length === 0) {
                return `No clinical trials found for "${query}".`;
            }
            const totalCount = data.totalCount ?? studies.length;
            const output = [`## Clinical Trials: "${query}" (${totalCount} total)\n`];
            for (const study of studies) {
                const proto = study.protocolSection || {};
                const idModule = proto.identificationModule || {};
                const statusModule = proto.statusModule || {};
                const designModule = proto.designModule || {};
                const descModule = proto.descriptionModule || {};
                const conditionsModule = proto.conditionsModule || {};
                const sponsorModule = proto.sponsorCollaboratorsModule || {};
                const nctId = idModule.nctId || 'N/A';
                const title = idModule.officialTitle || idModule.briefTitle || 'Untitled';
                const overallStatus = statusModule.overallStatus || 'N/A';
                const phases = designModule.phases?.join(', ') || 'N/A';
                const startDate = statusModule.startDateStruct?.date || '';
                const completionDate = statusModule.completionDateStruct?.date || '';
                const briefSummary = descModule.briefSummary || '';
                const conditions = conditionsModule.conditions?.join(', ') || '';
                const sponsor = sponsorModule.leadSponsor?.name || '';
                let block = `### ${title.slice(0, 150)}\n`;
                block += `**NCT ID:** [${nctId}](https://clinicaltrials.gov/study/${nctId})\n`;
                block += `**Status:** ${overallStatus} | **Phase:** ${phases}\n`;
                if (sponsor) {
                    block += `**Sponsor:** ${sponsor}\n`;
                }
                if (conditions) {
                    block += `**Conditions:** ${conditions}\n`;
                }
                if (startDate) {
                    block += `**Dates:** ${startDate}${completionDate ? ` → ${completionDate}` : ''}\n`;
                }
                if (briefSummary) {
                    const truncated = briefSummary.length > 300 ? `${briefSummary.slice(0, 300)}...` : briefSummary;
                    block += `\n> ${truncated}\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Clinical trials search failed: ${e?.message || e}`;
        }
    },
});
|
|
855
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
856
|
+
// 10. Disease Info (Open Targets)
|
|
857
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
858
|
+
registerTool({
    name: 'disease_info',
    description: 'Look up disease information from Open Targets Platform. Returns associated genes, drugs, and therapeutic areas via GraphQL.',
    parameters: {
        disease: { type: 'string', description: 'Disease name (e.g., "lung cancer", "Alzheimer", "diabetes mellitus")', required: true },
    },
    tier: 'free',
    /**
     * Resolve a disease name to Open Targets disease IDs, then fetch and format details
     * (description, therapeutic areas, synonyms, associated genes, known drugs) for the
     * first two matches.
     *
     * @param {object} args - Tool arguments.
     * @param {string} args.disease - Disease name to search for.
     * @returns {Promise<string>} Markdown report, or a human-readable error/empty-result message.
     *   Never throws: failures are returned as text.
     */
    async execute(args) {
        const disease = String(args.disease);
        const endpoint = 'https://api.platform.opentargets.org/api/v4/graphql';
        // Both steps POST a GraphQL document to the same endpoint with the same headers/timeout.
        const postGraphql = (query, variables) => fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'User-Agent': UA },
            body: JSON.stringify({ query, variables }),
            signal: AbortSignal.timeout(10000),
        });
        // Step 1 document: search for disease IDs.
        const searchQuery = `
            query SearchDisease($q: String!) {
              search(queryString: $q, entityNames: ["disease"], page: { index: 0, size: 3 }) {
                hits {
                  id
                  entity
                  name
                  description
                }
                total
              }
            }
        `;
        // Step 2 document: disease details with associations. Built once, reused per hit.
        // NOTE(review): the field selection is unchanged from the original; confirm it still
        // matches the current Open Targets schema.
        const detailQuery = `
            query DiseaseDetail($id: String!) {
              disease(efoId: $id) {
                id
                name
                description
                therapeuticAreas {
                  id
                  name
                }
                synonyms {
                  terms
                  relation
                }
                knownDrugs(size: 10) {
                  uniqueTargetCount
                  uniqueDrugCount
                  rows {
                    drug {
                      id
                      name
                      drugType
                      maximumClinicalTrialPhase
                    }
                    mechanismOfAction
                    approvedIndications
                  }
                }
                associatedTargets(page: { index: 0, size: 10 }) {
                  count
                  rows {
                    target {
                      id
                      approvedSymbol
                      approvedName
                    }
                    score
                    datatypeScores {
                      componentId: id
                      score
                    }
                  }
                }
              }
            }
        `;
        try {
            const searchRes = await postGraphql(searchQuery, { q: disease });
            if (!searchRes.ok) {
                return `Open Targets error: HTTP ${searchRes.status}`;
            }
            const searchData = await searchRes.json();
            const hits = searchData.data?.search?.hits || [];
            const diseaseHits = hits.filter((h) => h.entity === 'disease');
            if (diseaseHits.length === 0) {
                return `No disease found for "${disease}" in Open Targets.`;
            }
            const output = [`## Disease Info: "${disease}"\n`];
            // Remember the last failing HTTP status so an all-failed lookup can be explained.
            let lastDetailStatus = null;
            for (const hit of diseaseHits.slice(0, 2)) {
                const detailRes = await postGraphql(detailQuery, { id: hit.id });
                if (!detailRes.ok) {
                    lastDetailStatus = detailRes.status;
                    continue;
                }
                const detailData = await detailRes.json();
                const d = detailData.data?.disease;
                if (!d) {
                    continue;
                }
                let block = `### ${d.name}\n`;
                block += `**EFO ID:** [${d.id}](https://platform.opentargets.org/disease/${d.id})\n`;
                if (d.description) {
                    const truncated = d.description.length > 500 ? `${d.description.slice(0, 500)}...` : d.description;
                    block += `\n${truncated}\n`;
                }
                // Therapeutic areas
                if (d.therapeuticAreas?.length > 0) {
                    block += `\n**Therapeutic Areas:** ${d.therapeuticAreas.map((t) => t.name).join(', ')}\n`;
                }
                // Synonyms: only exact synonyms are listed, capped at eight terms.
                if (d.synonyms?.length > 0) {
                    const exactSynonyms = d.synonyms
                        .filter((s) => s.relation === 'HAS_EXACT_SYNONYM')
                        .flatMap((s) => s.terms || [])
                        .slice(0, 8);
                    if (exactSynonyms.length > 0) {
                        block += `**Synonyms:** ${exactSynonyms.join(', ')}\n`;
                    }
                }
                // Top associated genes
                const targets = d.associatedTargets;
                if (targets?.rows?.length > 0) {
                    block += `\n**Top Associated Genes** (${targets.count} total):\n`;
                    for (const row of targets.rows.slice(0, 8)) {
                        // A row with a missing target or non-numeric score must not abort the whole report.
                        const symbol = row.target?.approvedSymbol ?? 'Unknown';
                        const approvedName = row.target?.approvedName ?? 'N/A';
                        const score = typeof row.score === 'number' ? row.score.toFixed(3) : 'N/A';
                        block += `- **${symbol}** (${approvedName}) — score: ${score}\n`;
                    }
                }
                // Known drugs, de-duplicated by name because one drug can appear in several rows.
                const drugs = d.knownDrugs;
                if (drugs?.rows?.length > 0) {
                    block += `\n**Known Drugs** (${drugs.uniqueDrugCount} drugs, ${drugs.uniqueTargetCount} targets):\n`;
                    const seen = new Set();
                    for (const row of drugs.rows) {
                        const drugName = row.drug?.name || 'Unknown';
                        if (seen.has(drugName)) {
                            continue;
                        }
                        seen.add(drugName);
                        const phase = row.drug?.maximumClinicalTrialPhase ?? 'N/A';
                        const moa = row.mechanismOfAction || '';
                        block += `- **${drugName}** (phase ${phase}) — ${moa}\n`;
                    }
                }
                output.push(block);
            }
            // Only the header is present: every detail request failed or returned no data.
            if (output.length === 1) {
                const reason = lastDetailStatus !== null ? ` (last error: HTTP ${lastDetailStatus})` : '';
                return `Found "${disease}" in Open Targets, but no disease details could be retrieved${reason}.`;
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Disease info lookup failed: ${e?.message || e}`;
        }
    },
});
|
|
1010
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
1011
|
+
// 11. Sequence Tools (Local analysis)
|
|
1012
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
1013
|
+
registerTool({
|
|
1014
|
+
name: 'sequence_tools',
|
|
1015
|
+
description: 'Local sequence analysis tools: GC content, reverse complement, translation (standard genetic code), ORF finding, motif search (regex), and molecular weight calculation. No API calls — runs instantly.',
|
|
1016
|
+
parameters: {
|
|
1017
|
+
sequence: { type: 'string', description: 'DNA/RNA/protein sequence (raw or FASTA format)', required: true },
|
|
1018
|
+
operation: { type: 'string', description: 'Operation: "gc_content", "reverse_complement", "translate", "find_orfs", "motif_search", "molecular_weight"', required: true },
|
|
1019
|
+
pattern: { type: 'string', description: 'Regex pattern for motif_search (e.g., "ATG[ATCG]{3,9}TAA")' },
|
|
1020
|
+
},
|
|
1021
|
+
tier: 'free',
|
|
1022
|
+
async execute(args) {
|
|
1023
|
+
const rawSeq = String(args.sequence).trim();
|
|
1024
|
+
const operation = String(args.operation);
|
|
1025
|
+
const pattern = args.pattern ? String(args.pattern) : '';
|
|
1026
|
+
// Clean sequence: remove FASTA header, whitespace, numbers
|
|
1027
|
+
const seq = rawSeq
|
|
1028
|
+
.split('\n')
|
|
1029
|
+
.filter(line => !line.startsWith('>'))
|
|
1030
|
+
.join('')
|
|
1031
|
+
.replace(/[\s\d]/g, '')
|
|
1032
|
+
.toUpperCase();
|
|
1033
|
+
if (seq.length === 0)
|
|
1034
|
+
return 'No valid sequence provided. Remove FASTA headers and whitespace.';
|
|
1035
|
+
const isDna = /^[ATCGN]+$/i.test(seq);
|
|
1036
|
+
const isRna = /^[AUCGN]+$/i.test(seq);
|
|
1037
|
+
const isProtein = /^[ACDEFGHIKLMNPQRSTVWY*]+$/i.test(seq) && !isDna;
|
|
1038
|
+
switch (operation) {
|
|
1039
|
+
case 'gc_content': {
|
|
1040
|
+
if (!isDna && !isRna)
|
|
1041
|
+
return 'GC content requires a DNA or RNA sequence (A, T/U, G, C, N).';
|
|
1042
|
+
const gc = (seq.match(/[GC]/gi) || []).length;
|
|
1043
|
+
const total = seq.replace(/N/gi, '').length;
|
|
1044
|
+
const pct = total > 0 ? ((gc / total) * 100).toFixed(2) : '0';
|
|
1045
|
+
const at = total - gc;
|
|
1046
|
+
let result = `## GC Content Analysis\n\n`;
|
|
1047
|
+
result += `**Sequence length:** ${seq.length} bp\n`;
|
|
1048
|
+
result += `**GC count:** ${gc} | **AT count:** ${at}\n`;
|
|
1049
|
+
result += `**GC%:** ${pct}%\n`;
|
|
1050
|
+
result += `**AT%:** ${(100 - parseFloat(pct)).toFixed(2)}%\n`;
|
|
1051
|
+
// Base composition
|
|
1052
|
+
const counts = {};
|
|
1053
|
+
for (const base of seq) {
|
|
1054
|
+
counts[base] = (counts[base] || 0) + 1;
|
|
1055
|
+
}
|
|
1056
|
+
result += `\n**Base composition:**\n`;
|
|
1057
|
+
for (const [base, count] of Object.entries(counts).sort()) {
|
|
1058
|
+
result += `- ${base}: ${count} (${((count / seq.length) * 100).toFixed(1)}%)\n`;
|
|
1059
|
+
}
|
|
1060
|
+
// Tm estimation (basic: 2*(A+T) + 4*(G+C) for short, or 64.9 + 41*(G+C-16.4)/N)
|
|
1061
|
+
if (seq.length <= 30) {
|
|
1062
|
+
const tm = 2 * at + 4 * gc;
|
|
1063
|
+
result += `\n**Estimated Tm (basic):** ${tm} C (for primers < 30 bp)\n`;
|
|
1064
|
+
}
|
|
1065
|
+
else {
|
|
1066
|
+
const tm = 64.9 + 41 * (gc - 16.4) / total;
|
|
1067
|
+
result += `\n**Estimated Tm (salt-adjusted):** ${tm.toFixed(1)} C\n`;
|
|
1068
|
+
}
|
|
1069
|
+
return result;
|
|
1070
|
+
}
|
|
1071
|
+
case 'reverse_complement': {
|
|
1072
|
+
if (!isDna && !isRna)
|
|
1073
|
+
return 'Reverse complement requires a DNA or RNA sequence.';
|
|
1074
|
+
const complementMap = isDna
|
|
1075
|
+
? { A: 'T', T: 'A', G: 'C', C: 'G', N: 'N' }
|
|
1076
|
+
: { A: 'U', U: 'A', G: 'C', C: 'G', N: 'N' };
|
|
1077
|
+
const complement = seq.split('').map(b => complementMap[b] || 'N').join('');
|
|
1078
|
+
const revComp = complement.split('').reverse().join('');
|
|
1079
|
+
let result = `## Reverse Complement\n\n`;
|
|
1080
|
+
result += `**Input (${isDna ? 'DNA' : 'RNA'}, ${seq.length} bp):**\n`;
|
|
1081
|
+
result += `5'-\`${seq.length > 80 ? seq.slice(0, 40) + '...' + seq.slice(-40) : seq}\`-3'\n\n`;
|
|
1082
|
+
result += `**Complement:**\n`;
|
|
1083
|
+
result += `3'-\`${complement.length > 80 ? complement.slice(0, 40) + '...' + complement.slice(-40) : complement}\`-5'\n\n`;
|
|
1084
|
+
result += `**Reverse complement:**\n`;
|
|
1085
|
+
result += `5'-\`${revComp.length > 80 ? revComp.slice(0, 40) + '...' + revComp.slice(-40) : revComp}\`-3'\n`;
|
|
1086
|
+
return result;
|
|
1087
|
+
}
|
|
1088
|
+
case 'translate': {
|
|
1089
|
+
let dnaSeq = seq;
|
|
1090
|
+
if (isRna)
|
|
1091
|
+
dnaSeq = seq.replace(/U/g, 'T');
|
|
1092
|
+
if (!(/^[ATCGN]+$/i.test(dnaSeq)))
|
|
1093
|
+
return 'Translation requires a DNA or RNA sequence.';
|
|
1094
|
+
// Translate all 3 reading frames
|
|
1095
|
+
const results = [`## Translation (Standard Genetic Code)\n`];
|
|
1096
|
+
results.push(`**Input:** ${dnaSeq.length} bp\n`);
|
|
1097
|
+
for (let frame = 0; frame < 3; frame++) {
|
|
1098
|
+
const protein = [];
|
|
1099
|
+
for (let i = frame; i + 2 < dnaSeq.length; i += 3) {
|
|
1100
|
+
const codon = dnaSeq.slice(i, i + 3);
|
|
1101
|
+
if (codon.includes('N')) {
|
|
1102
|
+
protein.push('X');
|
|
1103
|
+
}
|
|
1104
|
+
else {
|
|
1105
|
+
protein.push(CODON_TABLE[codon] || 'X');
|
|
1106
|
+
}
|
|
1107
|
+
}
|
|
1108
|
+
const proteinStr = protein.join('');
|
|
1109
|
+
results.push(`**Frame +${frame + 1}:**`);
|
|
1110
|
+
results.push(`\`${proteinStr.length > 120 ? proteinStr.slice(0, 60) + '...' + proteinStr.slice(-60) : proteinStr}\``);
|
|
1111
|
+
results.push(`(${proteinStr.length} aa, ${(proteinStr.match(/\*/g) || []).length} stop codons)\n`);
|
|
1112
|
+
}
|
|
1113
|
+
// Highlight first ORF in frame +1
|
|
1114
|
+
const frame1 = [];
|
|
1115
|
+
for (let i = 0; i + 2 < dnaSeq.length; i += 3) {
|
|
1116
|
+
const codon = dnaSeq.slice(i, i + 3);
|
|
1117
|
+
frame1.push(codon.includes('N') ? 'X' : (CODON_TABLE[codon] || 'X'));
|
|
1118
|
+
}
|
|
1119
|
+
const protStr = frame1.join('');
|
|
1120
|
+
const orfMatch = protStr.match(/M[^*]+/);
|
|
1121
|
+
if (orfMatch) {
|
|
1122
|
+
results.push(`**Longest ORF (frame +1):** ${orfMatch[0].length} aa starting at M`);
|
|
1123
|
+
results.push(`\`${orfMatch[0].slice(0, 80)}${orfMatch[0].length > 80 ? '...' : ''}\``);
|
|
1124
|
+
}
|
|
1125
|
+
return results.join('\n');
|
|
1126
|
+
}
|
|
1127
|
+
case 'find_orfs': {
|
|
1128
|
+
let dnaSeq = seq;
|
|
1129
|
+
if (isRna)
|
|
1130
|
+
dnaSeq = seq.replace(/U/g, 'T');
|
|
1131
|
+
if (!(/^[ATCGN]+$/i.test(dnaSeq)))
|
|
1132
|
+
return 'ORF finding requires a DNA or RNA sequence.';
|
|
1133
|
+
const minOrfLength = 30; // minimum 30 aa = 90 bp
|
|
1134
|
+
const orfs = [];
|
|
1135
|
+
// Search all 3 forward frames
|
|
1136
|
+
for (let frame = 0; frame < 3; frame++) {
|
|
1137
|
+
const protein = [];
|
|
1138
|
+
for (let i = frame; i + 2 < dnaSeq.length; i += 3) {
|
|
1139
|
+
const codon = dnaSeq.slice(i, i + 3);
|
|
1140
|
+
protein.push(codon.includes('N') ? 'X' : (CODON_TABLE[codon] || 'X'));
|
|
1141
|
+
}
|
|
1142
|
+
const protStr = protein.join('');
|
|
1143
|
+
// Find all M...* ORFs
|
|
1144
|
+
const orfRe = /M[^*]*/g;
|
|
1145
|
+
let m;
|
|
1146
|
+
while ((m = orfRe.exec(protStr)) !== null) {
|
|
1147
|
+
if (m[0].length >= minOrfLength) {
|
|
1148
|
+
const aaStart = m.index;
|
|
1149
|
+
orfs.push({
|
|
1150
|
+
frame: frame + 1,
|
|
1151
|
+
start: frame + aaStart * 3 + 1, // 1-based nucleotide position
|
|
1152
|
+
end: frame + (aaStart + m[0].length) * 3,
|
|
1153
|
+
length: m[0].length,
|
|
1154
|
+
protein: m[0],
|
|
1155
|
+
});
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
// Also search reverse complement
|
|
1160
|
+
const compMap = { A: 'T', T: 'A', G: 'C', C: 'G', N: 'N' };
|
|
1161
|
+
const rcSeq = dnaSeq.split('').map(b => compMap[b] || 'N').reverse().join('');
|
|
1162
|
+
for (let frame = 0; frame < 3; frame++) {
|
|
1163
|
+
const protein = [];
|
|
1164
|
+
for (let i = frame; i + 2 < rcSeq.length; i += 3) {
|
|
1165
|
+
const codon = rcSeq.slice(i, i + 3);
|
|
1166
|
+
protein.push(codon.includes('N') ? 'X' : (CODON_TABLE[codon] || 'X'));
|
|
1167
|
+
}
|
|
1168
|
+
const protStr = protein.join('');
|
|
1169
|
+
const orfRe = /M[^*]*/g;
|
|
1170
|
+
let m;
|
|
1171
|
+
while ((m = orfRe.exec(protStr)) !== null) {
|
|
1172
|
+
if (m[0].length >= minOrfLength) {
|
|
1173
|
+
const aaStart = m.index;
|
|
1174
|
+
orfs.push({
|
|
1175
|
+
frame: -(frame + 1),
|
|
1176
|
+
start: dnaSeq.length - (frame + (aaStart + m[0].length) * 3) + 1,
|
|
1177
|
+
end: dnaSeq.length - (frame + aaStart * 3),
|
|
1178
|
+
length: m[0].length,
|
|
1179
|
+
protein: m[0],
|
|
1180
|
+
});
|
|
1181
|
+
}
|
|
1182
|
+
}
|
|
1183
|
+
}
|
|
1184
|
+
// Sort by length descending
|
|
1185
|
+
orfs.sort((a, b) => b.length - a.length);
|
|
1186
|
+
let result = `## Open Reading Frames (min 30 aa)\n\n`;
|
|
1187
|
+
result += `**Sequence length:** ${dnaSeq.length} bp\n`;
|
|
1188
|
+
result += `**ORFs found:** ${orfs.length}\n\n`;
|
|
1189
|
+
if (orfs.length === 0) {
|
|
1190
|
+
result += 'No ORFs >= 30 aa found. Try a longer sequence or lower threshold.\n';
|
|
1191
|
+
}
|
|
1192
|
+
else {
|
|
1193
|
+
for (const orf of orfs.slice(0, 15)) {
|
|
1194
|
+
result += `**Frame ${orf.frame > 0 ? '+' : ''}${orf.frame}** | nt ${orf.start}-${orf.end} | **${orf.length} aa** (${orf.length * 3} bp)\n`;
|
|
1195
|
+
result += `\`${orf.protein.slice(0, 60)}${orf.protein.length > 60 ? '...' : ''}\`\n\n`;
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
return result;
|
|
1199
|
+
}
|
|
1200
|
+
case 'motif_search': {
|
|
1201
|
+
if (!pattern)
|
|
1202
|
+
return 'Motif search requires a pattern parameter (regex). Example: "ATG[ATCG]{3,9}TAA"';
|
|
1203
|
+
let re;
|
|
1204
|
+
try {
|
|
1205
|
+
re = new RegExp(pattern, 'gi');
|
|
1206
|
+
}
|
|
1207
|
+
catch (err) {
|
|
1208
|
+
return `Invalid regex pattern: ${err?.message || err}`;
|
|
1209
|
+
}
|
|
1210
|
+
const matches = [];
|
|
1211
|
+
let m;
|
|
1212
|
+
while ((m = re.exec(seq)) !== null) {
|
|
1213
|
+
matches.push({ start: m.index + 1, end: m.index + m[0].length, match: m[0] });
|
|
1214
|
+
// Prevent infinite loop on zero-length matches
|
|
1215
|
+
if (m[0].length === 0)
|
|
1216
|
+
re.lastIndex++;
|
|
1217
|
+
}
|
|
1218
|
+
let result = `## Motif Search\n\n`;
|
|
1219
|
+
result += `**Pattern:** \`${pattern}\`\n`;
|
|
1220
|
+
result += `**Sequence length:** ${seq.length}\n`;
|
|
1221
|
+
result += `**Matches found:** ${matches.length}\n\n`;
|
|
1222
|
+
if (matches.length === 0) {
|
|
1223
|
+
result += 'No matches found.\n';
|
|
1224
|
+
}
|
|
1225
|
+
else {
|
|
1226
|
+
for (const match of matches.slice(0, 50)) {
|
|
1227
|
+
const display = match.match.length > 60 ? match.match.slice(0, 60) + '...' : match.match;
|
|
1228
|
+
result += `- **Position ${match.start}-${match.end}:** \`${display}\`\n`;
|
|
1229
|
+
}
|
|
1230
|
+
if (matches.length > 50)
|
|
1231
|
+
result += `\n... and ${matches.length - 50} more matches.\n`;
|
|
1232
|
+
}
|
|
1233
|
+
return result;
|
|
1234
|
+
}
|
|
1235
|
+
case 'molecular_weight': {
|
|
1236
|
+
let result = `## Molecular Weight\n\n`;
|
|
1237
|
+
result += `**Sequence length:** ${seq.length}\n`;
|
|
1238
|
+
if (isProtein) {
|
|
1239
|
+
// Protein MW: sum of AA weights - (n-1) * water (18.02)
|
|
1240
|
+
let mw = 0;
|
|
1241
|
+
let unknowns = 0;
|
|
1242
|
+
for (const aa of seq) {
|
|
1243
|
+
if (aa === '*')
|
|
1244
|
+
continue; // stop codon
|
|
1245
|
+
if (AA_WEIGHTS[aa]) {
|
|
1246
|
+
mw += AA_WEIGHTS[aa];
|
|
1247
|
+
}
|
|
1248
|
+
else {
|
|
1249
|
+
unknowns++;
|
|
1250
|
+
mw += 128.16; // average AA weight
|
|
1251
|
+
}
|
|
1252
|
+
}
|
|
1253
|
+
// Subtract water for peptide bonds
|
|
1254
|
+
const aas = seq.replace(/\*/g, '').length;
|
|
1255
|
+
mw -= (aas - 1) * 18.02;
|
|
1256
|
+
result += `**Type:** Protein (${aas} amino acids)\n`;
|
|
1257
|
+
result += `**Molecular Weight:** ${mw.toFixed(2)} Da (${(mw / 1000).toFixed(2)} kDa)\n`;
|
|
1258
|
+
if (unknowns > 0)
|
|
1259
|
+
result += `*Note: ${unknowns} unknown residues estimated at 128.16 Da (average)*\n`;
|
|
1260
|
+
// Extinction coefficient estimate (Pace method)
|
|
1261
|
+
const nTrp = (seq.match(/W/g) || []).length;
|
|
1262
|
+
const nTyr = (seq.match(/Y/g) || []).length;
|
|
1263
|
+
const nCys = (seq.match(/C/g) || []).length;
|
|
1264
|
+
const e280 = nTrp * 5500 + nTyr * 1490 + nCys * 125;
|
|
1265
|
+
result += `\n**Extinction coefficient (280nm):** ${e280} M\u207B\u00B9cm\u207B\u00B9\n`;
|
|
1266
|
+
result += `(${nTrp} Trp, ${nTyr} Tyr, ${nCys} Cys)\n`;
|
|
1267
|
+
// Isoelectric point estimate (very rough)
|
|
1268
|
+
const nAsp = (seq.match(/D/g) || []).length;
|
|
1269
|
+
const nGlu = (seq.match(/E/g) || []).length;
|
|
1270
|
+
const nHis = (seq.match(/H/g) || []).length;
|
|
1271
|
+
const nLys = (seq.match(/K/g) || []).length;
|
|
1272
|
+
const nArg = (seq.match(/R/g) || []).length;
|
|
1273
|
+
const negCharge = nAsp + nGlu;
|
|
1274
|
+
const posCharge = nHis + nLys + nArg;
|
|
1275
|
+
result += `\n**Charge residues:** ${posCharge} positive (K:${nLys} R:${nArg} H:${nHis}), ${negCharge} negative (D:${nAsp} E:${nGlu})\n`;
|
|
1276
|
+
}
|
|
1277
|
+
else if (isDna || isRna) {
|
|
1278
|
+
// Nucleic acid MW
|
|
1279
|
+
const weights = isDna ? DNA_WEIGHTS : { A: 347.2, U: 324.2, G: 363.2, C: 323.2 };
|
|
1280
|
+
let mw = 0;
|
|
1281
|
+
for (const base of seq) {
|
|
1282
|
+
mw += weights[base] || 330; // average for N
|
|
1283
|
+
}
|
|
1284
|
+
// Subtract water for phosphodiester bonds, add 5' phosphate
|
|
1285
|
+
mw -= (seq.length - 1) * 18.02;
|
|
1286
|
+
result += `**Type:** ${isDna ? 'DNA' : 'RNA'} (${seq.length} ${isDna ? 'bp' : 'nt'})\n`;
|
|
1287
|
+
result += `**Molecular Weight (ss):** ${mw.toFixed(2)} Da (${(mw / 1000).toFixed(2)} kDa)\n`;
|
|
1288
|
+
if (isDna) {
|
|
1289
|
+
result += `**Molecular Weight (ds):** ~${(mw * 2).toFixed(0)} Da (${((mw * 2) / 1000).toFixed(2)} kDa)\n`;
|
|
1290
|
+
}
|
|
1291
|
+
// Concentration conversion
|
|
1292
|
+
const ugPerOd = isDna ? (seq.length < 25 ? 33 : 50) : 40;
|
|
1293
|
+
result += `\n**1 OD260 =** ~${ugPerOd} ug/mL\n`;
|
|
1294
|
+
}
|
|
1295
|
+
else {
|
|
1296
|
+
result += 'Could not determine sequence type (DNA/RNA/protein). Check for invalid characters.\n';
|
|
1297
|
+
}
|
|
1298
|
+
return result;
|
|
1299
|
+
}
|
|
1300
|
+
default:
|
|
1301
|
+
return `Unknown operation: "${operation}". Supported: gc_content, reverse_complement, translate, find_orfs, motif_search, molecular_weight`;
|
|
1302
|
+
}
|
|
1303
|
+
},
|
|
1304
|
+
});
|
|
1305
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
1306
|
+
// 12. Ecology Data (GBIF Occurrences)
|
|
1307
|
+
// ════════════════════════════════════════════════════════════════════════
|
|
1308
|
+
registerTool({
    name: 'ecology_data',
    description: 'Search GBIF for biodiversity occurrence records. Find species observations with location, date, and collection data.',
    parameters: {
        species: { type: 'string', description: 'Scientific name (e.g., "Panthera tigris", "Quercus robur")', required: true },
        country: { type: 'string', description: 'ISO 3166-1 alpha-2 country code (e.g., "US", "BR", "AU")' },
        limit: { type: 'number', description: 'Max records (default 20, max 100)' },
    },
    tier: 'free',
    /**
     * Query the GBIF occurrence search endpoint and render the response as Markdown.
     *
     * The report contains the total record count, per-country / record-type /
     * year-range summaries computed over the returned sample, and a bullet list
     * of at most 20 individual records.
     *
     * @param {object} args
     * @param {string} args.species - Scientific name to search for (required).
     * @param {string} [args.country] - Country code filter; upper-cased before use.
     * @param {number|string} [args.limit] - Requested sample size; clamped to 1..100,
     *   defaults to 20 when absent or not a finite number.
     * @returns {Promise<string>} Markdown report, or a human-readable error message.
     *   Never rejects: fetch/parse failures are caught and reported as text.
     */
    async execute(args) {
        // Upper bound on individually listed records; summaries still cover the whole sample.
        const MAX_LISTED = 20;
        const species = args.species == null ? '' : String(args.species).trim();
        if (!species)
            return 'Ecology data requires a species parameter (scientific name). Example: "Panthera tigris"';
        const country = args.country ? String(args.country).trim().toUpperCase() : '';
        // Accept numeric strings, reject NaN/Infinity, and keep the value inside 1..100
        // so a bad argument can never produce `limit=NaN` or a negative page size.
        const rawLimit = typeof args.limit === 'string' ? Number.parseFloat(args.limit) : args.limit;
        const limit = typeof rawLimit === 'number' && Number.isFinite(rawLimit)
            ? Math.min(Math.max(Math.trunc(rawLimit), 1), 100)
            : 20;
        // Coordinates are formatted only when they really are numbers; anything else shows '?'.
        const formatCoord = (value) => (typeof value === 'number' && Number.isFinite(value) ? value.toFixed(4) : '?');
        try {
            let url = `https://api.gbif.org/v1/occurrence/search?scientificName=${encodeURIComponent(species)}&limit=${limit}&hasCoordinate=true`;
            // Encode the filter so characters such as '&' or '=' cannot inject extra query parameters.
            if (country)
                url += `&country=${encodeURIComponent(country)}`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok)
                return `GBIF error: HTTP ${res.status}`;
            const data = await res.json();
            const results_arr = Array.isArray(data?.results) ? data.results : [];
            const totalCount = typeof data?.count === 'number' ? data.count : 0;
            if (results_arr.length === 0)
                return `No GBIF occurrence records for "${species}"${country ? ` in ${country}` : ''}. Try the full scientific name.`;
            const output = [`## GBIF Occurrences: *${species}*\n`];
            output.push(`**Total records:** ${totalCount.toLocaleString()}${country ? ` (filtered: ${country})` : ''}\n`);
            // Summary stats over the returned sample
            const countries = new Map();
            const years = new Map();
            const basisOfRecord = new Map();
            for (const rec of results_arr) {
                const c = rec.country || 'Unknown';
                countries.set(c, (countries.get(c) || 0) + 1);
                if (rec.year)
                    years.set(rec.year, (years.get(rec.year) || 0) + 1);
                const basis = rec.basisOfRecord || 'Unknown';
                basisOfRecord.set(basis, (basisOfRecord.get(basis) || 0) + 1);
            }
            // Country distribution, most frequent first
            const sortedCountries = [...countries.entries()].sort((a, b) => b[1] - a[1]);
            output.push(`**Countries in sample:** ${sortedCountries.map(([c, n]) => `${c} (${n})`).join(', ')}\n`);
            // Record types
            output.push(`**Record types:** ${[...basisOfRecord.entries()].map(([t, n]) => `${String(t).replace(/_/g, ' ')} (${n})`).join(', ')}\n`);
            // Year range: sort numerically, the default comparator would order 850 after 1999
            const yearKeys = [...years.keys()].sort((a, b) => a - b);
            if (yearKeys.length > 0) {
                output.push(`**Year range:** ${yearKeys[0]}–${yearKeys[yearKeys.length - 1]}\n`);
            }
            // Individual records
            output.push('\n### Records\n');
            for (const rec of results_arr.slice(0, MAX_LISTED)) {
                const name = rec.species || rec.scientificName || species;
                const lat = formatCoord(rec.decimalLatitude);
                const lon = formatCoord(rec.decimalLongitude);
                const date = rec.eventDate || rec.year || 'N/A';
                const loc = rec.locality || rec.stateProvince || '';
                const countryName = rec.country || '';
                const institution = rec.institutionCode || '';
                const basis = rec.basisOfRecord ? String(rec.basisOfRecord).replace(/_/g, ' ') : '';
                let line = `- **${name}** — ${lat}, ${lon}`;
                if (countryName)
                    line += ` (${countryName}${loc ? `, ${loc}` : ''})`;
                // `date` always has a value because of the 'N/A' fallback above.
                line += ` | ${date}`;
                if (basis)
                    line += ` | ${basis}`;
                if (institution)
                    line += ` | ${institution}`;
                if (rec.gbifID)
                    line += ` | [GBIF:${rec.gbifID}](https://www.gbif.org/occurrence/${rec.gbifID})`;
                output.push(line);
            }
            // Make the listing cap visible instead of silently dropping records.
            if (results_arr.length > MAX_LISTED)
                output.push(`\n... and ${results_arr.length - MAX_LISTED} more records in this sample (not listed).`);
            return output.join('\n');
        }
        catch (e) {
            return `Ecology data lookup failed: ${e?.message || e}`;
        }
    },
});
|
|
1391
|
+
}
|
|
1392
|
+
//# sourceMappingURL=lab-bio.js.map
|