@kernel.chat/kbot 3.40.0 → 3.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/README.md +5 -5
  2. package/dist/agent-teams.d.ts +1 -1
  3. package/dist/agent-teams.d.ts.map +1 -1
  4. package/dist/agent-teams.js +36 -3
  5. package/dist/agent-teams.js.map +1 -1
  6. package/dist/agents/specialists.d.ts.map +1 -1
  7. package/dist/agents/specialists.js +20 -0
  8. package/dist/agents/specialists.js.map +1 -1
  9. package/dist/channels/kbot-channel.js +8 -31
  10. package/dist/channels/kbot-channel.js.map +1 -1
  11. package/dist/cli.js +8 -8
  12. package/dist/digest.js +1 -1
  13. package/dist/digest.js.map +1 -1
  14. package/dist/email-service.d.ts.map +1 -1
  15. package/dist/email-service.js +1 -2
  16. package/dist/email-service.js.map +1 -1
  17. package/dist/episodic-memory.d.ts.map +1 -1
  18. package/dist/episodic-memory.js +14 -0
  19. package/dist/episodic-memory.js.map +1 -1
  20. package/dist/interactive-buttons.d.ts +90 -0
  21. package/dist/interactive-buttons.d.ts.map +1 -0
  22. package/dist/interactive-buttons.js +286 -0
  23. package/dist/interactive-buttons.js.map +1 -0
  24. package/dist/learned-router.d.ts.map +1 -1
  25. package/dist/learned-router.js +29 -0
  26. package/dist/learned-router.js.map +1 -1
  27. package/dist/memory-hotswap.d.ts +58 -0
  28. package/dist/memory-hotswap.d.ts.map +1 -0
  29. package/dist/memory-hotswap.js +288 -0
  30. package/dist/memory-hotswap.js.map +1 -0
  31. package/dist/personal-security.d.ts +142 -0
  32. package/dist/personal-security.d.ts.map +1 -0
  33. package/dist/personal-security.js +1151 -0
  34. package/dist/personal-security.js.map +1 -0
  35. package/dist/side-conversation.d.ts +58 -0
  36. package/dist/side-conversation.d.ts.map +1 -0
  37. package/dist/side-conversation.js +224 -0
  38. package/dist/side-conversation.js.map +1 -0
  39. package/dist/tools/email.d.ts.map +1 -1
  40. package/dist/tools/email.js +2 -3
  41. package/dist/tools/email.js.map +1 -1
  42. package/dist/tools/index.d.ts.map +1 -1
  43. package/dist/tools/index.js +7 -1
  44. package/dist/tools/index.js.map +1 -1
  45. package/dist/tools/lab-bio.d.ts +2 -0
  46. package/dist/tools/lab-bio.d.ts.map +1 -0
  47. package/dist/tools/lab-bio.js +1392 -0
  48. package/dist/tools/lab-bio.js.map +1 -0
  49. package/dist/tools/lab-chem.d.ts +2 -0
  50. package/dist/tools/lab-chem.d.ts.map +1 -0
  51. package/dist/tools/lab-chem.js +1257 -0
  52. package/dist/tools/lab-chem.js.map +1 -0
  53. package/dist/tools/lab-core.d.ts +2 -0
  54. package/dist/tools/lab-core.d.ts.map +1 -0
  55. package/dist/tools/lab-core.js +2452 -0
  56. package/dist/tools/lab-core.js.map +1 -0
  57. package/dist/tools/lab-data.d.ts +2 -0
  58. package/dist/tools/lab-data.d.ts.map +1 -0
  59. package/dist/tools/lab-data.js +2464 -0
  60. package/dist/tools/lab-data.js.map +1 -0
  61. package/dist/tools/lab-earth.d.ts +2 -0
  62. package/dist/tools/lab-earth.d.ts.map +1 -0
  63. package/dist/tools/lab-earth.js +1124 -0
  64. package/dist/tools/lab-earth.js.map +1 -0
  65. package/dist/tools/lab-math.d.ts +2 -0
  66. package/dist/tools/lab-math.d.ts.map +1 -0
  67. package/dist/tools/lab-math.js +3021 -0
  68. package/dist/tools/lab-math.js.map +1 -0
  69. package/dist/tools/lab-physics.d.ts +2 -0
  70. package/dist/tools/lab-physics.d.ts.map +1 -0
  71. package/dist/tools/lab-physics.js +2423 -0
  72. package/dist/tools/lab-physics.js.map +1 -0
  73. package/package.json +2 -3
@@ -0,0 +1,1392 @@
1
+ // kbot Life Sciences Tools — Bioinformatics, genomics, proteomics, clinical research
2
+ // Real API integrations with NCBI, UniProt, PDB, ChEMBL, Reactome, GBIF, Open Targets, ClinicalTrials.gov
3
+ // No external dependencies — all built on native fetch + regex XML parsing.
4
+ import { registerTool } from './index.js';
5
+ const UA = 'KBot/3.0 (Lab Tools)';
6
// ── NCBI rate limiter (max 3 requests/sec without API key) ──────────────
// Timestamp (ms since epoch) of the most recently *reserved* request slot.
let lastNCBICall = 0;
/**
 * Delay the caller so that consecutive NCBI requests are spaced at least
 * `minIntervalMs` apart.
 *
 * The slot is reserved synchronously (before any `await`), so callers that
 * invoke the throttle concurrently are queued one interval apart instead of
 * all waking up at the same instant.
 *
 * @param {number} [minIntervalMs=334] Minimum spacing between requests in
 *   milliseconds; the default corresponds to ~3 requests per second.
 * @returns {Promise<void>} Resolves when the caller may issue its request.
 */
async function ncbiThrottle(minIntervalMs = 334) {
    const now = Date.now();
    // Earliest moment this caller may proceed: right now, or one interval
    // after the previously reserved slot, whichever is later.
    const slot = Math.max(now, lastNCBICall + minIntervalMs);
    // Claim the slot before yielding so the next caller queues behind it.
    lastNCBICall = slot;
    if (slot > now) {
        await new Promise((resolve) => setTimeout(resolve, slot - now));
    }
}
16
+ // ── XML helpers (regex-based, no external parser) ───────────────────────
17
/**
 * Return the trimmed inner content of the first `<tag>…</tag>` element in `xml`.
 *
 * Matching is case-insensitive and regex-based (no real XML parser), so nested
 * elements with the same name are not supported.
 *
 * @param {string} xml XML text to search.
 * @param {string} tag Element name, matched literally.
 * @returns {string} Trimmed inner content, or '' when the element is absent.
 */
function xmlTag(xml, tag) {
    // Escape regex metacharacters so the tag name is always matched literally.
    const name = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `(?:\s[^>]*)?` permits attributes but requires the name to end right
    // there, so looking for `Title` no longer matches `<TitleAbbrev>`.
    const re = new RegExp(`<${name}(?:\\s[^>]*)?>([\\s\\S]*?)</${name}\\s*>`, 'i');
    const m = re.exec(xml);
    return m ? m[1].trim() : '';
}
22
/**
 * Return the trimmed inner content of every `<tag>…</tag>` element in `xml`,
 * in document order.
 *
 * Matching is case-insensitive and regex-based (no real XML parser), so nested
 * elements with the same name are not supported.
 *
 * @param {string} xml XML text to search.
 * @param {string} tag Element name, matched literally.
 * @returns {string[]} One entry per matched element; empty when none match.
 */
function xmlTagAll(xml, tag) {
    // Escape regex metacharacters so the tag name is always matched literally.
    const name = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `(?:\s[^>]*)?` permits attributes but requires the name to end right
    // there, so looking for `Author` no longer matches `<AuthorList>`.
    const re = new RegExp(`<${name}(?:\\s[^>]*)?>([\\s\\S]*?)</${name}\\s*>`, 'gi');
    return Array.from(xml.matchAll(re), (m) => m[1].trim());
}
31
// ── Standard genetic code codon table ───────────────────────────────────
// Maps each DNA codon (T/C/A/G alphabet) to a one-letter amino acid code,
// with '*' marking stop codons. The table is generated from a compact
// 64-letter string; keys are inserted ordered by second base, then first
// base, then third base.
const CODON_TABLE = (() => {
    const bases = ['T', 'C', 'A', 'G'];
    // One row per second base (T, C, A, G); 16 amino acids per row.
    const aminoAcids = [
        'FFLLLLLLIIIMVVVV',
        'SSSSPPPPTTTTAAAA',
        'YY**HHQQNNKKDDEE',
        'CC*WRRRRSSRRGGGG',
    ].join('');
    const table = {};
    let index = 0;
    for (const second of bases) {
        for (const first of bases) {
            for (const third of bases) {
                table[first + second + third] = aminoAcids[index];
                index += 1;
            }
        }
    }
    return table;
})();
50
// Average molecular weights (g/mol) of the 20 standard amino acids, keyed by
// one-letter code. These are free amino-acid masses.
// NOTE(review): a peptide mass presumably needs one water (18.02) subtracted
// per peptide bond — confirm against the code that consumes this table.
const AA_WEIGHTS = {
    A: 89.09, R: 174.20, N: 132.12, D: 133.10, C: 121.16,
    // Glycine (C2H5NO2) is 75.07 g/mol; the previous value 75.03 was a typo.
    E: 147.13, Q: 146.15, G: 75.07, H: 155.16, I: 131.17,
    L: 131.17, K: 146.19, M: 149.21, F: 165.19, P: 115.13,
    S: 105.09, T: 119.12, W: 204.23, Y: 181.19, V: 117.15,
};
56
// Per-base weights keyed by nucleotide letter.
// NOTE(review): values look like average deoxynucleotide monophosphate masses
// in g/mol — confirm against the code that consumes this table.
const DNA_WEIGHTS = Object.fromEntries([
    ['A', 331.2],
    ['T', 322.2],
    ['G', 347.2],
    ['C', 307.2],
]);
59
+ // ── Registration ────────────────────────────────────────────────────────
60
+ export function registerLabBioTools() {
61
+ // ════════════════════════════════════════════════════════════════════════
62
+ // 1. PubMed Search
63
+ // ════════════════════════════════════════════════════════════════════════
64
+ registerTool({
65
+ name: 'pubmed_search',
66
+ description: 'Search PubMed for biomedical literature via NCBI E-utilities. Returns titles, authors, journal, year, abstract, DOI, and PMID. Use MeSH terms for precise filtering.',
67
+ parameters: {
68
+ query: { type: 'string', description: 'Search query (e.g., "CRISPR cancer therapy")', required: true },
69
+ mesh_terms: { type: 'string', description: 'Optional MeSH terms to AND with query (e.g., "Neoplasms[MeSH]")' },
70
+ limit: { type: 'number', description: 'Max results (default 10, max 50)' },
71
+ sort: { type: 'string', description: 'Sort order: "relevance" (default) or "date"' },
72
+ },
73
+ tier: 'free',
74
+ async execute(args) {
75
+ const query = String(args.query);
76
+ const meshTerms = args.mesh_terms ? String(args.mesh_terms) : '';
77
+ const limit = Math.min(typeof args.limit === 'number' ? args.limit : 10, 50);
78
+ const sort = String(args.sort || 'relevance');
79
+ let fullQuery = query;
80
+ if (meshTerms)
81
+ fullQuery += ` AND ${meshTerms}`;
82
+ const sortParam = sort === 'date' ? '&sort=pub_date' : '&sort=relevance';
83
+ try {
84
+ // Step 1: esearch to get PMIDs
85
+ await ncbiThrottle();
86
+ const searchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=${limit}${sortParam}&term=${encodeURIComponent(fullQuery)}`;
87
+ const searchRes = await fetch(searchUrl, {
88
+ headers: { 'User-Agent': UA },
89
+ signal: AbortSignal.timeout(10000),
90
+ });
91
+ if (!searchRes.ok)
92
+ return `PubMed search error: HTTP ${searchRes.status}`;
93
+ const searchData = await searchRes.json();
94
+ const idList = searchData?.esearchresult?.idlist || [];
95
+ const totalCount = searchData?.esearchresult?.count || '0';
96
+ if (idList.length === 0)
97
+ return `No PubMed results for "${query}". Try broader terms or check MeSH vocabulary.`;
98
+ // Step 2: efetch to get article details
99
+ await ncbiThrottle();
100
+ const fetchUrl = `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=${idList.join(',')}`;
101
+ const fetchRes = await fetch(fetchUrl, {
102
+ headers: { 'User-Agent': UA },
103
+ signal: AbortSignal.timeout(10000),
104
+ });
105
+ if (!fetchRes.ok)
106
+ return `PubMed fetch error: HTTP ${fetchRes.status}`;
107
+ const xml = await fetchRes.text();
108
+ // Parse articles from XML
109
+ const articles = xmlTagAll(xml, 'PubmedArticle');
110
+ const results = [`## PubMed Results (${totalCount} total, showing ${articles.length})\n`];
111
+ for (const article of articles) {
112
+ const pmid = xmlTag(article, 'PMID');
113
+ const title = xmlTag(article, 'ArticleTitle').replace(/<[^>]+>/g, '');
114
+ const abstractText = xmlTag(article, 'AbstractText').replace(/<[^>]+>/g, '');
115
+ const journal = xmlTag(article, 'Title');
116
+ const year = xmlTag(article, 'Year');
117
+ // Authors
118
+ const authorNodes = xmlTagAll(article, 'Author');
119
+ const authors = authorNodes.slice(0, 5).map(a => {
120
+ const last = xmlTag(a, 'LastName');
121
+ const initials = xmlTag(a, 'Initials');
122
+ return last ? `${last} ${initials}` : xmlTag(a, 'CollectiveName');
123
+ }).filter(Boolean);
124
+ const authorStr = authors.join(', ') + (authorNodes.length > 5 ? ' et al.' : '');
125
+ // DOI
126
+ const articleIdList = xmlTagAll(article, 'ArticleId');
127
+ let doi = '';
128
+ for (const idBlock of articleIdList) {
129
+ if (idBlock.includes('doi')) {
130
+ // The DOI is the text content, but we need to check the IdType attribute
131
+ const doiMatch = article.match(/<ArticleId IdType="doi">([^<]+)<\/ArticleId>/i);
132
+ if (doiMatch)
133
+ doi = doiMatch[1];
134
+ break;
135
+ }
136
+ }
137
+ let entry = `### ${title}\n`;
138
+ entry += `**Authors:** ${authorStr || 'N/A'}\n`;
139
+ entry += `**Journal:** ${journal || 'N/A'} (${year || 'N/A'})\n`;
140
+ entry += `**PMID:** [${pmid}](https://pubmed.ncbi.nlm.nih.gov/${pmid}/)`;
141
+ if (doi)
142
+ entry += ` | **DOI:** [${doi}](https://doi.org/${doi})`;
143
+ entry += '\n';
144
+ if (abstractText) {
145
+ const truncated = abstractText.length > 500 ? abstractText.slice(0, 500) + '...' : abstractText;
146
+ entry += `\n> ${truncated}\n`;
147
+ }
148
+ results.push(entry);
149
+ }
150
+ return results.join('\n---\n');
151
+ }
152
+ catch (e) {
153
+ return `PubMed search failed: ${e?.message || e}`;
154
+ }
155
+ },
156
+ });
157
+ // ════════════════════════════════════════════════════════════════════════
158
+ // 2. Gene Lookup
159
+ // ════════════════════════════════════════════════════════════════════════
160
+ registerTool({
161
+ name: 'gene_lookup',
162
+ description: 'Look up gene information by symbol or name using MyGene.info. Returns function, chromosome location, aliases, and associated diseases.',
163
+ parameters: {
164
+ gene: { type: 'string', description: 'Gene symbol or name (e.g., "TP53", "BRCA1", "tumor protein p53")', required: true },
165
+ organism: { type: 'string', description: 'Organism (default: "human"). Also: "mouse", "rat", "zebrafish", etc.' },
166
+ },
167
+ tier: 'free',
168
+ async execute(args) {
169
+ const gene = String(args.gene);
170
+ const organism = String(args.organism || 'human');
171
+ try {
172
+ const fields = 'symbol,name,summary,genomic_pos,alias,type_of_gene,entrezgene,ensembl.gene,taxid,generif,pathway.kegg';
173
+ const url = `https://mygene.info/v3/query?q=${encodeURIComponent(gene)}&species=${encodeURIComponent(organism)}&fields=${fields}&size=5`;
174
+ const res = await fetch(url, {
175
+ headers: { 'User-Agent': UA },
176
+ signal: AbortSignal.timeout(10000),
177
+ });
178
+ if (!res.ok)
179
+ return `MyGene.info error: HTTP ${res.status}`;
180
+ const data = await res.json();
181
+ const hits = data.hits || [];
182
+ if (hits.length === 0)
183
+ return `No gene found for "${gene}" in ${organism}. Try the official HGNC symbol.`;
184
+ const results = [`## Gene Lookup: "${gene}" (${organism})\n`];
185
+ for (const hit of hits.slice(0, 3)) {
186
+ let entry = `### ${hit.symbol || gene} — ${hit.name || 'Unknown'}\n`;
187
+ entry += `**Type:** ${hit.type_of_gene || 'N/A'}\n`;
188
+ if (hit.entrezgene)
189
+ entry += `**Entrez ID:** [${hit.entrezgene}](https://www.ncbi.nlm.nih.gov/gene/${hit.entrezgene})\n`;
190
+ if (hit.ensembl?.gene)
191
+ entry += `**Ensembl:** ${hit.ensembl.gene}\n`;
192
+ // Genomic position
193
+ if (hit.genomic_pos) {
194
+ const pos = Array.isArray(hit.genomic_pos) ? hit.genomic_pos[0] : hit.genomic_pos;
195
+ if (pos)
196
+ entry += `**Location:** Chr${pos.chr}:${pos.start?.toLocaleString()}-${pos.end?.toLocaleString()} (${pos.strand > 0 ? '+' : '-'} strand)\n`;
197
+ }
198
+ // Aliases
199
+ if (hit.alias) {
200
+ const aliases = Array.isArray(hit.alias) ? hit.alias : [hit.alias];
201
+ entry += `**Aliases:** ${aliases.slice(0, 10).join(', ')}\n`;
202
+ }
203
+ // Summary
204
+ if (hit.summary) {
205
+ const truncated = hit.summary.length > 600 ? hit.summary.slice(0, 600) + '...' : hit.summary;
206
+ entry += `\n**Summary:** ${truncated}\n`;
207
+ }
208
+ // Pathways
209
+ if (hit.pathway?.kegg) {
210
+ const pathways = Array.isArray(hit.pathway.kegg) ? hit.pathway.kegg : [hit.pathway.kegg];
211
+ entry += `\n**KEGG Pathways:**\n`;
212
+ for (const p of pathways.slice(0, 5)) {
213
+ entry += `- ${p.name || p.id}\n`;
214
+ }
215
+ }
216
+ results.push(entry);
217
+ }
218
+ return results.join('\n---\n');
219
+ }
220
+ catch (e) {
221
+ return `Gene lookup failed: ${e?.message || e}`;
222
+ }
223
+ },
224
+ });
225
+ // ════════════════════════════════════════════════════════════════════════
226
+ // 3. Protein Search
227
+ // ════════════════════════════════════════════════════════════════════════
228
+ registerTool({
229
+ name: 'protein_search',
230
+ description: 'Search UniProt for protein information. Returns sequence length, function, subcellular location, and GO terms.',
231
+ parameters: {
232
+ query: { type: 'string', description: 'Protein name, gene symbol, or keyword (e.g., "insulin", "P53_HUMAN")', required: true },
233
+ organism: { type: 'string', description: 'Organism filter (e.g., "Homo sapiens", "Mus musculus")' },
234
+ reviewed: { type: 'boolean', description: 'Only reviewed (Swiss-Prot) entries (default: true)' },
235
+ },
236
+ tier: 'free',
237
+ async execute(args) {
238
+ const query = String(args.query);
239
+ const organism = args.organism ? String(args.organism) : '';
240
+ const reviewed = args.reviewed !== false;
241
+ try {
242
+ let fullQuery = query;
243
+ if (organism)
244
+ fullQuery += ` AND organism_name:"${organism}"`;
245
+ if (reviewed)
246
+ fullQuery += ' AND reviewed:true';
247
+ const url = `https://rest.uniprot.org/uniprotkb/search?query=${encodeURIComponent(fullQuery)}&format=json&size=5`;
248
+ const res = await fetch(url, {
249
+ headers: { 'User-Agent': UA },
250
+ signal: AbortSignal.timeout(10000),
251
+ });
252
+ if (!res.ok)
253
+ return `UniProt error: HTTP ${res.status}`;
254
+ const data = await res.json();
255
+ const results_arr = data.results || [];
256
+ if (results_arr.length === 0)
257
+ return `No UniProt results for "${query}". Try a broader query or set reviewed=false.`;
258
+ const output = [`## UniProt Search: "${query}"\n`];
259
+ for (const entry of results_arr) {
260
+ const accession = entry.primaryAccession || 'N/A';
261
+ const entryName = entry.uniProtkbId || '';
262
+ const proteinName = entry.proteinDescription?.recommendedName?.fullName?.value
263
+ || entry.proteinDescription?.submittedName?.[0]?.fullName?.value
264
+ || 'Unknown';
265
+ const organism_name = entry.organism?.scientificName || '';
266
+ const seqLen = entry.sequence?.length || 0;
267
+ const seqMW = entry.sequence?.molWeight || 0;
268
+ let block = `### ${proteinName}\n`;
269
+ block += `**Accession:** [${accession}](https://www.uniprot.org/uniprot/${accession}) (${entryName})\n`;
270
+ block += `**Organism:** ${organism_name}\n`;
271
+ block += `**Sequence:** ${seqLen} aa | ${(seqMW / 1000).toFixed(1)} kDa\n`;
272
+ // Function
273
+ const funcComments = (entry.comments || []).filter((c) => c.commentType === 'FUNCTION');
274
+ if (funcComments.length > 0) {
275
+ const funcText = funcComments[0].texts?.[0]?.value || '';
276
+ if (funcText) {
277
+ const truncated = funcText.length > 400 ? funcText.slice(0, 400) + '...' : funcText;
278
+ block += `\n**Function:** ${truncated}\n`;
279
+ }
280
+ }
281
+ // Subcellular location
282
+ const locComments = (entry.comments || []).filter((c) => c.commentType === 'SUBCELLULAR LOCATION');
283
+ if (locComments.length > 0) {
284
+ const locations = locComments[0].subcellularLocations?.map((sl) => sl.location?.value).filter(Boolean) || [];
285
+ if (locations.length > 0)
286
+ block += `**Subcellular Location:** ${locations.join(', ')}\n`;
287
+ }
288
+ // GO terms
289
+ const goTerms = (entry.uniProtKBCrossReferences || []).filter((x) => x.database === 'GO');
290
+ if (goTerms.length > 0) {
291
+ const goGroups = { F: [], P: [], C: [] };
292
+ for (const go of goTerms.slice(0, 20)) {
293
+ const name = go.properties?.find((p) => p.key === 'GoTerm')?.value || go.id;
294
+ if (name.startsWith('F:'))
295
+ goGroups.F.push(name.slice(2));
296
+ else if (name.startsWith('P:'))
297
+ goGroups.P.push(name.slice(2));
298
+ else if (name.startsWith('C:'))
299
+ goGroups.C.push(name.slice(2));
300
+ }
301
+ if (goGroups.F.length > 0)
302
+ block += `**Molecular Function:** ${goGroups.F.slice(0, 5).join('; ')}\n`;
303
+ if (goGroups.P.length > 0)
304
+ block += `**Biological Process:** ${goGroups.P.slice(0, 5).join('; ')}\n`;
305
+ if (goGroups.C.length > 0)
306
+ block += `**Cellular Component:** ${goGroups.C.slice(0, 5).join('; ')}\n`;
307
+ }
308
+ output.push(block);
309
+ }
310
+ return output.join('\n---\n');
311
+ }
312
+ catch (e) {
313
+ return `Protein search failed: ${e?.message || e}`;
314
+ }
315
+ },
316
+ });
317
+ // ════════════════════════════════════════════════════════════════════════
318
+ // 4. Protein Structure (PDB)
319
+ // ════════════════════════════════════════════════════════════════════════
320
+ registerTool({
321
+ name: 'protein_structure',
322
+ description: 'Fetch protein 3D structure info from RCSB PDB. Get resolution, experimental method, ligands, and chain details by PDB ID or text search.',
323
+ parameters: {
324
+ pdb_id: { type: 'string', description: 'PDB ID (e.g., "1TUP", "6LU7"). If provided, fetches directly.' },
325
+ query: { type: 'string', description: 'Text search query (e.g., "p53 DNA binding domain"). Used if pdb_id not given.' },
326
+ },
327
+ tier: 'free',
328
+ async execute(args) {
329
+ const pdbId = args.pdb_id ? String(args.pdb_id).toUpperCase() : '';
330
+ const query = args.query ? String(args.query) : '';
331
+ if (!pdbId && !query)
332
+ return 'Provide either pdb_id or query to search PDB structures.';
333
+ try {
334
+ let ids = [];
335
+ if (pdbId) {
336
+ ids = [pdbId];
337
+ }
338
+ else {
339
+ // Text search via RCSB search API
340
+ const searchBody = JSON.stringify({
341
+ query: {
342
+ type: 'terminal',
343
+ service: 'full_text',
344
+ parameters: { value: query },
345
+ },
346
+ return_type: 'entry',
347
+ request_options: { paginate: { start: 0, rows: 5 } },
348
+ });
349
+ const searchRes = await fetch('https://search.rcsb.org/rcsbsearch/v2/query', {
350
+ method: 'POST',
351
+ headers: { 'Content-Type': 'application/json', 'User-Agent': UA },
352
+ body: searchBody,
353
+ signal: AbortSignal.timeout(10000),
354
+ });
355
+ if (!searchRes.ok)
356
+ return `PDB search error: HTTP ${searchRes.status}`;
357
+ const searchData = await searchRes.json();
358
+ ids = (searchData.result_set || []).map((r) => r.identifier).slice(0, 5);
359
+ if (ids.length === 0)
360
+ return `No PDB structures found for "${query}".`;
361
+ }
362
+ const output = [`## PDB Structure${ids.length > 1 ? 's' : ''}\n`];
363
+ for (const id of ids) {
364
+ const res = await fetch(`https://data.rcsb.org/rest/v1/core/entry/${id}`, {
365
+ headers: { 'User-Agent': UA },
366
+ signal: AbortSignal.timeout(10000),
367
+ });
368
+ if (!res.ok) {
369
+ output.push(`**${id}**: Not found (HTTP ${res.status})`);
370
+ continue;
371
+ }
372
+ const data = await res.json();
373
+ const title = data.struct?.title || 'No title';
374
+ const method = data.exptl?.[0]?.method || 'N/A';
375
+ const resolution = data.rcsb_entry_info?.resolution_combined?.[0];
376
+ const deposited = data.rcsb_accession_info?.deposit_date || '';
377
+ const polymerCount = data.rcsb_entry_info?.polymer_entity_count || 0;
378
+ const nonPolymerCount = data.rcsb_entry_info?.nonpolymer_entity_count || 0;
379
+ const citation = data.rcsb_primary_citation;
380
+ let block = `### [${id}](https://www.rcsb.org/structure/${id}) — ${title}\n`;
381
+ block += `**Method:** ${method}`;
382
+ if (resolution)
383
+ block += ` | **Resolution:** ${resolution} A`;
384
+ block += '\n';
385
+ block += `**Deposited:** ${deposited}\n`;
386
+ block += `**Entities:** ${polymerCount} polymer, ${nonPolymerCount} non-polymer (ligands/ions)\n`;
387
+ // Polymer entities (chains)
388
+ if (data.rcsb_entry_info?.polymer_entity_count_protein) {
389
+ block += `**Protein chains:** ${data.rcsb_entry_info.polymer_entity_count_protein}\n`;
390
+ }
391
+ if (data.rcsb_entry_info?.polymer_entity_count_nucleic_acid) {
392
+ block += `**Nucleic acid chains:** ${data.rcsb_entry_info.polymer_entity_count_nucleic_acid}\n`;
393
+ }
394
+ // Primary citation
395
+ if (citation) {
396
+ block += `\n**Citation:** ${citation.title || ''}\n`;
397
+ const authors = citation.rcsb_authors?.slice(0, 3).join(', ') || '';
398
+ if (authors)
399
+ block += `*${authors}${citation.rcsb_authors?.length > 3 ? ' et al.' : ''}* `;
400
+ if (citation.pdbx_database_id_journal)
401
+ block += `${citation.pdbx_database_id_journal} `;
402
+ if (citation.year)
403
+ block += `(${citation.year})`;
404
+ if (citation.pdbx_database_id_DOI)
405
+ block += ` DOI: ${citation.pdbx_database_id_DOI}`;
406
+ block += '\n';
407
+ }
408
+ output.push(block);
409
+ }
410
+ return output.join('\n---\n');
411
+ }
412
+ catch (e) {
413
+ return `PDB lookup failed: ${e?.message || e}`;
414
+ }
415
+ },
416
+ });
417
+ // ════════════════════════════════════════════════════════════════════════
418
+ // 5. BLAST Search
419
+ // ════════════════════════════════════════════════════════════════════════
420
+ registerTool({
421
+ name: 'blast_search',
422
+ description: 'Submit a sequence for NCBI BLAST homology search. Supports blastn (nucleotide), blastp (protein), blastx (translated). Async — submits job, polls for results.',
423
+ parameters: {
424
+ sequence: { type: 'string', description: 'Nucleotide or protein sequence (FASTA or raw)', required: true },
425
+ program: { type: 'string', description: 'BLAST program: "blastn", "blastp", or "blastx"', required: true },
426
+ database: { type: 'string', description: 'Database: "nr" (non-redundant protein), "nt" (nucleotide), "swissprot"', required: true },
427
+ },
428
+ tier: 'free',
429
+ timeout: 180_000,
430
+ async execute(args) {
431
+ const sequence = String(args.sequence).trim();
432
+ const program = String(args.program || 'blastn');
433
+ const database = String(args.database || 'nr');
434
+ if (sequence.length < 10)
435
+ return 'Sequence too short for BLAST. Provide at least 10 residues/bases.';
436
+ // Clean sequence: remove FASTA header if present
437
+ const cleanSeq = sequence.split('\n').filter(line => !line.startsWith('>')).join('');
438
+ try {
439
+ // Step 1: Submit BLAST job
440
+ await ncbiThrottle();
441
+ const submitParams = new URLSearchParams({
442
+ CMD: 'Put',
443
+ PROGRAM: program,
444
+ DATABASE: database,
445
+ QUERY: cleanSeq,
446
+ FORMAT_TYPE: 'XML',
447
+ HITLIST_SIZE: '10',
448
+ });
449
+ const submitRes = await fetch('https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi', {
450
+ method: 'POST',
451
+ headers: { 'User-Agent': UA, 'Content-Type': 'application/x-www-form-urlencoded' },
452
+ body: submitParams.toString(),
453
+ signal: AbortSignal.timeout(30000),
454
+ });
455
+ if (!submitRes.ok)
456
+ return `BLAST submission failed: HTTP ${submitRes.status}`;
457
+ const submitText = await submitRes.text();
458
+ // Extract RID from response
459
+ const ridMatch = submitText.match(/RID\s*=\s*(\S+)/);
460
+ if (!ridMatch)
461
+ return `BLAST submission failed: could not get Request ID.\n\nResponse excerpt:\n${submitText.slice(0, 500)}`;
462
+ const rid = ridMatch[1];
463
+ // Extract estimated wait time
464
+ const rtoeMatch = submitText.match(/RTOE\s*=\s*(\d+)/);
465
+ const rtoe = rtoeMatch ? parseInt(rtoeMatch[1], 10) : 15;
466
+ // Step 2: Poll for results
467
+ const startTime = Date.now();
468
+ const maxWait = 120_000; // 2 minutes max polling
469
+ let waitTime = Math.min(rtoe * 1000, 15000); // Start with estimated wait, cap at 15s
470
+ // Initial wait before first poll
471
+ await new Promise(resolve => setTimeout(resolve, Math.min(waitTime, 10000)));
472
+ let resultXml = '';
473
+ while (Date.now() - startTime < maxWait) {
474
+ await ncbiThrottle();
475
+ const pollUrl = `https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=XML&RID=${rid}`;
476
+ const pollRes = await fetch(pollUrl, {
477
+ headers: { 'User-Agent': UA },
478
+ signal: AbortSignal.timeout(120_000),
479
+ });
480
+ if (!pollRes.ok) {
481
+ await new Promise(resolve => setTimeout(resolve, 5000));
482
+ continue;
483
+ }
484
+ const pollText = await pollRes.text();
485
+ // Check status
486
+ if (pollText.includes('Status=WAITING')) {
487
+ await new Promise(resolve => setTimeout(resolve, 5000));
488
+ continue;
489
+ }
490
+ if (pollText.includes('Status=FAILED'))
491
+ return `BLAST job failed (RID: ${rid}). The sequence may be invalid for ${program}.`;
492
+ if (pollText.includes('Status=UNKNOWN'))
493
+ return `BLAST job expired or unknown (RID: ${rid}).`;
494
+ // If we have actual results (XML with hits)
495
+ if (pollText.includes('<BlastOutput>') || pollText.includes('<Hit>')) {
496
+ resultXml = pollText;
497
+ break;
498
+ }
499
+ // Still processing
500
+ await new Promise(resolve => setTimeout(resolve, 5000));
501
+ }
502
+ if (!resultXml)
503
+ return `BLAST search timed out after ${Math.round(maxWait / 1000)}s. RID: ${rid} — check manually at https://blast.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&RID=${rid}`;
504
+ // Step 3: Parse results
505
+ const hits = xmlTagAll(resultXml, 'Hit');
506
+ if (hits.length === 0)
507
+ return `BLAST completed but found no significant hits for your ${program} search against ${database}.`;
508
+ const output = [`## BLAST Results (${program} vs ${database})\n**RID:** ${rid} | **Hits:** ${hits.length}\n`];
509
+ for (const hit of hits.slice(0, 10)) {
510
+ const hitNum = xmlTag(hit, 'Hit_num');
511
+ const hitDef = xmlTag(hit, 'Hit_def').slice(0, 120);
512
+ const hitAccession = xmlTag(hit, 'Hit_accession');
513
+ const hitLen = xmlTag(hit, 'Hit_len');
514
+ // Best HSP
515
+ const hsps = xmlTagAll(hit, 'Hsp');
516
+ const hsp = hsps[0] || '';
517
+ const evalue = xmlTag(hsp, 'Hsp_evalue');
518
+ const bitScore = xmlTag(hsp, 'Hsp_bit-score');
519
+ const identity = xmlTag(hsp, 'Hsp_identity');
520
+ const alignLen = xmlTag(hsp, 'Hsp_align-len');
521
+ const identPct = alignLen ? ((parseInt(identity, 10) / parseInt(alignLen, 10)) * 100).toFixed(1) : 'N/A';
522
+ let block = `**${hitNum}. ${hitDef}**\n`;
523
+ block += `Accession: ${hitAccession} | Length: ${hitLen}\n`;
524
+ block += `E-value: ${evalue} | Bit score: ${bitScore} | Identity: ${identPct}% (${identity}/${alignLen})\n`;
525
+ output.push(block);
526
+ }
527
+ return output.join('\n---\n');
528
+ }
529
+ catch (e) {
530
+ return `BLAST search failed: ${e?.message || e}`;
531
+ }
532
+ },
533
+ });
534
+ // ════════════════════════════════════════════════════════════════════════
535
+ // 6. Drug Lookup (ChEMBL)
536
+ // ════════════════════════════════════════════════════════════════════════
537
+ registerTool({
538
+ name: 'drug_lookup',
539
+ description: 'Search ChEMBL for drugs and compounds. Returns targets, mechanism of action, clinical phase, and molecular properties.',
540
+ parameters: {
541
+ query: { type: 'string', description: 'Drug name, target, or mechanism (e.g., "imatinib", "EGFR inhibitor")', required: true },
542
+ search_type: { type: 'string', description: 'Search type: "name" (default), "target", or "mechanism"' },
543
+ },
544
+ tier: 'free',
545
+ async execute(args) {
546
+ const query = String(args.query);
547
+ const searchType = String(args.search_type || 'name');
548
+ try {
549
+ let url;
550
+ if (searchType === 'target') {
551
+ url = `https://www.ebi.ac.uk/chembl/api/data/target/search?q=${encodeURIComponent(query)}&format=json&limit=10`;
552
+ }
553
+ else if (searchType === 'mechanism') {
554
+ url = `https://www.ebi.ac.uk/chembl/api/data/mechanism/search?q=${encodeURIComponent(query)}&format=json&limit=10`;
555
+ }
556
+ else {
557
+ url = `https://www.ebi.ac.uk/chembl/api/data/molecule/search?q=${encodeURIComponent(query)}&format=json&limit=10`;
558
+ }
559
+ const res = await fetch(url, {
560
+ headers: { 'User-Agent': UA },
561
+ signal: AbortSignal.timeout(10000),
562
+ });
563
+ if (!res.ok)
564
+ return `ChEMBL error: HTTP ${res.status}`;
565
+ const data = await res.json();
566
+ if (searchType === 'target') {
567
+ const targets = data.targets || [];
568
+ if (targets.length === 0)
569
+ return `No targets found for "${query}" in ChEMBL.`;
570
+ const output = [`## ChEMBL Targets for "${query}"\n`];
571
+ for (const t of targets.slice(0, 5)) {
572
+ let block = `### ${t.pref_name || 'Unknown'}\n`;
573
+ block += `**ChEMBL ID:** ${t.target_chembl_id || 'N/A'}\n`;
574
+ block += `**Type:** ${t.target_type || 'N/A'}\n`;
575
+ block += `**Organism:** ${t.organism || 'N/A'}\n`;
576
+ if (t.target_components?.[0]?.accession) {
577
+ block += `**UniProt:** ${t.target_components[0].accession}\n`;
578
+ }
579
+ output.push(block);
580
+ }
581
+ return output.join('\n---\n');
582
+ }
583
+ if (searchType === 'mechanism') {
584
+ const mechanisms = data.mechanisms || [];
585
+ if (mechanisms.length === 0)
586
+ return `No mechanisms found for "${query}" in ChEMBL.`;
587
+ const output = [`## ChEMBL Mechanisms: "${query}"\n`];
588
+ for (const m of mechanisms.slice(0, 10)) {
589
+ let block = `**${m.molecule_chembl_id}** → ${m.target_chembl_id || 'N/A'}\n`;
590
+ block += `Mechanism: ${m.mechanism_of_action || 'N/A'}\n`;
591
+ block += `Action type: ${m.action_type || 'N/A'}\n`;
592
+ if (m.max_phase !== undefined)
593
+ block += `Max phase: ${m.max_phase}\n`;
594
+ output.push(block);
595
+ }
596
+ return output.join('\n---\n');
597
+ }
598
+ // Default: molecule search
599
+ const molecules = data.molecules || [];
600
+ if (molecules.length === 0)
601
+ return `No molecules found for "${query}" in ChEMBL.`;
602
+ const output = [`## ChEMBL Molecules: "${query}"\n`];
603
+ for (const mol of molecules.slice(0, 5)) {
604
+ const name = mol.pref_name || mol.molecule_chembl_id || 'Unknown';
605
+ const chemblId = mol.molecule_chembl_id || 'N/A';
606
+ const maxPhase = mol.max_phase !== undefined ? mol.max_phase : 'N/A';
607
+ const type = mol.molecule_type || 'N/A';
608
+ const props = mol.molecule_properties || {};
609
+ let block = `### ${name}\n`;
610
+ block += `**ChEMBL ID:** [${chemblId}](https://www.ebi.ac.uk/chembl/compound_report_card/${chemblId}/)\n`;
611
+ block += `**Type:** ${type} | **Max Clinical Phase:** ${maxPhase}\n`;
612
+ if (mol.first_approval)
613
+ block += `**First Approval:** ${mol.first_approval}\n`;
614
+ // Molecular properties
615
+ if (props.full_mwt)
616
+ block += `**MW:** ${props.full_mwt} Da`;
617
+ if (props.alogp)
618
+ block += ` | **ALogP:** ${props.alogp}`;
619
+ if (props.hba)
620
+ block += ` | **HBA:** ${props.hba}`;
621
+ if (props.hbd)
622
+ block += ` | **HBD:** ${props.hbd}`;
623
+ if (props.psa)
624
+ block += ` | **PSA:** ${props.psa}`;
625
+ if (props.full_mwt)
626
+ block += '\n';
627
+ if (props.ro5_violations !== undefined)
628
+ block += `**Lipinski violations:** ${props.ro5_violations}\n`;
629
+ if (mol.molecule_structures?.canonical_smiles) {
630
+ block += `**SMILES:** \`${mol.molecule_structures.canonical_smiles.slice(0, 100)}\`\n`;
631
+ }
632
+ output.push(block);
633
+ }
634
+ return output.join('\n---\n');
635
+ }
636
+ catch (e) {
637
+ return `Drug lookup failed: ${e?.message || e}`;
638
+ }
639
+ },
640
+ });
641
+ // ════════════════════════════════════════════════════════════════════════
642
+ // 7. Pathway Search (Reactome)
643
+ // ════════════════════════════════════════════════════════════════════════
644
registerTool({
    name: 'pathway_search',
    description: 'Search Reactome for biological pathways. Returns pathway names, species, summaries, and hierarchical relationships.',
    parameters: {
        query: { type: 'string', description: 'Pathway name or keyword (e.g., "apoptosis", "glycolysis", "MAPK signaling")', required: true },
        organism: { type: 'string', description: 'Species name (default: "Homo sapiens")' },
    },
    tier: 'free',
    /**
     * Query the Reactome ContentService search endpoint for pathways and
     * render up to ten hits as Markdown blocks joined by `---` rules.
     *
     * @param {{query: string, organism?: string}} args - Tool arguments.
     * @returns {Promise<string>} Markdown report, a "no pathways" notice, or
     *   an error message. Never rejects: failures are returned as text.
     */
    async execute(args) {
        const query = String(args.query);
        const organism = String(args.organism || 'Homo sapiens');
        const maxPathways = 10;
        const noResults = `No pathways found for "${query}" in Reactome.`;
        // Search hits can carry HTML markup (the summary was already being
        // stripped); apply the same cleanup to every text field we print.
        const stripTags = (text) => String(text).replace(/<[^>]+>/g, '');
        try {
            const url = `https://reactome.org/ContentService/search/query?query=${encodeURIComponent(query)}&species=${encodeURIComponent(organism)}&types=Pathway&cluster=true`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA, 'Accept': 'application/json' },
                signal: AbortSignal.timeout(10000),
            });
            // NOTE(review): the search endpoint appears to answer 404 when a
            // query has no hits; report that as "nothing found", not a failure.
            if (res.status === 404)
                return noResults;
            if (!res.ok)
                return `Reactome error: HTTP ${res.status}`;
            const data = await res.json();
            // Results arrive grouped; flatten the groups and cap the total.
            const entries = (data.results || [])
                .flatMap((group) => group.entries || [])
                .slice(0, maxPathways);
            if (entries.length === 0)
                return noResults;
            const output = [`## Reactome Pathways: "${query}"\n`];
            for (const entry of entries) {
                const title = entry.name ? stripTags(entry.name) : 'Unknown';
                let block = `### ${title}\n`;
                block += `**ID:** [${entry.stId}](https://reactome.org/content/detail/${entry.stId})\n`;
                block += `**Species:** ${entry.species?.[0] || organism}\n`;
                if (entry.summation) {
                    const summary = stripTags(entry.summation);
                    const truncated = summary.length > 300 ? summary.slice(0, 300) + '...' : summary;
                    block += `\n${truncated}\n`;
                }
                if (entry.compartmentNames?.length > 0) {
                    block += `**Compartments:** ${entry.compartmentNames.join(', ')}\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Pathway search failed: ${e?.message || e}`;
        }
    },
});
698
+ // ════════════════════════════════════════════════════════════════════════
699
+ // 8. Taxonomy Lookup (GBIF)
700
+ // ════════════════════════════════════════════════════════════════════════
701
registerTool({
    name: 'taxonomy_lookup',
    description: 'Look up taxonomic classification of any organism via GBIF. Returns full lineage from kingdom to species with taxonomic status.',
    parameters: {
        name: { type: 'string', description: 'Organism name (e.g., "Homo sapiens", "E. coli", "giant panda")', required: true },
        rank: { type: 'string', description: 'Expected rank filter: "species", "genus", "family", etc.' },
    },
    tier: 'free',
    /**
     * Search the GBIF species API and render the first three matches with
     * rank, status, lineage, common names and a short description.
     *
     * @param {{name: string, rank?: string}} args - Tool arguments; `rank`
     *   is upper-cased before being sent.
     * @returns {Promise<string>} Markdown report, a "no records" notice, or
     *   an error message. Never rejects: failures are returned as text.
     */
    async execute(args) {
        const name = String(args.name);
        const rank = args.rank ? String(args.rank).toUpperCase() : '';
        // Lineage fields in display order; genus and species are italicised.
        const lineageFields = [
            ['Kingdom', 'kingdom', false],
            ['Phylum', 'phylum', false],
            ['Class', 'class', false],
            ['Order', 'order', false],
            ['Family', 'family', false],
            ['Genus', 'genus', true],
            ['Species', 'species', true],
        ];
        try {
            let url = `https://api.gbif.org/v1/species/search?q=${encodeURIComponent(name)}&limit=5`;
            // rank is caller-supplied text: encode it so characters such as
            // '&' or '#' cannot alter the rest of the query string.
            if (rank)
                url += `&rank=${encodeURIComponent(rank)}`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok)
                return `GBIF error: HTTP ${res.status}`;
            const data = await res.json();
            const records = data.results || [];
            if (records.length === 0)
                return `No taxonomic records for "${name}" in GBIF.`;
            const output = [`## Taxonomy: "${name}"\n`];
            for (const sp of records.slice(0, 3)) {
                let block = `### ${sp.canonicalName || sp.scientificName || name}\n`;
                if (sp.authorship)
                    block += `*${sp.authorship}*\n`;
                block += `**Rank:** ${sp.rank || 'N/A'}\n`;
                block += `**Status:** ${sp.taxonomicStatus || 'N/A'}\n`;
                block += `**GBIF Key:** [${sp.key}](https://www.gbif.org/species/${sp.key})\n`;
                // Full lineage, skipping levels the record does not provide.
                const lineage = lineageFields
                    .filter(([, field]) => sp[field])
                    .map(([label, field, italic]) => (italic ? `${label}: *${sp[field]}*` : `${label}: ${sp[field]}`));
                if (lineage.length > 0) {
                    block += `\n**Lineage:**\n${lineage.map((l) => `- ${l}`).join('\n')}\n`;
                }
                if (sp.vernacularNames?.length > 0) {
                    const common = sp.vernacularNames.slice(0, 5).map((v) => v.vernacularName).filter(Boolean);
                    if (common.length > 0)
                        block += `\n**Common Names:** ${common.join(', ')}\n`;
                }
                if (sp.descriptions?.length > 0) {
                    const desc = sp.descriptions[0].description || '';
                    if (desc)
                        block += `\n${desc.slice(0, 300)}${desc.length > 300 ? '...' : ''}\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Taxonomy lookup failed: ${e?.message || e}`;
        }
    },
});
772
+ // ════════════════════════════════════════════════════════════════════════
773
+ // 9. Clinical Trials
774
+ // ════════════════════════════════════════════════════════════════════════
775
registerTool({
    name: 'clinical_trials',
    description: 'Search ClinicalTrials.gov for clinical studies. Filter by condition, drug, status, and phase.',
    parameters: {
        query: { type: 'string', description: 'Search term (e.g., "pembrolizumab melanoma", "COVID-19 vaccine")', required: true },
        status: { type: 'string', description: 'Filter: "recruiting", "completed", "active" (active, not recruiting), "enrolling" (enrolling by invitation)' },
        phase: { type: 'string', description: 'Phase filter: "EARLY_PHASE1", "PHASE1", "PHASE2", "PHASE3", "PHASE4"' },
    },
    tier: 'free',
    /**
     * Search the ClinicalTrials.gov v2 studies endpoint and render up to ten
     * studies (title, NCT link, status, phase, sponsor, conditions, dates,
     * summary) as Markdown blocks joined by `---` rules.
     *
     * @param {{query: string, status?: string, phase?: string}} args - Tool
     *   arguments. Friendly status words are mapped to API enum values; any
     *   other status is upper-cased and passed through.
     * @returns {Promise<string>} Markdown report, a "no trials" notice, or an
     *   error message. Never rejects: failures are returned as text.
     */
    async execute(args) {
        const query = String(args.query);
        const status = args.status ? String(args.status) : '';
        const phase = args.phase ? String(args.phase) : '';
        // A Map (rather than an object literal) so inherited keys such as
        // "constructor" can never be mistaken for a mapping.
        const statusMap = new Map([
            ['recruiting', 'RECRUITING'],
            ['completed', 'COMPLETED'],
            ['active', 'ACTIVE_NOT_RECRUITING'],
            ['enrolling', 'ENROLLING_BY_INVITATION'],
        ]);
        try {
            // countTotal=true asks the API to include totalCount; without it
            // the header could only report the size of the current page.
            let url = `https://clinicaltrials.gov/api/v2/studies?query.term=${encodeURIComponent(query)}&pageSize=10&countTotal=true`;
            if (status) {
                const mapped = statusMap.get(status.toLowerCase()) ?? status.toUpperCase();
                url += `&filter.overallStatus=${encodeURIComponent(mapped)}`;
            }
            if (phase) {
                // The v2 API has no dedicated phase filter parameter; phase is
                // expressed as a search-area expression via filter.advanced.
                const phaseExpr = `AREA[Phase]${phase.trim().toUpperCase()}`;
                url += `&filter.advanced=${encodeURIComponent(phaseExpr)}`;
            }
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok)
                return `ClinicalTrials.gov error: HTTP ${res.status}`;
            const data = await res.json();
            const studies = data.studies || [];
            if (studies.length === 0)
                return `No clinical trials found for "${query}".`;
            const totalCount = typeof data.totalCount === 'number' ? data.totalCount : studies.length;
            const output = [`## Clinical Trials: "${query}" (${totalCount} total)\n`];
            for (const study of studies) {
                const proto = study.protocolSection || {};
                const identification = proto.identificationModule || {};
                const statusInfo = proto.statusModule || {};
                const design = proto.designModule || {};
                const nctId = identification.nctId || 'N/A';
                const title = identification.officialTitle || identification.briefTitle || 'Untitled';
                const overallStatus = statusInfo.overallStatus || 'N/A';
                const phases = design.phases?.join(', ') || 'N/A';
                const startDate = statusInfo.startDateStruct?.date || '';
                const completionDate = statusInfo.completionDateStruct?.date || '';
                const briefSummary = proto.descriptionModule?.briefSummary || '';
                const conditions = proto.conditionsModule?.conditions?.join(', ') || '';
                const sponsor = proto.sponsorCollaboratorsModule?.leadSponsor?.name || '';
                let block = `### ${title.slice(0, 150)}\n`;
                block += `**NCT ID:** [${nctId}](https://clinicaltrials.gov/study/${nctId})\n`;
                block += `**Status:** ${overallStatus} | **Phase:** ${phases}\n`;
                if (sponsor)
                    block += `**Sponsor:** ${sponsor}\n`;
                if (conditions)
                    block += `**Conditions:** ${conditions}\n`;
                if (startDate)
                    block += `**Dates:** ${startDate}${completionDate ? ` → ${completionDate}` : ''}\n`;
                if (briefSummary) {
                    const truncated = briefSummary.length > 300 ? briefSummary.slice(0, 300) + '...' : briefSummary;
                    block += `\n> ${truncated}\n`;
                }
                output.push(block);
            }
            return output.join('\n---\n');
        }
        catch (e) {
            return `Clinical trials search failed: ${e?.message || e}`;
        }
    },
});
855
+ // ════════════════════════════════════════════════════════════════════════
856
+ // 10. Disease Info (Open Targets)
857
+ // ════════════════════════════════════════════════════════════════════════
858
registerTool({
    name: 'disease_info',
    description: 'Look up disease information from Open Targets Platform. Returns associated genes, drugs, and therapeutic areas via GraphQL.',
    parameters: {
        disease: { type: 'string', description: 'Disease name (e.g., "lung cancer", "Alzheimer", "diabetes mellitus")', required: true },
    },
    tier: 'free',
    /**
     * Resolve a disease name to Open Targets disease ids, then fetch and
     * render details (description, therapeutic areas, synonyms, top genes,
     * known drugs) for the first two matches.
     *
     * @param {{disease: string}} args - Tool arguments.
     * @returns {Promise<string>} Markdown report, a "not found" notice, or an
     *   error message. Never rejects: failures are returned as text.
     */
    async execute(args) {
        const disease = String(args.disease);
        const endpoint = 'https://api.platform.opentargets.org/api/v4/graphql';
        // Both steps hit the same endpoint with the same transport settings.
        const postGraphql = (query, variables) => fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'User-Agent': UA },
            body: JSON.stringify({ query, variables }),
            signal: AbortSignal.timeout(10000),
        });
        try {
            // Step 1: Search for disease ID
            const searchQuery = `
                query SearchDisease($q: String!) {
                    search(queryString: $q, entityNames: ["disease"], page: { index: 0, size: 3 }) {
                        hits {
                            id
                            entity
                            name
                            description
                        }
                        total
                    }
                }
            `;
            const searchRes = await postGraphql(searchQuery, { q: disease });
            if (!searchRes.ok)
                return `Open Targets error: HTTP ${searchRes.status}`;
            const searchData = await searchRes.json();
            const hits = searchData.data?.search?.hits || [];
            // GraphQL reports query failures in an `errors` array on an HTTP
            // 200 response; do not present that as "no disease found".
            if (hits.length === 0 && searchData.errors?.length > 0)
                return `Open Targets error: ${searchData.errors[0]?.message || 'GraphQL query failed'}`;
            const diseaseHits = hits.filter((h) => h.entity === 'disease');
            if (diseaseHits.length === 0)
                return `No disease found for "${disease}" in Open Targets.`;
            // Step 2: Get disease details with associations. The query text
            // is identical for every hit, so build it once.
            const detailQuery = `
                query DiseaseDetail($id: String!) {
                    disease(efoId: $id) {
                        id
                        name
                        description
                        therapeuticAreas {
                            id
                            name
                        }
                        synonyms {
                            terms
                            relation
                        }
                        knownDrugs(size: 10) {
                            uniqueTargetCount
                            uniqueDrugCount
                            rows {
                                drug {
                                    id
                                    name
                                    drugType
                                    maximumClinicalTrialPhase
                                }
                                mechanismOfAction
                                approvedIndications
                            }
                        }
                        associatedTargets(page: { index: 0, size: 10 }) {
                            count
                            rows {
                                target {
                                    id
                                    approvedSymbol
                                    approvedName
                                }
                                score
                                datatypeScores {
                                    componentId: id
                                    score
                                }
                            }
                        }
                    }
                }
            `;
            const output = [`## Disease Info: "${disease}"\n`];
            let rendered = 0;
            for (const hit of diseaseHits.slice(0, 2)) {
                const detailRes = await postGraphql(detailQuery, { id: hit.id });
                if (!detailRes.ok)
                    continue;
                const detailData = await detailRes.json();
                const d = detailData.data?.disease;
                if (!d)
                    continue;
                let block = `### ${d.name}\n`;
                block += `**EFO ID:** [${d.id}](https://platform.opentargets.org/disease/${d.id})\n`;
                if (d.description) {
                    const truncated = d.description.length > 500 ? d.description.slice(0, 500) + '...' : d.description;
                    block += `\n${truncated}\n`;
                }
                // Therapeutic areas
                if (d.therapeuticAreas?.length > 0) {
                    block += `\n**Therapeutic Areas:** ${d.therapeuticAreas.map((t) => t.name).join(', ')}\n`;
                }
                // Synonyms: exact ones only, capped at eight terms.
                if (d.synonyms?.length > 0) {
                    const exactSynonyms = d.synonyms
                        .filter((s) => s.relation === 'HAS_EXACT_SYNONYM')
                        .flatMap((s) => s.terms || [])
                        .slice(0, 8);
                    if (exactSynonyms.length > 0) {
                        block += `**Synonyms:** ${exactSynonyms.join(', ')}\n`;
                    }
                }
                // Top associated genes
                const targets = d.associatedTargets;
                if (targets?.rows?.length > 0) {
                    block += `\n**Top Associated Genes** (${targets.count} total):\n`;
                    for (const row of targets.rows.slice(0, 8)) {
                        // One incomplete row must not abort the whole report.
                        const t = row.target || {};
                        const score = typeof row.score === 'number' ? row.score.toFixed(3) : 'N/A';
                        block += `- **${t.approvedSymbol || 'Unknown'}** (${t.approvedName || 'N/A'}) — score: ${score}\n`;
                    }
                }
                // Known drugs, listing each drug name once.
                const drugs = d.knownDrugs;
                if (drugs?.rows?.length > 0) {
                    block += `\n**Known Drugs** (${drugs.uniqueDrugCount} drugs, ${drugs.uniqueTargetCount} targets):\n`;
                    const seen = new Set();
                    for (const row of drugs.rows) {
                        const drugName = row.drug?.name || 'Unknown';
                        if (seen.has(drugName))
                            continue;
                        seen.add(drugName);
                        const phase = row.drug?.maximumClinicalTrialPhase ?? 'N/A';
                        const moa = row.mechanismOfAction || '';
                        block += moa
                            ? `- **${drugName}** (phase ${phase}) — ${moa}\n`
                            : `- **${drugName}** (phase ${phase})\n`;
                    }
                }
                output.push(block);
                rendered++;
            }
            // Every detail request failed: say so instead of a bare header.
            if (rendered === 0)
                return `Found "${disease}" in Open Targets, but detailed records could not be retrieved.`;
            return output.join('\n---\n');
        }
        catch (e) {
            return `Disease info lookup failed: ${e?.message || e}`;
        }
    },
});
1010
+ // ════════════════════════════════════════════════════════════════════════
1011
+ // 11. Sequence Tools (Local analysis)
1012
+ // ════════════════════════════════════════════════════════════════════════
1013
registerTool({
    name: 'sequence_tools',
    description: 'Local sequence analysis tools: GC content, reverse complement, translation (standard genetic code), ORF finding, motif search (regex), and molecular weight calculation. No API calls — runs instantly.',
    parameters: {
        sequence: { type: 'string', description: 'DNA/RNA/protein sequence (raw or FASTA format)', required: true },
        operation: { type: 'string', description: 'Operation: "gc_content", "reverse_complement", "translate", "find_orfs", "motif_search", "molecular_weight"', required: true },
        pattern: { type: 'string', description: 'Regex pattern for motif_search (e.g., "ATG[ATCG]{3,9}TAA")' },
    },
    tier: 'free',
    /**
     * Run one local analysis on a nucleotide or protein sequence.
     *
     * The input is cleaned first: FASTA header lines, whitespace and digits
     * are dropped and the rest is upper-cased. The sequence type is then
     * inferred from its alphabet (a sequence made only of A/C/G/N counts as
     * DNA).
     *
     * @param {{sequence: string, operation: string, pattern?: string}} args
     *   Tool arguments; `pattern` is only read by `motif_search`.
     * @returns {Promise<string>} Markdown report, or a plain message when the
     *   input or operation is not usable.
     */
    async execute(args) {
        const rawSeq = String(args.sequence).trim();
        const operation = String(args.operation);
        const pattern = args.pattern ? String(args.pattern) : '';
        // Clean sequence: remove FASTA headers, whitespace, numbers
        const seq = rawSeq
            .split('\n')
            .filter((line) => !line.trimStart().startsWith('>'))
            .join('')
            .replace(/[\s\d]/g, '')
            .toUpperCase();
        if (seq.length === 0)
            return 'No valid sequence provided. Remove FASTA headers and whitespace.';
        const isDna = /^[ATCGN]+$/.test(seq);
        const isRna = /^[AUCGN]+$/.test(seq);
        // X is accepted so that proteins produced by `translate` (which emits
        // X for unresolved codons) can be fed back into `molecular_weight`.
        const isProtein = /^[ACDEFGHIKLMNPQRSTVWXY*]+$/.test(seq) && !isDna;
        const isNucleic = isDna || isRna;

        // Number of occurrences of a single character in the cleaned sequence.
        const countOf = (ch) => seq.split(ch).length - 1;
        // Shorten long strings to "head...tail", keeping `edge` chars per side.
        const elide = (text, edge) => (text.length > edge * 2 ? `${text.slice(0, edge)}...${text.slice(-edge)}` : text);
        // The sequence in the DNA alphabet (U -> T for RNA input).
        const asDna = () => (isRna ? seq.replace(/U/g, 'T') : seq);
        // Translate one reading frame; codons with N or unknown codons give X.
        const translateFrame = (nt, frame) => {
            let protein = '';
            for (let i = frame; i + 2 < nt.length; i += 3) {
                const codon = nt.slice(i, i + 3);
                protein += codon.includes('N') ? 'X' : (CODON_TABLE[codon] || 'X');
            }
            return protein;
        };

        switch (operation) {
            case 'gc_content': {
                if (!isNucleic)
                    return 'GC content requires a DNA or RNA sequence (A, T/U, G, C, N).';
                const gc = countOf('G') + countOf('C');
                // Percentages are computed over unambiguous bases only.
                const total = seq.length - countOf('N');
                const at = total - gc;
                const pairLabel = isDna ? 'AT' : 'AU';
                let result = `## GC Content Analysis\n\n`;
                result += `**Sequence length:** ${seq.length} bp\n`;
                result += `**GC count:** ${gc} | **${pairLabel} count:** ${at}\n`;
                if (total > 0) {
                    const pct = ((gc / total) * 100).toFixed(2);
                    result += `**GC%:** ${pct}%\n`;
                    // Derived from the rounded GC% so the two always sum to 100.
                    result += `**${pairLabel}%:** ${(100 - parseFloat(pct)).toFixed(2)}%\n`;
                }
                else {
                    result += `**GC%:** N/A (no unambiguous bases)\n`;
                }
                // Base composition
                const counts = {};
                for (const base of seq) {
                    counts[base] = (counts[base] || 0) + 1;
                }
                result += `\n**Base composition:**\n`;
                const byBase = Object.entries(counts).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
                for (const [base, count] of byBase) {
                    result += `- ${base}: ${count} (${((count / seq.length) * 100).toFixed(1)}%)\n`;
                }
                // Tm estimation (basic: 2*(A+T) + 4*(G+C) for short, or 64.9 + 41*(G+C-16.4)/N).
                // Skipped when there are no unambiguous bases to base it on.
                if (total > 0) {
                    if (seq.length <= 30) {
                        const tm = 2 * at + 4 * gc;
                        result += `\n**Estimated Tm (basic):** ${tm} C (for primers < 30 bp)\n`;
                    }
                    else {
                        const tm = 64.9 + 41 * (gc - 16.4) / total;
                        result += `\n**Estimated Tm (salt-adjusted):** ${tm.toFixed(1)} C\n`;
                    }
                }
                return result;
            }
            case 'reverse_complement': {
                if (!isNucleic)
                    return 'Reverse complement requires a DNA or RNA sequence.';
                const complementMap = isDna
                    ? { A: 'T', T: 'A', G: 'C', C: 'G', N: 'N' }
                    : { A: 'U', U: 'A', G: 'C', C: 'G', N: 'N' };
                const complement = [...seq].map((b) => complementMap[b] || 'N').join('');
                const revComp = [...complement].reverse().join('');
                let result = `## Reverse Complement\n\n`;
                result += `**Input (${isDna ? 'DNA' : 'RNA'}, ${seq.length} bp):**\n`;
                result += `5'-\`${elide(seq, 40)}\`-3'\n\n`;
                result += `**Complement:**\n`;
                result += `3'-\`${elide(complement, 40)}\`-5'\n\n`;
                result += `**Reverse complement:**\n`;
                result += `5'-\`${elide(revComp, 40)}\`-3'\n`;
                return result;
            }
            case 'translate': {
                if (!isNucleic)
                    return 'Translation requires a DNA or RNA sequence.';
                const dnaSeq = asDna();
                // Translate all 3 forward reading frames
                const frames = [0, 1, 2].map((frame) => translateFrame(dnaSeq, frame));
                const results = [`## Translation (Standard Genetic Code)\n`];
                results.push(`**Input:** ${dnaSeq.length} bp\n`);
                frames.forEach((proteinStr, frame) => {
                    results.push(`**Frame +${frame + 1}:**`);
                    results.push(`\`${elide(proteinStr, 60)}\``);
                    results.push(`(${proteinStr.length} aa, ${(proteinStr.match(/\*/g) || []).length} stop codons)\n`);
                });
                // Report the longest M-initiated stretch in frame +1 (first
                // one wins on ties), as the label below promises.
                let longest = '';
                for (const m of frames[0].matchAll(/M[^*]+/g)) {
                    if (m[0].length > longest.length)
                        longest = m[0];
                }
                if (longest) {
                    results.push(`**Longest ORF (frame +1):** ${longest.length} aa starting at M`);
                    results.push(`\`${longest.slice(0, 80)}${longest.length > 80 ? '...' : ''}\``);
                }
                return results.join('\n');
            }
            case 'find_orfs': {
                if (!isNucleic)
                    return 'ORF finding requires a DNA or RNA sequence.';
                const dnaSeq = asDna();
                const minOrfLength = 30; // minimum 30 aa = 90 bp
                const orfs = [];
                // Collect ORFs (M up to the next stop, or to the end of the
                // frame when no stop follows) from the three frames of one
                // strand. Coordinates are always 1-based on the input strand.
                const scanStrand = (nt, isReverse) => {
                    for (let frame = 0; frame < 3; frame++) {
                        const protStr = translateFrame(nt, frame);
                        for (const m of protStr.matchAll(/M[^*]*/g)) {
                            const aaLength = m[0].length;
                            if (aaLength < minOrfLength)
                                continue;
                            const ntStart = frame + m.index * 3; // 0-based, on the scanned strand
                            const ntEnd = frame + (m.index + aaLength) * 3; // exclusive
                            orfs.push({
                                frame: isReverse ? -(frame + 1) : frame + 1,
                                start: isReverse ? nt.length - ntEnd + 1 : ntStart + 1,
                                end: isReverse ? nt.length - ntStart : ntEnd,
                                length: aaLength,
                                protein: m[0],
                            });
                        }
                    }
                };
                scanStrand(dnaSeq, false);
                // Also search reverse complement
                const compMap = { A: 'T', T: 'A', G: 'C', C: 'G', N: 'N' };
                const rcSeq = [...dnaSeq].map((b) => compMap[b] || 'N').reverse().join('');
                scanStrand(rcSeq, true);
                // Sort by length descending
                orfs.sort((a, b) => b.length - a.length);
                let result = `## Open Reading Frames (min 30 aa)\n\n`;
                result += `**Sequence length:** ${dnaSeq.length} bp\n`;
                result += `**ORFs found:** ${orfs.length}\n\n`;
                if (orfs.length === 0) {
                    result += 'No ORFs >= 30 aa found. Try a longer sequence or lower threshold.\n';
                }
                else {
                    for (const orf of orfs.slice(0, 15)) {
                        result += `**Frame ${orf.frame > 0 ? '+' : ''}${orf.frame}** | nt ${orf.start}-${orf.end} | **${orf.length} aa** (${orf.length * 3} bp)\n`;
                        result += `\`${orf.protein.slice(0, 60)}${orf.protein.length > 60 ? '...' : ''}\`\n\n`;
                    }
                }
                return result;
            }
            case 'motif_search': {
                if (!pattern)
                    return 'Motif search requires a pattern parameter (regex). Example: "ATG[ATCG]{3,9}TAA"';
                // NOTE(review): the pattern is caller-supplied and runs on the
                // built-in regex engine, so a pathological pattern can take a
                // long time on a large sequence.
                let re;
                try {
                    re = new RegExp(pattern, 'gi');
                }
                catch (err) {
                    return `Invalid regex pattern: ${err?.message || err}`;
                }
                const maxShown = 50;
                const shown = [];
                let matchCount = 0;
                let m;
                while ((m = re.exec(seq)) !== null) {
                    if (m[0].length === 0) {
                        // An empty match is not a motif occurrence; step past
                        // it so the scan cannot loop forever.
                        re.lastIndex++;
                        continue;
                    }
                    matchCount++;
                    // Only the first matches are displayed, so only keep those.
                    if (shown.length < maxShown)
                        shown.push({ start: m.index + 1, end: m.index + m[0].length, match: m[0] });
                }
                let result = `## Motif Search\n\n`;
                result += `**Pattern:** \`${pattern}\`\n`;
                result += `**Sequence length:** ${seq.length}\n`;
                result += `**Matches found:** ${matchCount}\n\n`;
                if (matchCount === 0) {
                    result += 'No matches found.\n';
                }
                else {
                    for (const match of shown) {
                        const display = match.match.length > 60 ? match.match.slice(0, 60) + '...' : match.match;
                        result += `- **Position ${match.start}-${match.end}:** \`${display}\`\n`;
                    }
                    if (matchCount > maxShown)
                        result += `\n... and ${matchCount - maxShown} more matches.\n`;
                }
                return result;
            }
            case 'molecular_weight': {
                let result = `## Molecular Weight\n\n`;
                result += `**Sequence length:** ${seq.length}\n`;
                if (isProtein) {
                    // Protein MW: sum of AA weights - (n-1) * water (18.02)
                    let mw = 0;
                    let unknowns = 0;
                    for (const aa of seq) {
                        if (aa === '*')
                            continue; // stop codon
                        if (AA_WEIGHTS[aa]) {
                            mw += AA_WEIGHTS[aa];
                        }
                        else {
                            unknowns++;
                            mw += 128.16; // average AA weight
                        }
                    }
                    // Subtract water for peptide bonds; a sequence with no
                    // residues (only stops) has no bonds to subtract.
                    const aas = seq.length - countOf('*');
                    mw -= Math.max(aas - 1, 0) * 18.02;
                    result += `**Type:** Protein (${aas} amino acids)\n`;
                    result += `**Molecular Weight:** ${mw.toFixed(2)} Da (${(mw / 1000).toFixed(2)} kDa)\n`;
                    if (unknowns > 0)
                        result += `*Note: ${unknowns} unknown residues estimated at 128.16 Da (average)*\n`;
                    // Extinction coefficient estimate (Pace method)
                    const nTrp = countOf('W');
                    const nTyr = countOf('Y');
                    const nCys = countOf('C');
                    const e280 = nTrp * 5500 + nTyr * 1490 + nCys * 125;
                    result += `\n**Extinction coefficient (280nm):** ${e280} M\u207B\u00B9cm\u207B\u00B9\n`;
                    result += `(${nTrp} Trp, ${nTyr} Tyr, ${nCys} Cys)\n`;
                    // Counts of charged residues (no isoelectric point is computed)
                    const nAsp = countOf('D');
                    const nGlu = countOf('E');
                    const nHis = countOf('H');
                    const nLys = countOf('K');
                    const nArg = countOf('R');
                    const negCharge = nAsp + nGlu;
                    const posCharge = nHis + nLys + nArg;
                    result += `\n**Charge residues:** ${posCharge} positive (K:${nLys} R:${nArg} H:${nHis}), ${negCharge} negative (D:${nAsp} E:${nGlu})\n`;
                }
                else if (isNucleic) {
                    // Nucleic acid MW
                    const weights = isDna ? DNA_WEIGHTS : { A: 347.2, U: 324.2, G: 363.2, C: 323.2 };
                    let mw = 0;
                    for (const base of seq) {
                        mw += weights[base] || 330; // average for N
                    }
                    // Subtract one water per phosphodiester bond
                    mw -= (seq.length - 1) * 18.02;
                    result += `**Type:** ${isDna ? 'DNA' : 'RNA'} (${seq.length} ${isDna ? 'bp' : 'nt'})\n`;
                    result += `**Molecular Weight (ss):** ${mw.toFixed(2)} Da (${(mw / 1000).toFixed(2)} kDa)\n`;
                    if (isDna) {
                        // Rough estimate: twice the single-strand weight.
                        result += `**Molecular Weight (ds):** ~${(mw * 2).toFixed(0)} Da (${((mw * 2) / 1000).toFixed(2)} kDa)\n`;
                    }
                    // Concentration conversion
                    const ugPerOd = isDna ? (seq.length < 25 ? 33 : 50) : 40;
                    result += `\n**1 OD260 =** ~${ugPerOd} ug/mL\n`;
                }
                else {
                    result += 'Could not determine sequence type (DNA/RNA/protein). Check for invalid characters.\n';
                }
                return result;
            }
            default:
                return `Unknown operation: "${operation}". Supported: gc_content, reverse_complement, translate, find_orfs, motif_search, molecular_weight`;
        }
    },
});
1305
+ // ════════════════════════════════════════════════════════════════════════
1306
+ // 12. Ecology Data (GBIF Occurrences)
1307
+ // ════════════════════════════════════════════════════════════════════════
1308
registerTool({
    name: 'ecology_data',
    description: 'Search GBIF for biodiversity occurrence records. Find species observations with location, date, and collection data.',
    parameters: {
        species: { type: 'string', description: 'Scientific name (e.g., "Panthera tigris", "Quercus robur")', required: true },
        country: { type: 'string', description: 'ISO 3166-1 alpha-2 country code (e.g., "US", "BR", "AU")' },
        limit: { type: 'number', description: 'Max records (default 20, max 100)' },
    },
    tier: 'free',
    /**
     * Search GBIF occurrence records (georeferenced only) for a species and
     * render summary statistics plus up to twenty individual records.
     *
     * @param {{species: string, country?: string, limit?: number}} args
     *   Tool arguments. `limit` is truncated to an integer and clamped to
     *   1..100; a missing or non-numeric value falls back to 20.
     * @returns {Promise<string>} Markdown report, a "no records" notice, or
     *   an error message. Never rejects: failures are returned as text.
     */
    async execute(args) {
        const species = String(args.species);
        const country = args.country ? String(args.country).toUpperCase() : '';
        // Clamp the page size: zero, negative, fractional or NaN values
        // would otherwise be sent to the API as-is.
        const requested = args.limit == null || args.limit === '' ? NaN : Number(args.limit);
        const limit = Number.isFinite(requested)
            ? Math.min(Math.max(Math.trunc(requested), 1), 100)
            : 20;
        // Increment a counter stored in a Map.
        const bump = (tally, key) => tally.set(key, (tally.get(key) || 0) + 1);
        try {
            let url = `https://api.gbif.org/v1/occurrence/search?scientificName=${encodeURIComponent(species)}&limit=${limit}&hasCoordinate=true`;
            // country is caller-supplied text: encode it so it cannot alter
            // the rest of the query string.
            if (country)
                url += `&country=${encodeURIComponent(country)}`;
            const res = await fetch(url, {
                headers: { 'User-Agent': UA },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok)
                return `GBIF error: HTTP ${res.status}`;
            const data = await res.json();
            const records = data.results || [];
            const totalCount = data.count || 0;
            if (records.length === 0)
                return `No GBIF occurrence records for "${species}"${country ? ` in ${country}` : ''}. Try the full scientific name.`;
            const output = [`## GBIF Occurrences: *${species}*\n`];
            output.push(`**Total records:** ${totalCount.toLocaleString()}${country ? ` (filtered: ${country})` : ''}\n`);
            // Summary stats over the returned sample
            const countries = new Map();
            const years = new Map();
            const basisOfRecord = new Map();
            for (const rec of records) {
                bump(countries, rec.country || 'Unknown');
                if (rec.year)
                    bump(years, rec.year);
                bump(basisOfRecord, rec.basisOfRecord || 'Unknown');
            }
            // Country distribution, most frequent first
            const sortedCountries = [...countries.entries()].sort((a, b) => b[1] - a[1]);
            output.push(`**Countries in sample:** ${sortedCountries.map(([c, n]) => `${c} (${n})`).join(', ')}\n`);
            // Record types
            output.push(`**Record types:** ${[...basisOfRecord.entries()].map(([t, n]) => `${t.replace(/_/g, ' ')} (${n})`).join(', ')}\n`);
            // Year range: compare numerically, since the default sort orders
            // values as strings and would place 999 after 2001.
            const yearKeys = [...years.keys()].sort((a, b) => a - b);
            if (yearKeys.length > 0) {
                output.push(`**Year range:** ${yearKeys[0]}–${yearKeys[yearKeys.length - 1]}\n`);
            }
            // Individual records
            output.push('\n### Records\n');
            for (const rec of records.slice(0, 20)) {
                const name = rec.species || rec.scientificName || species;
                const lat = rec.decimalLatitude?.toFixed(4) || '?';
                const lon = rec.decimalLongitude?.toFixed(4) || '?';
                const date = rec.eventDate || rec.year || 'N/A';
                const loc = rec.locality || rec.stateProvince || '';
                const countryName = rec.country || '';
                const institution = rec.institutionCode || '';
                const basis = rec.basisOfRecord?.replace(/_/g, ' ') || '';
                let line = `- **${name}** — ${lat}, ${lon}`;
                if (countryName)
                    line += ` (${countryName}${loc ? `, ${loc}` : ''})`;
                // date always has a value ('N/A' at worst), so it is always shown.
                line += ` | ${date}`;
                if (basis)
                    line += ` | ${basis}`;
                if (institution)
                    line += ` | ${institution}`;
                if (rec.gbifID)
                    line += ` | [GBIF:${rec.gbifID}](https://www.gbif.org/occurrence/${rec.gbifID})`;
                output.push(line);
            }
            return output.join('\n');
        }
        catch (e) {
            return `Ecology data lookup failed: ${e?.message || e}`;
        }
    },
});
1391
+ }
1392
+ //# sourceMappingURL=lab-bio.js.map