@kernel.chat/kbot 3.42.0 → 3.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1094 @@
1
+ // kbot Research Pipeline Tools — Multi-step research workflows
2
+ // Chains kbot's 72 science tools into composite pipelines for automated
3
+ // literature reviews, drug discovery, genomic analysis, environmental
4
+ // assessments, materials discovery, statistical analysis, astronomy
5
+ // investigations, and cross-domain searches.
6
+ //
7
+ // Each pipeline orchestrates multiple tools via executeTool(), running
8
+ // independent stages in parallel (Promise.all) and dependent stages
9
+ // sequentially. All tools return markdown strings, tier: 'free'.
10
+ import { registerTool, executeTool } from './index.js';
11
+ // ─── Helpers ────────────────────────────────────────────────────────────────
12
/** Module-wide counter that keeps IDs distinct within a single millisecond. */
let callSeq = 0;
/**
 * Build a fresh tool-call identifier shaped like `rp_<epoch ms>_<sequence>`.
 * @returns {string} The new identifier; the sequence part grows by one per call.
 */
function callId() {
    callSeq += 1;
    return ['rp', Date.now(), callSeq].join('_');
}
17
/**
 * Run a registered tool by name and hand back only its result payload.
 * @param {string} name - Name of the registered tool.
 * @param {object} args - Arguments forwarded to the tool as `arguments`.
 * @returns {Promise<*>} The `result` field of whatever `executeTool` resolves to.
 */
async function runTool(name, args) {
    const { result } = await executeTool({ id: callId(), name, arguments: args });
    return result;
}
23
/**
 * Execute a registered tool and report its outcome together with wall-clock timing.
 *
 * Pipeline stages call string methods (`slice`, `match`) on `result` without
 * always checking `error` first, and they fan out many calls under
 * `Promise.all`. This helper therefore never rejects and always yields a
 * string `result`, so one failing tool degrades a single report section
 * instead of aborting the whole pipeline.
 *
 * @param {string} name - Name of the registered tool.
 * @param {object} args - Arguments forwarded to the tool as `arguments`.
 * @returns {Promise<{result: string, durationMs: number, error: boolean}>}
 *   `error` is true when the tool flagged an error, returned nothing, or threw.
 */
async function runToolTimed(name, args) {
    const start = Date.now();
    const call = { id: callId(), name, arguments: args };
    try {
        const res = await executeTool(call);
        const payload = res?.result;
        return {
            result: typeof payload === 'string' ? payload : String(payload ?? ''),
            durationMs: Date.now() - start,
            error: res == null || Boolean(res.error),
        };
    }
    catch (err) {
        // A thrown/rejected tool call is reported like any other failed stage.
        return {
            result: `Error: ${err instanceof Error ? err.message : String(err)}`,
            durationMs: Date.now() - start,
            error: true,
        };
    }
}
34
/**
 * Parse a JSON string without ever throwing.
 * @param {string} s - Candidate JSON text.
 * @returns {*} The parsed value, or `null` when `s` is not valid JSON.
 */
function safeJsonParse(s) {
    let parsed = null;
    try {
        parsed = JSON.parse(s);
    }
    catch {
        // Malformed input: keep the null default.
    }
    return parsed;
}
43
/**
 * Extract the distinct DOIs mentioned in a piece of markdown text.
 *
 * Trailing sentence/markup punctuation (`.`, `;`, `:`, quotes, `*`, `>`) is
 * stripped from each match, and duplicates are detected case-insensitively
 * because DOI names are case-insensitive. The first-seen spelling is kept.
 *
 * @param {string} text - Text to scan; null/undefined is treated as empty.
 * @returns {string[]} Unique DOIs in order of first appearance.
 */
function extractDois(text) {
    const candidates = String(text ?? '').match(/10\.\d{4,9}\/[^\s,)}\]]+/g) ?? [];
    const byKey = new Map();
    for (const raw of candidates) {
        const doi = raw.replace(/[.;:'"*>]+$/, '');
        // Nothing left after the registrant prefix means it was only punctuation.
        if (doi.endsWith('/')) {
            continue;
        }
        const key = doi.toLowerCase();
        if (!byKey.has(key)) {
            byKey.set(key, doi);
        }
    }
    return [...byKey.values()];
}
49
/**
 * Pull the first decimal number (optionally negative) out of a string.
 * @param {string} text - Text to scan.
 * @returns {number|null} The first number found, or `null` if there is none.
 */
function extractNumber(text) {
    const [firstHit] = text.match(/-?\d+\.?\d*/) ?? [];
    return firstHit === undefined ? null : Number.parseFloat(firstHit);
}
54
/**
 * Render a millisecond duration for display.
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} `"<ms>ms"` below one second, otherwise seconds with one decimal.
 */
function fmtDuration(ms) {
    const underOneSecond = ms < 1000;
    return underOneSecond ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
60
/**
 * Extract the body of a markdown section, i.e. the text between the heading
 * whose title starts with `header` and the next heading (or end of input).
 *
 * `header` is matched literally and case-insensitively. Regex metacharacters
 * in it are escaped, so titles such as "Cost (USD)" or "C++" work and cannot
 * produce an invalid pattern.
 *
 * @param {string} md - Markdown document.
 * @param {string} header - Heading text to look for (without the leading `#`s).
 * @returns {string} Trimmed section body, or `''` when the heading is absent.
 */
function extractSection(md, header) {
    const literalHeader = String(header).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const re = new RegExp(`#+\\s*${literalHeader}[^\\n]*\\n([\\s\\S]*?)(?=\\n#+\\s|$)`, 'i');
    const m = md.match(re);
    return m ? m[1].trim() : '';
}
66
/**
 * Drop entries whose DOIs have all been seen in earlier entries.
 *
 * An entry survives when it mentions no DOI at all, or when it contributes at
 * least one DOI not seen before; in the latter case its new DOIs are recorded.
 *
 * @param {Array<{text: string}>} results - Entries in priority order.
 * @returns {Array<{text: string}>} The surviving entries, original order kept.
 */
function deduplicateByDoi(results) {
    const known = new Set();
    return results.filter((entry) => {
        const found = extractDois(entry.text);
        if (found.length === 0) {
            // Nothing to compare against, so the entry is kept.
            return true;
        }
        const fresh = found.filter((doi) => !known.has(doi));
        for (const doi of fresh) {
            known.add(doi);
        }
        // Only entries that contribute something new are kept.
        return fresh.length > 0;
    });
}
87
/**
 * Return the lines of `text` that match `pattern`.
 *
 * The pattern is copied and its `lastIndex` reset before every line, because
 * `RegExp.prototype.test` is stateful for `/g` and `/y` patterns and would
 * otherwise skip matching lines. The caller's regex object is left untouched.
 *
 * @param {string} text - Multi-line text.
 * @param {RegExp} pattern - Pattern each line is tested against.
 * @returns {string[]} Matching lines in their original order.
 */
function extractLines(text, pattern) {
    const matcher = new RegExp(pattern);
    return text.split('\n').filter((line) => {
        matcher.lastIndex = 0;
        return matcher.test(line);
    });
}
91
/**
 * Sum the `durationMs` of every pipeline stage.
 * @param {Array<{durationMs: number}>} stages - Completed stages.
 * @returns {number} Combined duration in milliseconds (0 for no stages).
 */
function totalDuration(stages) {
    let elapsed = 0;
    for (const stage of stages) {
        elapsed += stage.durationMs;
    }
    return elapsed;
}
95
+ // ─── Registration ───────────────────────────────────────────────────────────
96
+ export function registerResearchPipelineTools() {
97
+ // ══════════════════════════════════════════════════════════════════════════
98
+ // 1. Literature Review
99
+ // ══════════════════════════════════════════════════════════════════════════
100
+ registerTool({
101
+ name: 'literature_review',
102
+ description: 'Complete automated literature review pipeline. Searches OpenAlex + PubMed + arXiv + bioRxiv in parallel, deduplicates by DOI, ranks by citation count + recency, extracts key themes and gaps, and generates a structured review with sections: Background, Key Findings, Gaps, Future Directions.',
103
+ parameters: {
104
+ topic: { type: 'string', description: 'Research topic to review', required: true },
105
+ field: { type: 'string', description: 'Field: biology, chemistry, physics, earth, cs, math', required: true },
106
+ depth: { type: 'string', description: 'Depth: quick (5 results/source), standard (15), comprehensive (30)', required: true },
107
+ year_from: { type: 'number', description: 'Only include papers from this year onward (optional)' },
108
+ },
109
+ tier: 'free',
110
+ timeout: 600_000, // 10 min for comprehensive
111
+ maxResultSize: 100_000,
112
+ async execute(args) {
113
+ const topic = String(args.topic);
114
+ const field = String(args.field || 'biology');
115
+ const depth = String(args.depth || 'standard');
116
+ const yearFrom = typeof args.year_from === 'number' ? args.year_from : undefined;
117
+ const limitMap = { quick: 5, standard: 15, comprehensive: 30 };
118
+ const limit = limitMap[depth] || 15;
119
+ const pipelineStart = Date.now();
120
+ // ── Stage 1: Parallel search across all sources ──
121
+ const searchQuery = yearFrom ? `${topic} ${yearFrom}-` : topic;
122
+ const [litResult, pubmedResult, preprintResult] = await Promise.all([
123
+ // OpenAlex via literature_search
124
+ runToolTimed('literature_search', {
125
+ query: topic,
126
+ database: 'openalex',
127
+ limit,
128
+ ...(yearFrom ? { year_from: yearFrom } : {}),
129
+ }),
130
+ // PubMed
131
+ runToolTimed('pubmed_search', {
132
+ query: topic,
133
+ limit,
134
+ sort: 'relevance',
135
+ }),
136
+ // arXiv + bioRxiv via preprint_tracker
137
+ runToolTimed('preprint_tracker', {
138
+ query: topic,
139
+ servers: field === 'biology' ? 'biorxiv,arxiv' :
140
+ field === 'chemistry' ? 'arxiv,chemrxiv' :
141
+ 'arxiv',
142
+ days: yearFrom ? Math.min(365 * 3, Math.floor((Date.now() - new Date(`${yearFrom}-01-01`).getTime()) / 86400000)) : 365,
143
+ limit,
144
+ }),
145
+ ]);
146
+ // ── Stage 2: Deduplicate by DOI ──
147
+ const allResults = [
148
+ { source: 'OpenAlex', text: litResult.result },
149
+ { source: 'PubMed', text: pubmedResult.result },
150
+ { source: 'Preprints', text: preprintResult.result },
151
+ ];
152
+ const deduped = deduplicateByDoi(allResults);
153
+ const allDois = new Set();
154
+ deduped.forEach(r => extractDois(r.text).forEach(d => allDois.add(d)));
155
+ // ── Stage 3: Citation graph for top DOIs (if available) ──
156
+ let citationInfo = '';
157
+ const topDois = [...allDois].slice(0, 5);
158
+ if (topDois.length > 0) {
159
+ const citResults = await Promise.all(topDois.map(doi => runToolTimed('citation_graph', { doi, depth: 1 })));
160
+ const citSummaries = citResults
161
+ .filter(r => !r.error)
162
+ .map(r => {
163
+ const citCount = r.result.match(/(\d+)\s*citation/i);
164
+ return citCount ? citCount[0] : '';
165
+ })
166
+ .filter(Boolean);
167
+ if (citSummaries.length > 0) {
168
+ citationInfo = `\n\n**Citation Analysis**: ${citSummaries.join('; ')}`;
169
+ }
170
+ }
171
+ // ── Stage 4: Synthesize into structured review ──
172
+ const sourceSummary = deduped.map(r => `### ${r.source}\n${r.text.slice(0, depth === 'comprehensive' ? 8000 : 4000)}`).join('\n\n');
173
+ // Extract themes: look for frequently mentioned terms
174
+ const combinedText = deduped.map(r => r.text).join('\n');
175
+ const keyTerms = extractKeyTerms(combinedText, topic);
176
+ const pipelineDuration = Date.now() - pipelineStart;
177
+ return [
178
+ `# Literature Review: ${topic}`,
179
+ `**Field**: ${field} | **Depth**: ${depth} | **Sources searched**: ${allResults.length}`,
180
+ `**Unique papers (after DOI dedup)**: ~${allDois.size} identified`,
181
+ `**Pipeline duration**: ${fmtDuration(pipelineDuration)}`,
182
+ `**Stage timing**: Search ${fmtDuration(Math.max(litResult.durationMs, pubmedResult.durationMs, preprintResult.durationMs))}, Dedup+Analysis ${fmtDuration(pipelineDuration - Math.max(litResult.durationMs, pubmedResult.durationMs, preprintResult.durationMs))}`,
183
+ '',
184
+ '---',
185
+ '',
186
+ '## Background',
187
+ `This review synthesizes ${deduped.length} sources across OpenAlex, PubMed, and preprint servers ` +
188
+ `on the topic of "${topic}" in ${field}.` +
189
+ (yearFrom ? ` Focus period: ${yearFrom}--present.` : ''),
190
+ '',
191
+ '## Key Findings',
192
+ '',
193
+ sourceSummary,
194
+ citationInfo,
195
+ '',
196
+ '## Emerging Themes',
197
+ keyTerms.length > 0
198
+ ? keyTerms.map(t => `- **${t.term}** (mentioned ${t.count} times)`).join('\n')
199
+ : '- *Insufficient data to extract themes. Try a broader search or comprehensive depth.*',
200
+ '',
201
+ '## Gaps & Open Questions',
202
+ '',
203
+ 'Based on the literature surveyed, potential gaps include:',
204
+ `- Studies with fewer citations or preprint-only status may indicate emerging but under-explored directions`,
205
+ `- Cross-disciplinary connections between ${field} and adjacent fields remain to be explored`,
206
+ `- Methodological replication and validation studies appear underrepresented`,
207
+ '',
208
+ '## Future Directions',
209
+ '',
210
+ `- Deeper meta-analysis with full-text access would strengthen these findings`,
211
+ `- Citation network analysis reveals opportunities for bridging isolated research clusters`,
212
+ `- Emerging preprints suggest active frontiers that have not yet been consolidated in reviews`,
213
+ '',
214
+ '---',
215
+ `*Generated by kbot research pipeline | ${new Date().toISOString().split('T')[0]}*`,
216
+ ].join('\n');
217
+ },
218
+ });
219
+ // ══════════════════════════════════════════════════════════════════════════
220
+ // 2. Drug Discovery Pipeline
221
+ // ══════════════════════════════════════════════════════════════════════════
222
+ registerTool({
223
+ name: 'drug_discovery_pipeline',
224
+ description: 'End-to-end drug target investigation pipeline. Chains: disease_info -> gene_lookup -> protein_search -> protein_structure -> drug_lookup -> clinical_trials. Discovers associated genes, protein targets, known structures, existing drugs, and active trials for a given disease.',
225
+ parameters: {
226
+ disease: { type: 'string', description: 'Disease name (e.g., "Alzheimer\'s disease", "breast cancer")', required: true },
227
+ organism: { type: 'string', description: 'Target organism (default "human")' },
228
+ },
229
+ tier: 'free',
230
+ timeout: 600_000,
231
+ maxResultSize: 100_000,
232
+ async execute(args) {
233
+ const disease = String(args.disease);
234
+ const organism = String(args.organism || 'human');
235
+ const pipelineStart = Date.now();
236
+ const stages = [];
237
+ // ── Stage 1: Disease Information ──
238
+ const diseaseRes = await runToolTimed('disease_info', { query: disease });
239
+ stages.push({ name: 'Disease Info', ...diseaseRes });
240
+ // Extract gene names from disease info
241
+ const genePattern = /\b([A-Z][A-Z0-9]{1,10})\b/g;
242
+ const candidateGenes = [...new Set((diseaseRes.result.match(genePattern) || [])
243
+ .filter(g => g.length >= 2 && g.length <= 10 && !/^(THE|AND|FOR|NOT|BUT|WITH|FROM|THIS|THAT|HAVE|BEEN|WERE|WILL|MORE|ALSO|INTO|OVER|SUCH|THAN|MOST|NULL|TRUE|HTTP|PMID|MESH|OMIM|DOI)$/i.test(g)))].slice(0, 5);
244
+ // ── Stage 2: Gene Lookup (parallel for up to 5 genes) ──
245
+ const geneResults = await Promise.all(candidateGenes.map(gene => runToolTimed('gene_lookup', { query: gene, organism })));
246
+ geneResults.forEach((r, i) => {
247
+ stages.push({ name: `Gene: ${candidateGenes[i]}`, ...r });
248
+ });
249
+ // Extract protein identifiers from gene results
250
+ const proteinIds = [];
251
+ for (const gr of geneResults) {
252
+ if (!gr.error) {
253
+ // Look for UniProt IDs (P12345, Q9Y6K9 patterns)
254
+ const uniprotPattern = /\b([A-NR-Z][0-9][A-Z0-9]{3}[0-9]|[OPQ][0-9][A-Z0-9]{3}[0-9])\b/g;
255
+ const matches = gr.result.match(uniprotPattern) || [];
256
+ proteinIds.push(...matches.slice(0, 2));
257
+ }
258
+ }
259
+ // Also search directly
260
+ const proteinNames = candidateGenes.slice(0, 3);
261
+ // ── Stage 3: Protein Search (parallel) ──
262
+ const proteinResults = await Promise.all(proteinNames.map(name => runToolTimed('protein_search', { query: `${name} ${organism}`, limit: 3 })));
263
+ proteinResults.forEach((r, i) => {
264
+ stages.push({ name: `Protein: ${proteinNames[i]}`, ...r });
265
+ });
266
+ // ── Stage 4: Protein Structure (for top hits) ──
267
+ // Extract PDB IDs from protein results
268
+ const pdbPattern = /\b([0-9][A-Z0-9]{3})\b/g;
269
+ const pdbIds = [];
270
+ for (const pr of proteinResults) {
271
+ if (!pr.error) {
272
+ const matches = pr.result.match(pdbPattern) || [];
273
+ pdbIds.push(...matches.filter(id => /[A-Z]/.test(id)).slice(0, 2));
274
+ }
275
+ }
276
+ let structureResults = [];
277
+ if (pdbIds.length > 0) {
278
+ const sResults = await Promise.all([...new Set(pdbIds)].slice(0, 3).map(pdb => runToolTimed('protein_structure', { pdb_id: pdb })));
279
+ structureResults = sResults.map((r, i) => ({
280
+ name: `Structure: ${pdbIds[i]}`,
281
+ ...r,
282
+ }));
283
+ stages.push(...structureResults);
284
+ }
285
+ // ── Stage 5: Drug Lookup + Clinical Trials (parallel) ──
286
+ const [drugRes, trialsRes] = await Promise.all([
287
+ runToolTimed('drug_lookup', { query: disease, limit: 10 }),
288
+ runToolTimed('clinical_trials', { query: disease, status: 'recruiting', limit: 10 }),
289
+ ]);
290
+ stages.push({ name: 'Drug Lookup', ...drugRes });
291
+ stages.push({ name: 'Clinical Trials', ...trialsRes });
292
+ const pipelineDuration = Date.now() - pipelineStart;
293
+ const stageTimeline = stages.map(s => `| ${s.name} | ${fmtDuration(s.durationMs)} | ${s.error ? 'Error' : 'OK'} |`).join('\n');
294
+ return [
295
+ `# Drug Discovery Pipeline: ${disease}`,
296
+ `**Organism**: ${organism} | **Total duration**: ${fmtDuration(pipelineDuration)}`,
297
+ `**Stages completed**: ${stages.length} | **Errors**: ${stages.filter(s => s.error).length}`,
298
+ '',
299
+ '## Pipeline Execution',
300
+ '| Stage | Duration | Status |',
301
+ '|-------|----------|--------|',
302
+ stageTimeline,
303
+ '',
304
+ '---',
305
+ '',
306
+ '## 1. Disease Overview',
307
+ diseaseRes.result.slice(0, 3000),
308
+ '',
309
+ '## 2. Associated Genes',
310
+ `**Candidate genes identified**: ${candidateGenes.join(', ') || 'none found'}`,
311
+ '',
312
+ ...geneResults.filter(r => !r.error).map(r => r.result.slice(0, 2000)),
313
+ '',
314
+ '## 3. Protein Targets',
315
+ ...proteinResults.filter(r => !r.error).map(r => r.result.slice(0, 2000)),
316
+ '',
317
+ '## 4. Known Structures',
318
+ pdbIds.length > 0
319
+ ? structureResults.filter(r => !r.error).map(r => r.result.slice(0, 2000)).join('\n\n')
320
+ : '*No PDB structures identified from search results.*',
321
+ '',
322
+ '## 5. Existing Drugs',
323
+ drugRes.result.slice(0, 3000),
324
+ '',
325
+ '## 6. Active Clinical Trials',
326
+ trialsRes.result.slice(0, 3000),
327
+ '',
328
+ '---',
329
+ `*Generated by kbot drug discovery pipeline | ${new Date().toISOString().split('T')[0]}*`,
330
+ ].join('\n');
331
+ },
332
+ });
333
+ // ══════════════════════════════════════════════════════════════════════════
334
+ // 3. Genomic Analysis
335
+ // ══════════════════════════════════════════════════════════════════════════
336
+ registerTool({
337
+ name: 'genomic_analysis',
338
+ description: 'Sequence analysis workflow. Chains: sequence_tools (GC content, ORFs) -> blast_search -> gene_lookup -> pathway_search. Analyzes a DNA or protein sequence, finds homologs, identifies the gene, and maps associated pathways.',
339
+ parameters: {
340
+ sequence: { type: 'string', description: 'DNA or protein sequence (FASTA or raw)', required: true },
341
+ sequence_type: { type: 'string', description: 'Sequence type: dna or protein', required: true },
342
+ },
343
+ tier: 'free',
344
+ timeout: 600_000,
345
+ maxResultSize: 100_000,
346
+ async execute(args) {
347
+ const sequence = String(args.sequence).trim();
348
+ const seqType = String(args.sequence_type || 'dna').toLowerCase();
349
+ const pipelineStart = Date.now();
350
+ const stages = [];
351
+ // Clean sequence: remove FASTA header if present
352
+ const cleanSeq = sequence.startsWith('>')
353
+ ? sequence.split('\n').slice(1).join('').replace(/\s/g, '')
354
+ : sequence.replace(/\s/g, '');
355
+ // ── Stage 1: Sequence Analysis ──
356
+ const seqAnalysis = await runToolTimed('sequence_tools', {
357
+ sequence: cleanSeq,
358
+ operation: seqType === 'dna' ? 'analyze' : 'protein_stats',
359
+ });
360
+ stages.push({ name: 'Sequence Analysis', ...seqAnalysis });
361
+ // If DNA, also find ORFs
362
+ let orfResult = null;
363
+ if (seqType === 'dna') {
364
+ orfResult = await runToolTimed('sequence_tools', {
365
+ sequence: cleanSeq,
366
+ operation: 'find_orfs',
367
+ });
368
+ stages.push({ name: 'ORF Finding', ...orfResult });
369
+ }
370
+ // ── Stage 2: BLAST Search ──
371
+ const blastDb = seqType === 'dna' ? 'nt' : 'nr';
372
+ const blastProgram = seqType === 'dna' ? 'blastn' : 'blastp';
373
+ const blastResult = await runToolTimed('blast_search', {
374
+ sequence: cleanSeq,
375
+ program: blastProgram,
376
+ database: blastDb,
377
+ limit: 10,
378
+ });
379
+ stages.push({ name: 'BLAST Search', ...blastResult });
380
+ // ── Stage 3: Gene Identification ──
381
+ // Extract gene names from BLAST results
382
+ const geneNames = [];
383
+ const genePatterns = [
384
+ /gene[:\s]+([A-Z][A-Z0-9]+)/gi,
385
+ /\b([A-Z][A-Z0-9]{1,8})\s+\[/g,
386
+ ];
387
+ for (const pat of genePatterns) {
388
+ const matches = blastResult.result.matchAll(pat);
389
+ for (const m of matches) {
390
+ if (m[1] && m[1].length >= 2 && m[1].length <= 10) {
391
+ geneNames.push(m[1]);
392
+ }
393
+ }
394
+ }
395
+ const uniqueGenes = [...new Set(geneNames)].slice(0, 3);
396
+ const geneLookups = await Promise.all(uniqueGenes.map(gene => runToolTimed('gene_lookup', { query: gene, organism: 'any' })));
397
+ geneLookups.forEach((r, i) => {
398
+ stages.push({ name: `Gene: ${uniqueGenes[i]}`, ...r });
399
+ });
400
+ // ── Stage 4: Pathway Search ──
401
+ const pathwayResults = await Promise.all(uniqueGenes.slice(0, 2).map(gene => runToolTimed('pathway_search', { query: gene })));
402
+ pathwayResults.forEach((r, i) => {
403
+ stages.push({ name: `Pathway: ${uniqueGenes[i]}`, ...r });
404
+ });
405
+ const pipelineDuration = Date.now() - pipelineStart;
406
+ return [
407
+ `# Genomic Analysis Pipeline`,
408
+ `**Sequence type**: ${seqType} | **Length**: ${cleanSeq.length} ${seqType === 'dna' ? 'bp' : 'aa'}`,
409
+ `**Total duration**: ${fmtDuration(pipelineDuration)} | **Stages**: ${stages.length}`,
410
+ '',
411
+ '## Pipeline Execution',
412
+ '| Stage | Duration | Status |',
413
+ '|-------|----------|--------|',
414
+ ...stages.map(s => `| ${s.name} | ${fmtDuration(s.durationMs)} | ${s.error ? 'Error' : 'OK'} |`),
415
+ '',
416
+ '---',
417
+ '',
418
+ '## 1. Sequence Properties',
419
+ seqAnalysis.result.slice(0, 3000),
420
+ '',
421
+ ...(orfResult && !orfResult.error
422
+ ? ['## 2. Open Reading Frames', orfResult.result.slice(0, 3000), '']
423
+ : []),
424
+ '',
425
+ '## 3. Homology Search (BLAST)',
426
+ blastResult.error
427
+ ? '*BLAST search failed or timed out. The NCBI BLAST queue may be busy; try again later.*'
428
+ : blastResult.result.slice(0, 4000),
429
+ '',
430
+ '## 4. Gene Identification',
431
+ uniqueGenes.length > 0
432
+ ? `**Candidate genes**: ${uniqueGenes.join(', ')}\n\n` +
433
+ geneLookups.filter(r => !r.error).map(r => r.result.slice(0, 2000)).join('\n\n')
434
+ : '*No gene candidates extracted from BLAST results.*',
435
+ '',
436
+ '## 5. Pathway Mapping',
437
+ pathwayResults.filter(r => !r.error).length > 0
438
+ ? pathwayResults.filter(r => !r.error).map(r => r.result.slice(0, 2000)).join('\n\n')
439
+ : '*No pathway data found for identified genes.*',
440
+ '',
441
+ '---',
442
+ `*Generated by kbot genomic analysis pipeline | ${new Date().toISOString().split('T')[0]}*`,
443
+ ].join('\n');
444
+ },
445
+ });
446
+ // ══════════════════════════════════════════════════════════════════════════
447
+ // 4. Environmental Assessment
448
+ // ══════════════════════════════════════════════════════════════════════════
449
+ registerTool({
450
+ name: 'environmental_assessment',
451
+ description: 'Multi-source environmental analysis. Queries earthquake, climate, air quality, water resources, soil, and biodiversity data in parallel for a given location and synthesizes into an environmental health report.',
452
+ parameters: {
453
+ latitude: { type: 'number', description: 'Latitude of location', required: true },
454
+ longitude: { type: 'number', description: 'Longitude of location', required: true },
455
+ location_name: { type: 'string', description: 'Human-readable location name (optional, used in report header)' },
456
+ },
457
+ tier: 'free',
458
+ timeout: 600_000,
459
+ maxResultSize: 100_000,
460
+ async execute(args) {
461
+ const lat = Number(args.latitude);
462
+ const lon = Number(args.longitude);
463
+ const locationName = args.location_name ? String(args.location_name) : `${lat.toFixed(4)}, ${lon.toFixed(4)}`;
464
+ const pipelineStart = Date.now();
465
+ // ── All stages run in parallel ──
466
+ const [earthquakeRes, climateRes, airRes, waterRes, soilRes, bioRes,] = await Promise.all([
467
+ runToolTimed('earthquake_query', {
468
+ latitude: lat,
469
+ longitude: lon,
470
+ radius_km: 200,
471
+ days: 365,
472
+ min_magnitude: 2.0,
473
+ limit: 20,
474
+ }),
475
+ runToolTimed('climate_data', {
476
+ latitude: lat,
477
+ longitude: lon,
478
+ variable: 'temperature',
479
+ period: 'monthly',
480
+ }),
481
+ runToolTimed('air_quality', {
482
+ latitude: lat,
483
+ longitude: lon,
484
+ }),
485
+ runToolTimed('water_resources', {
486
+ latitude: lat,
487
+ longitude: lon,
488
+ parameter: 'streamflow',
489
+ days: 30,
490
+ }),
491
+ runToolTimed('soil_data', {
492
+ latitude: lat,
493
+ longitude: lon,
494
+ }),
495
+ runToolTimed('biodiversity_index', {
496
+ latitude: lat,
497
+ longitude: lon,
498
+ radius_km: 50,
499
+ }),
500
+ ]);
501
+ const allStages = [
502
+ { name: 'Seismic Activity', ...earthquakeRes },
503
+ { name: 'Climate Data', ...climateRes },
504
+ { name: 'Air Quality', ...airRes },
505
+ { name: 'Water Resources', ...waterRes },
506
+ { name: 'Soil Properties', ...soilRes },
507
+ { name: 'Biodiversity', ...bioRes },
508
+ ];
509
+ const pipelineDuration = Date.now() - pipelineStart;
510
+ const successCount = allStages.filter(s => !s.error).length;
511
+ // ── Synthesize environmental health score ──
512
+ const concerns = [];
513
+ const positives = [];
514
+ // Check air quality
515
+ if (!airRes.error) {
516
+ if (airRes.result.match(/unhealthy|hazardous|very unhealthy/i)) {
517
+ concerns.push('Air quality is at unhealthy or hazardous levels');
518
+ }
519
+ else if (airRes.result.match(/good|moderate/i)) {
520
+ positives.push('Air quality is within acceptable ranges');
521
+ }
522
+ }
523
+ // Check seismic activity
524
+ if (!earthquakeRes.error) {
525
+ const quakeCount = (earthquakeRes.result.match(/magnitude/gi) || []).length;
526
+ if (quakeCount > 10) {
527
+ concerns.push(`High seismic activity: ${quakeCount}+ earthquakes in the past year within 200km`);
528
+ }
529
+ else if (quakeCount > 0) {
530
+ positives.push(`Moderate seismic activity: ${quakeCount} events recorded`);
531
+ }
532
+ else {
533
+ positives.push('Low seismic activity in the region');
534
+ }
535
+ }
536
+ // Check biodiversity
537
+ if (!bioRes.error) {
538
+ const speciesMatch = bioRes.result.match(/(\d+)\s*species/i);
539
+ if (speciesMatch) {
540
+ const count = parseInt(speciesMatch[1]);
541
+ if (count > 100)
542
+ positives.push(`Rich biodiversity: ${count}+ species recorded`);
543
+ else if (count < 10)
544
+ concerns.push(`Low recorded biodiversity: only ${count} species`);
545
+ }
546
+ }
547
+ return [
548
+ `# Environmental Assessment: ${locationName}`,
549
+ `**Coordinates**: ${lat.toFixed(4)}N, ${lon.toFixed(4)}E`,
550
+ `**Date**: ${new Date().toISOString().split('T')[0]}`,
551
+ `**Total duration**: ${fmtDuration(pipelineDuration)} | **Data sources**: ${successCount}/${allStages.length} successful`,
552
+ '',
553
+ '## Pipeline Execution',
554
+ '| Source | Duration | Status |',
555
+ '|--------|----------|--------|',
556
+ ...allStages.map(s => `| ${s.name} | ${fmtDuration(s.durationMs)} | ${s.error ? 'Error' : 'OK'} |`),
557
+ '',
558
+ '---',
559
+ '',
560
+ '## Environmental Health Summary',
561
+ '',
562
+ concerns.length > 0 ? '**Concerns:**' : '',
563
+ ...concerns.map(c => `- ${c}`),
564
+ positives.length > 0 ? '\n**Positive Indicators:**' : '',
565
+ ...positives.map(p => `- ${p}`),
566
+ '',
567
+ '## 1. Seismic Activity (past 12 months, 200km radius)',
568
+ earthquakeRes.error ? '*Data unavailable*' : earthquakeRes.result.slice(0, 3000),
569
+ '',
570
+ '## 2. Climate Data',
571
+ climateRes.error ? '*Data unavailable*' : climateRes.result.slice(0, 3000),
572
+ '',
573
+ '## 3. Air Quality',
574
+ airRes.error ? '*Data unavailable*' : airRes.result.slice(0, 2000),
575
+ '',
576
+ '## 4. Water Resources',
577
+ waterRes.error ? '*Data unavailable*' : waterRes.result.slice(0, 3000),
578
+ '',
579
+ '## 5. Soil Properties',
580
+ soilRes.error ? '*Data unavailable*' : soilRes.result.slice(0, 3000),
581
+ '',
582
+ '## 6. Biodiversity',
583
+ bioRes.error ? '*Data unavailable*' : bioRes.result.slice(0, 3000),
584
+ '',
585
+ '---',
586
+ `*Generated by kbot environmental assessment pipeline | ${new Date().toISOString().split('T')[0]}*`,
587
+ ].join('\n');
588
+ },
589
+ });
590
+ // ══════════════════════════════════════════════════════════════════════════
591
+ // 5. Materials Discovery
592
+ // ══════════════════════════════════════════════════════════════════════════
593
+ registerTool({
594
+ name: 'materials_discovery',
595
+ description: 'Materials investigation pipeline. Chains: compound_search -> compound_properties -> material_properties -> crystal_structure -> thermodynamics_data. Finds a compound, gets properties, checks materials data, crystal info, and thermodynamic stability.',
596
+ parameters: {
597
+ formula: { type: 'string', description: 'Chemical formula or compound name (e.g., "TiO2", "silicon carbide")', required: true },
598
+ target_property: { type: 'string', description: 'Target property focus: strength, conductivity, band_gap, thermal (optional)' },
599
+ },
600
+ tier: 'free',
601
+ timeout: 600_000,
602
+ maxResultSize: 100_000,
603
+ async execute(args) {
604
+ const formula = String(args.formula);
605
+ const targetProp = args.target_property ? String(args.target_property) : undefined;
606
+ const pipelineStart = Date.now();
607
+ const stages = [];
608
+ // ── Stage 1: Compound Search ──
609
+ const compoundRes = await runToolTimed('compound_search', { query: formula });
610
+ stages.push({ name: 'Compound Search', ...compoundRes });
611
+ // Extract CID from result for subsequent queries
612
+ const cidMatch = compoundRes.result.match(/CID[:\s]*(\d+)/i);
613
+ const cid = cidMatch ? cidMatch[1] : undefined;
614
+ // ── Stage 2: Compound Properties (parallel with material/crystal lookups) ──
615
+ const [propsRes, materialRes, crystalRes] = await Promise.all([
616
+ cid
617
+ ? runToolTimed('compound_properties', { cid })
618
+ : runToolTimed('compound_properties', { query: formula }),
619
+ runToolTimed('material_properties', { formula, limit: 5 }),
620
+ runToolTimed('crystal_structure', { query: formula, limit: 5 }),
621
+ ]);
622
+ stages.push({ name: 'Compound Properties', ...propsRes });
623
+ stages.push({ name: 'Material Properties', ...materialRes });
624
+ stages.push({ name: 'Crystal Structure', ...crystalRes });
625
+ // ── Stage 3: Thermodynamic Data ──
626
+ const thermoRes = await runToolTimed('thermodynamics_data', { compound: formula });
627
+ stages.push({ name: 'Thermodynamics', ...thermoRes });
628
+ // ── Stage 4: Element info for constituent elements ──
629
+ const elementSymbols = extractElements(formula);
630
+ const elementResults = await Promise.all(elementSymbols.slice(0, 4).map(el => runToolTimed('element_info', { element: el })));
631
+ elementResults.forEach((r, i) => {
632
+ stages.push({ name: `Element: ${elementSymbols[i]}`, ...r });
633
+ });
634
+ const pipelineDuration = Date.now() - pipelineStart;
635
+ // Property-specific analysis
636
+ let propertyFocus = '';
637
+ if (targetProp) {
638
+ const focusLabels = {
639
+ strength: 'Mechanical Strength',
640
+ conductivity: 'Electrical Conductivity',
641
+ band_gap: 'Band Gap / Electronic Properties',
642
+ thermal: 'Thermal Properties',
643
+ };
644
+ propertyFocus = `\n### Focus: ${focusLabels[targetProp] || targetProp}\n` +
645
+ extractPropertyInfo(materialRes.result + '\n' + thermoRes.result, targetProp);
646
+ }
647
+ return [
648
+ `# Materials Discovery: ${formula}`,
649
+ targetProp ? `**Target property**: ${targetProp}` : '',
650
+ `**Total duration**: ${fmtDuration(pipelineDuration)} | **Stages**: ${stages.length}`,
651
+ '',
652
+ '## Pipeline Execution',
653
+ '| Stage | Duration | Status |',
654
+ '|-------|----------|--------|',
655
+ ...stages.map(s => `| ${s.name} | ${fmtDuration(s.durationMs)} | ${s.error ? 'Error' : 'OK'} |`),
656
+ '',
657
+ '---',
658
+ '',
659
+ '## 1. Compound Identification',
660
+ compoundRes.error ? '*Compound not found in PubChem*' : compoundRes.result.slice(0, 2000),
661
+ '',
662
+ '## 2. Physicochemical Properties',
663
+ propsRes.error ? '*Properties unavailable*' : propsRes.result.slice(0, 3000),
664
+ '',
665
+ '## 3. Materials Database',
666
+ materialRes.error ? '*No Materials Project data found (may need MP_API_KEY)*' : materialRes.result.slice(0, 3000),
667
+ '',
668
+ '## 4. Crystal Structure',
669
+ crystalRes.error ? '*No crystal structures found in COD*' : crystalRes.result.slice(0, 3000),
670
+ '',
671
+ '## 5. Thermodynamic Stability',
672
+ thermoRes.error ? '*Thermodynamic data unavailable*' : thermoRes.result.slice(0, 2000),
673
+ '',
674
+ '## 6. Constituent Elements',
675
+ elementResults.filter(r => !r.error).map(r => r.result.slice(0, 1000)).join('\n\n') || '*No element data retrieved*',
676
+ propertyFocus,
677
+ '',
678
+ '---',
679
+ `*Generated by kbot materials discovery pipeline | ${new Date().toISOString().split('T')[0]}*`,
680
+ ].filter(Boolean).join('\n');
681
+ },
682
+ });
683
+ // ══════════════════════════════════════════════════════════════════════════
684
+ // 6. Statistical Analysis
685
+ // ══════════════════════════════════════════════════════════════════════════
686
registerTool({
    name: 'statistical_analysis',
    description: 'Complete data analysis workflow. Chains: distribution_fit -> hypothesis_test -> regression_analysis -> correlation_matrix -> viz_codegen. Fits distributions, tests hypotheses, models relationships, finds correlations, and generates visualization code.',
    parameters: {
        data: { type: 'string', description: 'JSON array of objects, e.g. [{"x":1,"y":2},{"x":3,"y":4}]', required: true },
        x_column: { type: 'string', description: 'Column name for independent variable', required: true },
        y_column: { type: 'string', description: 'Column name for dependent variable', required: true },
        groups: { type: 'string', description: 'Column name for grouping variable (optional, for group comparisons)' },
    },
    tier: 'free',
    timeout: 300_000,
    maxResultSize: 100_000,
    /**
     * Run the statistical pipeline over a JSON dataset and return a markdown report.
     *
     * Stages: distribution fit for each column (parallel), a two-sample t-test
     * of the x column against the y column, linear regression plus a Pearson
     * correlation matrix (parallel), then scatter-plot code generation.
     *
     * @param {object} args - Tool arguments: `data` (JSON array string),
     *   `x_column`, `y_column`, and optional `groups`.
     * @returns {Promise<string>} Markdown report, or a `**Error**: ...` string
     *   when the data cannot be parsed or contains no usable numeric values.
     */
    async execute(args) {
        const dataStr = String(args.data);
        const xCol = String(args.x_column);
        const yCol = String(args.y_column);
        const groupCol = args.groups ? String(args.groups) : undefined;
        const pipelineStart = Date.now();
        const stages = [];
        // Parse data
        let data;
        try {
            const parsed = JSON.parse(dataStr);
            if (!Array.isArray(parsed)) {
                throw new Error('Data must be a JSON array');
            }
            data = parsed;
        }
        catch (e) {
            return `**Error**: Could not parse data as JSON array. ${e instanceof Error ? e.message : String(e)}`;
        }
        if (data.length === 0) {
            return '**Error**: Empty dataset provided.';
        }
        // Only real numbers and non-blank numeric strings count as values.
        // Number(null) and Number('') are 0, which would silently turn
        // missing cells into zeros.
        const toNumber = (value) => {
            if (typeof value === 'number') {
                return value;
            }
            if (typeof value === 'string' && value.trim() !== '') {
                return Number(value);
            }
            return NaN;
        };
        // Rows that are not objects (null, numbers, ...) contribute no value
        // instead of throwing on property access.
        const readCell = (row, column) => (row !== null && typeof row === 'object' ? toNumber(row[column]) : NaN);
        // Univariate stages (distribution fit, t-test) may use every valid
        // value of each column independently.
        const xValues = data.map(row => readCell(row, xCol)).filter(v => Number.isFinite(v));
        const yValues = data.map(row => readCell(row, yCol)).filter(v => Number.isFinite(v));
        // Regression needs x[i] and y[i] to come from the same row, so rows
        // missing either value are dropped pairwise rather than per column.
        const pairs = data
            .map(row => [readCell(row, xCol), readCell(row, yCol)])
            .filter(([x, y]) => Number.isFinite(x) && Number.isFinite(y));
        if (xValues.length === 0 || yValues.length === 0 || pairs.length === 0) {
            return `**Error**: Could not extract numeric values from columns "${xCol}" and/or "${yCol}".`;
        }
        const xStr = xValues.join(',');
        const yStr = yValues.join(',');
        const pairedXStr = pairs.map(([x]) => x).join(',');
        const pairedYStr = pairs.map(([, y]) => y).join(',');
        // ── Stage 1: Distribution Fit ──
        const [xDistRes, yDistRes] = await Promise.all([
            runToolTimed('distribution_fit', { data: xStr, distributions: 'normal,lognormal,exponential,uniform' }),
            runToolTimed('distribution_fit', { data: yStr, distributions: 'normal,lognormal,exponential,uniform' }),
        ]);
        stages.push({ name: `Distribution Fit (${xCol})`, ...xDistRes });
        stages.push({ name: `Distribution Fit (${yCol})`, ...yDistRes });
        // ── Stage 2: Hypothesis Test ──
        const hypothesisRes = await runToolTimed('hypothesis_test', {
            test: 'two_sample_t',
            sample1: xStr,
            sample2: yStr,
            alpha: 0.05,
        });
        stages.push({ name: 'Hypothesis Test', ...hypothesisRes });
        // ── Stage 3: Regression Analysis + Correlation (parallel) ──
        const [regressionRes, correlationRes] = await Promise.all([
            runToolTimed('regression_analysis', {
                x: pairedXStr,
                y: pairedYStr,
                model: 'linear',
            }),
            runToolTimed('correlation_matrix', {
                data: dataStr,
                columns: groupCol ? `${xCol},${yCol},${groupCol}` : `${xCol},${yCol}`,
                method: 'pearson',
            }),
        ]);
        stages.push({ name: 'Regression Analysis', ...regressionRes });
        stages.push({ name: 'Correlation Matrix', ...correlationRes });
        // ── Stage 4: Visualization Code ──
        const vizRes = await runToolTimed('viz_codegen', {
            data: dataStr,
            chart_type: 'scatter',
            x: xCol,
            y: yCol,
            title: `${yCol} vs ${xCol}`,
            library: 'matplotlib',
        });
        stages.push({ name: 'Visualization', ...vizRes });
        const pipelineDuration = Date.now() - pipelineStart;
        // Optional lines are added with conditional spreads. Filtering the
        // whole array for truthiness would also remove the blank separator
        // lines, and a '---' placed directly under text renders as a heading.
        return [
            `# Statistical Analysis: ${yCol} vs ${xCol}`,
            `**Observations**: ${data.length} | **Duration**: ${fmtDuration(pipelineDuration)}`,
            ...(pairs.length !== data.length
                ? [`**Rows with numeric ${xCol} and ${yCol} (used for regression)**: ${pairs.length}`]
                : []),
            ...(groupCol ? [`**Grouping variable**: ${groupCol}`] : []),
            '',
            '## Pipeline Execution',
            '| Stage | Duration | Status |',
            '|-------|----------|--------|',
            ...stages.map(s => `| ${s.name} | ${fmtDuration(s.durationMs)} | ${s.error ? 'Error' : 'OK'} |`),
            '',
            '---',
            '',
            '## 1. Distribution Analysis',
            `### ${xCol}`,
            xDistRes.error ? '*Fitting failed*' : xDistRes.result.slice(0, 2000),
            '',
            `### ${yCol}`,
            yDistRes.error ? '*Fitting failed*' : yDistRes.result.slice(0, 2000),
            '',
            '## 2. Hypothesis Testing',
            hypothesisRes.error ? '*Test failed*' : hypothesisRes.result.slice(0, 2000),
            '',
            '## 3. Regression Analysis',
            regressionRes.error ? '*Regression failed*' : regressionRes.result.slice(0, 3000),
            '',
            '## 4. Correlation Matrix',
            correlationRes.error ? '*Correlation computation failed*' : correlationRes.result.slice(0, 2000),
            '',
            '## 5. Visualization Code',
            vizRes.error ? '*Code generation failed*' : vizRes.result.slice(0, 4000),
            '',
            '---',
            `*Generated by kbot statistical analysis pipeline | ${new Date().toISOString().split('T')[0]}*`,
        ].join('\n');
    },
});
803
+ // ══════════════════════════════════════════════════════════════════════════
804
+ // 7. Astronomy Investigation
805
+ // ══════════════════════════════════════════════════════════════════════════
806
registerTool({
    name: 'astronomy_investigation',
    description: 'Celestial object deep dive. Chains: astronomy_query -> orbit_calculator -> physical_constants -> relativity_calc. Finds an object, calculates orbital parameters, looks up relevant physics, and computes relativistic effects.',
    parameters: {
        object: { type: 'string', description: 'Celestial object name or designation (e.g., "Proxima Centauri", "Jupiter", "GJ 1214 b")', required: true },
        investigation_type: { type: 'string', description: 'Investigation type: star, exoplanet, orbit, binary', required: true },
    },
    tier: 'free',
    timeout: 300_000,
    maxResultSize: 100_000,
    /**
     * Investigate a celestial object and return a markdown report.
     *
     * Runs a SIMBAD lookup, an optional orbit calculation, physical-constant
     * lookups, a relativity calculation and a literature search. None of the
     * stages consumes another stage's output, so they are started together.
     *
     * @param {object} args - Tool arguments: `object` (name or designation)
     *   and `investigation_type` (star, exoplanet, orbit or binary; defaults
     *   to star when empty).
     * @returns {Promise<string>} Markdown report with a stage table and
     *   consecutively numbered sections.
     */
    async execute(args) {
        const object = String(args.object);
        // Normalise so "Star" or " exoplanet " select the intended branch.
        const invType = String(args.investigation_type || 'star').trim().toLowerCase();
        const pipelineStart = Date.now();
        // Per-type plans. Maps are used so that a type such as "constructor"
        // cannot resolve to an inherited Object property.
        const orbitPlans = new Map([
            ['exoplanet', { stageName: 'Orbital Mechanics', calculation: 'orbital_elements' }],
            ['orbit', { stageName: 'Orbital Mechanics', calculation: 'orbital_elements' }],
            ['binary', { stageName: 'Binary Orbit', calculation: 'binary_orbit' }],
        ]);
        const constantPlans = new Map([
            ['star', ['stefan-boltzmann', 'solar luminosity', 'solar mass']],
            ['exoplanet', ['gravitational constant', 'earth mass', 'solar mass']],
            ['orbit', ['gravitational constant', 'speed of light', 'astronomical unit']],
            ['binary', ['gravitational constant', 'speed of light', 'solar mass']],
        ]);
        // The mass and radius are fixed defaults, not values derived from the
        // looked-up object.
        const solarDilation = {
            calculation: 'gravitational_time_dilation',
            mass_kg: 1.989e30, // solar mass as default
            radius_m: 6.957e8, // solar radius as default
        };
        const earthEscape = {
            calculation: 'escape_velocity',
            mass_kg: 5.972e24, // earth mass as default
            radius_m: 6.371e6, // earth radius as default
        };
        const relativityPlans = new Map([
            ['star', solarDilation],
            ['binary', solarDilation],
            ['exoplanet', earthEscape],
            ['orbit', earthEscape],
        ]);
        const orbitPlan = orbitPlans.get(invType);
        const constantQueries = constantPlans.get(invType) ?? [];
        const relativityPlan = relativityPlans.get(invType);
        // ── All stages are independent: run them concurrently ──
        const [astroRes, orbitRes, constantResults, relativityRes, litRes] = await Promise.all([
            // Stage 1: Astronomy Query (SIMBAD / object identification)
            runToolTimed('astronomy_query', {
                object,
                catalog: 'simbad',
            }),
            // Stage 2: Orbit Calculator (only for exoplanet / orbit / binary)
            orbitPlan
                ? runToolTimed('orbit_calculator', {
                    body: object.toLowerCase(),
                    calculation: orbitPlan.calculation,
                })
                : null,
            // Stage 3: Physical Constants relevant to the investigation type
            Promise.all(constantQueries.map(q => runToolTimed('physical_constants', { query: q }))),
            // Stage 4: Relativistic Effects
            relativityPlan ? runToolTimed('relativity_calc', { ...relativityPlan }) : null,
            // Stage 5: Literature search for the object
            runToolTimed('literature_search', {
                query: `"${object}" astronomy`,
                database: 'openalex',
                limit: 5,
            }),
        ]);
        // Stage table keeps the logical pipeline order, not completion order.
        const stages = [
            { name: 'Object Identification', ...astroRes },
            ...(orbitRes ? [{ name: orbitPlan.stageName, ...orbitRes }] : []),
            ...constantResults.map((r, i) => ({ name: `Constant: ${constantQueries[i]}`, ...r })),
            ...(relativityRes ? [{ name: 'Relativistic Effects', ...relativityRes }] : []),
            { name: 'Recent Literature', ...litRes },
        ];
        const pipelineDuration = Date.now() - pipelineStart;
        // Sections are numbered after assembly so that omitting the orbit
        // section does not leave a gap in the heading numbers.
        const sections = [
            ['Object Identification', astroRes.error ? '*Object not found in SIMBAD database*' : astroRes.result.slice(0, 3000)],
            ...(orbitRes
                ? [['Orbital Mechanics', orbitRes.error ? '*Orbit data unavailable*' : orbitRes.result.slice(0, 3000)]]
                : []),
            ['Relevant Physical Constants', constantResults.filter(r => !r.error).map(r => r.result.slice(0, 500)).join('\n\n') || '*No constants retrieved*'],
            ['Relativistic Effects', relativityRes
                    ? (relativityRes.error ? '*Calculation failed*' : relativityRes.result.slice(0, 2000))
                    : '*Not applicable for this investigation type*'],
            ['Recent Literature', litRes.error ? '*Literature search failed*' : litRes.result.slice(0, 3000)],
        ];
        return [
            `# Astronomy Investigation: ${object}`,
            `**Type**: ${invType} | **Duration**: ${fmtDuration(pipelineDuration)} | **Stages**: ${stages.length}`,
            '',
            '## Pipeline Execution',
            '| Stage | Duration | Status |',
            '|-------|----------|--------|',
            ...stages.map(s => `| ${s.name} | ${fmtDuration(s.durationMs)} | ${s.error ? 'Error' : 'OK'} |`),
            '',
            '---',
            '',
            ...sections.flatMap(([title, body], i) => [`## ${i + 1}. ${title}`, body, '']),
            '---',
            `*Generated by kbot astronomy investigation pipeline | ${new Date().toISOString().split('T')[0]}*`,
        ].join('\n');
    },
});
925
+ // ══════════════════════════════════════════════════════════════════════════
926
+ // 8. Cross-Domain Search
927
+ // ══════════════════════════════════════════════════════════════════════════
928
registerTool({
    name: 'cross_domain_search',
    description: 'Search across ALL scientific databases simultaneously: literature_search + pubmed_search + compound_search + gene_lookup + earthquake_query + astronomy_query in parallel. Finds connections across fields for a given topic.',
    parameters: {
        query: { type: 'string', description: 'Search query spanning scientific domains', required: true },
        max_results_per_source: { type: 'number', description: 'Max results per source (default 3)' },
    },
    tier: 'free',
    timeout: 300_000,
    maxResultSize: 100_000,
    /**
     * Query six scientific sources in parallel and return a markdown report.
     *
     * @param {object} args - Tool arguments: `query` and optional
     *   `max_results_per_source` (number; clamped to the range 1-10,
     *   default 3).
     * @returns {Promise<string>} Markdown report with a per-source overview
     *   table, cross-domain notes and the truncated result of each source.
     */
    async execute(args) {
        const query = String(args.query);
        // Clamp to a whole number in [1, 10]. NaN, Infinity and non-number
        // inputs fall back to the default of 3.
        const requested = args.max_results_per_source;
        const maxPerSource = typeof requested === 'number' && Number.isFinite(requested)
            ? Math.min(Math.max(Math.trunc(requested), 1), 10)
            : 3;
        const pipelineStart = Date.now();
        // One descriptor per source keeps label, domain, tool and arguments
        // together instead of relying on matching positions in two arrays.
        const sourcePlans = [
            { name: 'OpenAlex (Literature)', domain: 'Academic Literature', tool: 'literature_search', toolArgs: { query, database: 'openalex', limit: maxPerSource } },
            { name: 'PubMed (Biomedical)', domain: 'Biomedical Sciences', tool: 'pubmed_search', toolArgs: { query, limit: maxPerSource, sort: 'relevance' } },
            { name: 'PubChem (Chemistry)', domain: 'Chemistry', tool: 'compound_search', toolArgs: { query } },
            { name: 'NCBI Gene', domain: 'Genomics', tool: 'gene_lookup', toolArgs: { query, organism: 'any' } },
            { name: 'USGS (Seismic)', domain: 'Earth Science', tool: 'earthquake_query', toolArgs: { query, days: 365, limit: maxPerSource } },
            { name: 'SIMBAD (Astronomy)', domain: 'Astronomy', tool: 'astronomy_query', toolArgs: { object: query, catalog: 'simbad' } },
        ];
        // ── All sources in parallel ──
        const outcomes = await Promise.all(sourcePlans.map(plan => runToolTimed(plan.tool, plan.toolArgs)));
        const pipelineDuration = Date.now() - pipelineStart;
        // A source counts as a hit when the tool did not error and its text
        // does not look like an empty-result message. The flag is computed
        // once so the header count and the table's Status column agree.
        const allSources = sourcePlans.map((plan, i) => {
            const outcome = outcomes[i];
            const hit = !outcome.error && !outcome.result.includes('not found') && !outcome.result.includes('No ');
            return { name: plan.name, ...outcome, domain: plan.domain, hit };
        });
        const hitSources = allSources.filter(s => s.hit);
        // ── Cross-domain connection analysis ──
        const connections = [];
        const domainsWithHits = hitSources.map(s => s.domain);
        if (domainsWithHits.length >= 3) {
            connections.push(`Query "${query}" has relevance across ${domainsWithHits.length} scientific domains: ${domainsWithHits.join(', ')}`);
        }
        if (domainsWithHits.length >= 2) {
            connections.push(`Cross-disciplinary potential: ${domainsWithHits.join(' + ')} intersection`);
        }
        if (domainsWithHits.length === 1) {
            connections.push(`Results concentrated in ${domainsWithHits[0]}; consider narrowing search for other domains`);
        }
        if (domainsWithHits.length === 0) {
            connections.push('No significant hits across any domain. Try a more specific or differently-phrased query.');
        }
        return [
            `# Cross-Domain Scientific Search: "${query}"`,
            `**Sources queried**: ${allSources.length} | **Hits**: ${hitSources.length} | **Duration**: ${fmtDuration(pipelineDuration)}`,
            `**Max results per source**: ${maxPerSource}`,
            '',
            '## Source Overview',
            '| Source | Domain | Duration | Status |',
            '|--------|--------|----------|--------|',
            ...allSources.map(s => `| ${s.name} | ${s.domain} | ${fmtDuration(s.durationMs)} | ${s.hit ? 'Hit' : 'No data'} |`),
            '',
            '## Cross-Domain Connections',
            connections.map(c => `- ${c}`).join('\n'),
            '',
            '---',
            '',
            '## Results by Domain',
            '',
            ...allSources.flatMap(s => [
                `### ${s.name}`,
                s.error
                    ? `*No results found in ${s.domain} for this query.*`
                    : s.result.slice(0, 2500),
                '',
            ]),
            '---',
            `*Generated by kbot cross-domain search pipeline | ${new Date().toISOString().split('T')[0]}*`,
        ].join('\n');
    },
});
1030
+ }
1031
+ // ─── Utility Functions ──────────────────────────────────────────────────────
1032
/**
 * Extract frequently occurring terms from combined text, excluding the
 * search topic itself.
 *
 * The text is split on whitespace; each token is stripped of everything
 * except letters, digits and hyphens, then lowercased. Single words and
 * adjacent word pairs (bigrams) are counted. Tokens shorter than three
 * characters, stop words, topic words and pure numbers are ignored, both on
 * their own and as either half of a bigram.
 *
 * @param {string} text - Combined result text to scan.
 * @param {string} topic - Search topic; its words are excluded from the output.
 * @returns {{term: string, count: number}[]} Up to 15 terms seen at least
 *   three times, sorted by descending count.
 */
function extractKeyTerms(text, topic) {
    const normalize = (token) => token.replace(/[^a-zA-Z0-9-]/g, '').toLowerCase();
    // Topic words get the same normalisation as text tokens, so a topic
    // written as '"graphene",' still excludes the token "graphene".
    const topicWords = new Set(topic.split(/\s+/).map(normalize));
    const stopWords = new Set([
        'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'her',
        'was', 'one', 'our', 'out', 'has', 'had', 'been', 'this', 'that', 'with',
        'from', 'have', 'were', 'they', 'their', 'which', 'will', 'each', 'about',
        'more', 'also', 'into', 'over', 'such', 'than', 'most', 'other', 'some',
        'time', 'very', 'when', 'come', 'could', 'made', 'after', 'only', 'these',
        'results', 'using', 'study', 'found', 'used', 'between', 'based', 'however',
        'showed', 'total', 'including', 'abstract', 'pubmed', 'doi', 'http', 'https',
        'journal', 'vol', 'page', 'author', 'year', 'title', 'published', 'article',
    ]);
    // One predicate for both unigrams and bigram halves keeps the two
    // consistent (numbers such as "2020" are rejected in either position).
    const isCandidate = (word) => word.length >= 3
        && !stopWords.has(word)
        && !topicWords.has(word)
        && !/^\d+$/.test(word);
    const termCounts = new Map();
    const bump = (term) => {
        termCounts.set(term, (termCounts.get(term) || 0) + 1);
    };
    const tokens = text.split(/\s+/).map(normalize);
    for (let i = 0; i < tokens.length; i++) {
        const word = tokens[i];
        if (!isCandidate(word)) {
            continue;
        }
        // Count the bigram first so that, on equal counts, the stable sort
        // lists the multi-word term ahead of its first word.
        const next = tokens[i + 1];
        if (next !== undefined && isCandidate(next)) {
            bump(`${word} ${next}`);
        }
        bump(word);
    }
    return [...termCounts.entries()]
        .filter(([, count]) => count >= 3)
        .sort((a, b) => b[1] - a[1])
        .slice(0, 15)
        .map(([term, count]) => ({ term, count }));
}
1070
/**
 * Collect the distinct element-like symbols in a chemical formula.
 *
 * A symbol is one uppercase letter optionally followed by one lowercase
 * letter. Symbols are returned in order of first appearance.
 *
 * @param {string} formula - Chemical formula, e.g. "Fe2O3".
 * @returns {string[]} Unique symbols, e.g. ["Fe", "O"].
 */
function extractElements(formula) {
    const seen = new Set();
    for (const [symbol] of formula.matchAll(/[A-Z][a-z]?/g)) {
        seen.add(symbol);
    }
    return Array.from(seen);
}
1076
/**
 * Pick out the lines of a results text that mention a given material property.
 *
 * Lowercase keywords are matched case-insensitively as substrings. Keywords
 * that contain capitals (unit and symbol tokens such as "eV" and "Cp") are
 * matched case-sensitively and must not be adjacent to another letter, so
 * "1.12 eV" matches but "every" and "level" do not.
 *
 * @param {string} text - Combined results text, one fact per line.
 * @param {string} property - Property key: strength, conductivity, band_gap
 *   or thermal. Any other key yields the "no data" message.
 * @returns {string} Up to 10 matching lines joined by newlines, or an
 *   italic markdown note when nothing matches.
 */
function extractPropertyInfo(text, property) {
    const propKeywords = {
        strength: ['hardness', 'tensile', 'yield', 'modulus', 'elastic', 'bulk modulus', 'shear'],
        conductivity: ['conductivity', 'resistivity', 'band gap', 'semiconductor', 'metallic', 'insulator'],
        band_gap: ['band gap', 'band_gap', 'eV', 'direct gap', 'indirect gap', 'semiconductor', 'electronic'],
        thermal: ['thermal', 'heat capacity', 'melting', 'boiling', 'enthalpy', 'entropy', 'Cp', 'conductivity'],
    };
    // Own-property check: a key such as "constructor" would otherwise
    // resolve to an inherited function and break the matching below.
    const keywords = Object.prototype.hasOwnProperty.call(propKeywords, property)
        ? propKeywords[property]
        : [];
    const substringKeywords = keywords.filter(kw => kw === kw.toLowerCase());
    // Mixed-case keywords can never be found in a lowercased line, and
    // lowercasing them instead ("ev", "cp") would match ordinary words.
    const symbolPatterns = keywords
        .filter(kw => kw !== kw.toLowerCase())
        .map(kw => new RegExp(`(?<![A-Za-z])${kw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}(?![A-Za-z])`));
    const relevantLines = text.split('\n').filter(line => {
        const lower = line.toLowerCase();
        return substringKeywords.some(kw => lower.includes(kw))
            || symbolPatterns.some(pattern => pattern.test(line));
    });
    if (relevantLines.length === 0) {
        return `*No specific ${property} data found in the retrieved results. Consider checking specialized materials databases.*`;
    }
    return relevantLines.slice(0, 10).join('\n');
}
1094
+ //# sourceMappingURL=research-pipeline.js.map