@kernel.chat/kbot 3.42.0 → 3.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1165 @@
1
+ // kbot Research Notebook — Reproducible computation tracking & shareable notebooks
2
+ // Tracks every research step for full provenance. Exports to Jupyter, R Markdown,
3
+ // Markdown, HTML, and LaTeX. Zero external deps.
4
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ import { homedir } from 'node:os';
7
+ import { randomUUID } from 'node:crypto';
8
+ import { registerTool } from './index.js';
9
+ // ── Storage helpers ──────────────────────────────────────────────────────────
10
// Directory under the user's home (`~/.kbot/research-notebooks`) where each
// notebook is stored as `<id>.json` and where exported documents are written.
const NOTEBOOKS_DIR = join(homedir(), '.kbot', 'research-notebooks');
11
/**
 * Make sure the notebook storage directory exists.
 *
 * `mkdirSync` with `recursive: true` creates any missing parents and does not
 * fail when the directory is already there, so no separate existence check is
 * performed (which also removes the check-then-create race).
 */
function ensureDir() {
    mkdirSync(NOTEBOOKS_DIR, { recursive: true });
}
16
/**
 * Resolve the on-disk JSON file for a notebook id.
 *
 * Only the final path segment of `id` is used, so an id such as `../../x`
 * (ids can arrive from tool arguments) cannot point outside NOTEBOOKS_DIR.
 * Ids without path separators map to exactly the same path as before.
 *
 * @param {string} id - Notebook identifier.
 * @returns {string} Path of `<id>.json` inside NOTEBOOKS_DIR.
 */
function notebookPath(id) {
    const safeId = String(id).split(/[\\/]/).pop();
    return join(NOTEBOOKS_DIR, `${safeId}.json`);
}
19
/**
 * Load a single notebook by id.
 *
 * @param {string} id - Notebook identifier.
 * @returns {object|null} The parsed notebook, or `null` when the id is not a
 *   plain file name, the file is missing/unreadable, or it is not valid JSON.
 */
function loadNotebook(id) {
    // Ids may come straight from tool arguments; refuse anything that could
    // address a file outside the notebook directory.
    if (/[\\/\0]/.test(String(id))) {
        return null;
    }
    try {
        // A missing file surfaces as a read error here, so no prior
        // existence check is needed.
        return JSON.parse(readFileSync(notebookPath(id), 'utf-8'));
    }
    catch {
        return null;
    }
}
30
/**
 * Persist a notebook as pretty-printed JSON under its id.
 *
 * @param {object} nb - Notebook to write; `nb.id` selects the target file.
 */
function saveNotebook(nb) {
    ensureDir();
    const target = notebookPath(nb.id);
    const serialized = JSON.stringify(nb, null, 2);
    writeFileSync(target, serialized);
}
34
/**
 * Load every notebook stored in NOTEBOOKS_DIR.
 *
 * Files that cannot be read or parsed are skipped (best effort). Files whose
 * JSON is not notebook-shaped — not a plain object, or without a `steps`
 * array — are skipped too, because callers dereference `nb.title`,
 * `nb.steps.length`, etc. and would otherwise throw on them.
 *
 * @returns {object[]} Parsed notebooks, in directory-listing order.
 */
function loadAllNotebooks() {
    ensureDir();
    const notebooks = [];
    for (const name of readdirSync(NOTEBOOKS_DIR)) {
        if (!name.endsWith('.json')) {
            continue;
        }
        try {
            const nb = JSON.parse(readFileSync(join(NOTEBOOKS_DIR, name), 'utf-8'));
            const looksLikeNotebook = nb !== null
                && typeof nb === 'object'
                && !Array.isArray(nb)
                && Array.isArray(nb.steps);
            if (looksLikeNotebook) {
                notebooks.push(nb);
            }
        }
        catch { /* skip unreadable or corrupt files */ }
    }
    return notebooks;
}
47
// Step types accepted when logging a step; `notebook_log` rejects any other
// `step_type` with an error listing these values.
const VALID_STEP_TYPES = ['observation', 'hypothesis', 'experiment', 'analysis', 'result', 'conclusion', 'note'];
48
/**
 * Scan every step's content and data for DOIs, PMIDs and plain URLs.
 *
 * Each distinct reference is reported once, attributed to the first step in
 * which it appears. Within a step, DOIs are reported first, then PMIDs, then
 * URLs. URLs containing `doi.org` are left to the DOI matcher.
 *
 * @param {object} nb - Notebook whose `steps` are scanned.
 * @returns {{type: string, value: string, stepId: string}[]} References found.
 */
function extractReferences(nb) {
    const collected = [];
    const alreadySeen = new Set();
    // Record a reference unless the same type/value pair was recorded before.
    const remember = (type, value, stepId) => {
        const key = `${type}:${value}`;
        if (alreadySeen.has(key)) {
            return;
        }
        alreadySeen.add(key);
        collected.push({ type, value, stepId });
    };
    // Drop trailing punctuation/brackets that belong to the surrounding prose.
    const stripTrailing = (s) => s.replace(/[.)>\]]+$/, '');
    for (const step of nb.steps) {
        const haystack = `${step.content} ${step.data || ''}`;
        const dois = haystack.match(/\b10\.\d{4,}\/[^\s,;]+/g) ?? [];
        dois.forEach((raw) => remember('doi', stripTrailing(raw), step.id));
        const pmids = haystack.match(/PMID[:\s]*(\d{6,})/gi) ?? [];
        pmids.forEach((raw) => remember('pmid', raw.replace(/PMID[:\s]*/i, ''), step.id));
        const urls = haystack.match(/https?:\/\/[^\s,;)>\]]+/g) ?? [];
        urls.forEach((raw) => {
            const url = stripTrailing(raw);
            if (!url.includes('doi.org')) {
                remember('url', url, step.id);
            }
        });
    }
    return collected;
}
83
/**
 * Render one extracted reference as a citation string.
 *
 * @param {{type: string, value: string}} ref - Reference (`doi`, `pmid`, or anything else treated as a URL).
 * @param {string} style - `apa`, `mla`, `chicago`, `bibtex` or `vancouver`; any other value yields `[n] value`.
 * @param {number} index - Zero-based position; citations are numbered from 1.
 * @returns {string} Formatted citation.
 */
function formatCitation(ref, style, index) {
    const num = index + 1;
    const { type, value } = ref;
    const isDoi = type === 'doi';
    const isPmid = type === 'pmid';
    const doiUrl = `https://doi.org/${value}`;
    const pubmedUrl = `https://pubmed.ncbi.nlm.nih.gov/${value}/`;
    if (style === 'apa') {
        if (isDoi) {
            return `[${num}] ${doiUrl}`;
        }
        if (isPmid) {
            return `[${num}] PubMed ID: ${value}. ${pubmedUrl}`;
        }
        return `[${num}] Retrieved from ${value}`;
    }
    if (style === 'mla') {
        if (isDoi) {
            return `${num}. Web. doi:${value}.`;
        }
        if (isPmid) {
            return `${num}. PubMed, PMID ${value}.`;
        }
        return `${num}. Web. <${value}>.`;
    }
    if (style === 'chicago') {
        if (isDoi) {
            return `${num}. ${doiUrl}.`;
        }
        if (isPmid) {
            return `${num}. PubMed PMID: ${value}. ${pubmedUrl}.`;
        }
        return `${num}. ${value}.`;
    }
    if (style === 'bibtex') {
        if (isDoi) {
            // The entry key is the DOI with every non-alphanumeric character replaced.
            const doiKey = value.replace(/[^a-zA-Z0-9]/g, '_');
            return `@article{${doiKey},\n doi = {${value}},\n url = {${doiUrl}}\n}`;
        }
        if (isPmid) {
            return `@article{pmid_${value},\n note = {PMID: ${value}},\n url = {${pubmedUrl}}\n}`;
        }
        return `@misc{ref_${num},\n url = {${value}}\n}`;
    }
    if (style === 'vancouver') {
        if (isDoi) {
            return `${num}. doi: ${value}.`;
        }
        if (isPmid) {
            return `${num}. PubMed PMID: ${value}.`;
        }
        return `${num}. Available from: ${value}.`;
    }
    return `[${num}] ${value}`;
}
122
+ // ── Export helpers ────────────────────────────────────────────────────────────
123
/**
 * Map a step type to its icon name.
 *
 * Only the table's own keys are consulted, so inherited names such as
 * `constructor` or `toString` fall back to `'dot'` like any unknown type
 * instead of returning a function from `Object.prototype`.
 *
 * @param {string} type - Step type.
 * @returns {string} Icon name, or `'dot'` for an unknown type.
 */
function stepIcon(type) {
    const icons = {
        observation: 'eye',
        hypothesis: 'lightbulb',
        experiment: 'flask',
        analysis: 'chart',
        result: 'check',
        conclusion: 'star',
        note: 'memo',
    };
    return Object.prototype.hasOwnProperty.call(icons, type) ? icons[type] : 'dot';
}
135
/**
 * Render one research step as a Markdown section.
 *
 * Layout: a `###` heading with the capitalised type and local timestamp, an
 * optional tool blockquote, the content, an optional fenced data block, and an
 * optional duration line.
 *
 * The data fence is always longer than the longest backtick run inside the
 * data (minimum three), so data that itself contains a code fence cannot close
 * the block early.
 *
 * @param {object} step - Step with `type`, `timestamp`, `content` and optional `toolUsed`, `data`, `duration_ms`.
 * @returns {string} Markdown text ending with a newline.
 */
function stepToMarkdown(step) {
    const lines = [];
    const ts = new Date(step.timestamp).toLocaleString();
    lines.push(`### ${step.type.charAt(0).toUpperCase() + step.type.slice(1)} — ${ts}`);
    if (step.toolUsed) {
        lines.push(`> Tool: \`${step.toolUsed}\``);
    }
    lines.push('');
    lines.push(step.content);
    if (step.data) {
        const backtickRuns = String(step.data).match(/`+/g) ?? [];
        const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
        const fence = '`'.repeat(Math.max(3, longestRun + 1));
        lines.push('');
        lines.push(fence);
        lines.push(step.data);
        lines.push(fence);
    }
    if (step.duration_ms != null) {
        lines.push(`\n*Duration: ${step.duration_ms}ms*`);
    }
    lines.push('');
    return lines.join('\n');
}
155
/**
 * Decide whether a step is exported as a code cell/chunk.
 *
 * @param {object} step - Research step.
 * @returns {boolean} True only for `experiment`/`analysis` steps that name a tool.
 */
function isCodeStep(step) {
    const isComputational = step.type === 'experiment' || step.type === 'analysis';
    if (!isComputational) {
        return false;
    }
    return Boolean(step.toolUsed);
}
158
/**
 * Build a Python code-cell skeleton for a step, chosen by keywords in the
 * tool name (stats/regression, plot/chart/visual, fetch/search/web,
 * bash/shell, or a generic fallback).
 *
 * All free text (tool name, content, tool arguments) is emitted as comments
 * with every line prefixed by `# `, so multi-line content or pretty-printed
 * arguments can never end up as live Python. Data is truncated to 500
 * characters and embedded in a triple-quoted literal with backslashes,
 * embedded `"""` and a trailing `"` escaped so the literal always terminates
 * where intended.
 *
 * @param {object} step - Step with `type`, `content` and optional `toolUsed`, `toolArgs`, `data`.
 * @returns {string} Python source text.
 */
function stepToPythonCode(step) {
    // Comment out every physical line of `text`.
    const asComment = (text) => String(text).split(/\r\n|\r|\n/).map((line) => `# ${line}`);
    // Python triple-quoted literal holding at most 500 characters of `text`.
    const asTripleQuoted = (text) => {
        const escaped = String(text)
            .slice(0, 500)
            .replace(/\\/g, '\\\\')
            .replace(/"""/g, '""\\"')
            .replace(/"$/, '\\"');
        return `"""${escaped}"""`;
    };
    const tool = step.toolUsed || '';
    const lines = [...asComment(`Step: ${step.type} — ${step.toolUsed || 'manual'}`)];
    // Reconstruct plausible Python code from tool usage
    if (tool.includes('regression') || tool.includes('stats')) {
        lines.push('from scipy import stats');
        lines.push('import numpy as np');
        lines.push('');
        lines.push('# Recreating analysis step');
        if (step.data) {
            lines.push(`data = ${asTripleQuoted(step.data)}`);
            lines.push('# Parse and analyze data');
        }
        lines.push(...asComment(step.content));
    }
    else if (tool.includes('plot') || tool.includes('chart') || tool.includes('visual')) {
        lines.push('import matplotlib.pyplot as plt');
        lines.push('');
        lines.push(...asComment(step.content));
        if (step.data) {
            lines.push(`data = ${asTripleQuoted(step.data)}`);
        }
    }
    else if (tool.includes('fetch') || tool.includes('search') || tool.includes('web')) {
        lines.push('import requests');
        lines.push('');
        lines.push(...asComment(step.content));
        if (step.toolArgs) {
            const argsStr = JSON.stringify(step.toolArgs, null, 2);
            lines.push(...asComment(`Tool arguments: ${argsStr}`));
        }
    }
    else if (tool.includes('bash') || tool.includes('shell')) {
        lines.push('import subprocess');
        lines.push('');
        lines.push(...asComment(step.content));
        if (step.data) {
            lines.push(`result = ${asTripleQuoted(step.data)}`);
        }
    }
    else {
        lines.push('');
        lines.push(...asComment(step.content));
        if (step.toolArgs) {
            lines.push(...asComment(`Tool: ${tool}`));
            lines.push(...asComment(`Args: ${JSON.stringify(step.toolArgs)}`));
        }
        if (step.data) {
            lines.push(`data = ${asTripleQuoted(step.data)}`);
        }
    }
    return lines.join('\n');
}
212
/**
 * Build an R code-chunk skeleton for a step, chosen by keywords in the tool
 * name (stats/regression, plot/chart/visual, or a generic fallback).
 *
 * Free text is emitted as comments with every line prefixed by `# `, so
 * multi-line content cannot become live R code. Data is truncated to 300
 * characters and embedded in a double-quoted string with backslashes and
 * double quotes escaped so the literal cannot be terminated early.
 *
 * @param {object} step - Step with `type`, `content` and optional `toolUsed`, `data`.
 * @returns {string} R source text.
 */
function stepToRCode(step) {
    // Comment out every physical line of `text`.
    const asComment = (text) => String(text).split(/\r\n|\r|\n/).map((line) => `# ${line}`);
    // R double-quoted literal holding at most 300 characters of `text`.
    const asRString = (text) => {
        const escaped = String(text)
            .slice(0, 300)
            .replace(/\\/g, '\\\\')
            .replace(/"/g, '\\"');
        return `"${escaped}"`;
    };
    const tool = step.toolUsed || '';
    const lines = [...asComment(`Step: ${step.type} — ${step.toolUsed || 'manual'}`)];
    if (tool.includes('regression') || tool.includes('stats')) {
        lines.push('library(stats)');
        lines.push('');
        lines.push(...asComment(step.content));
        if (step.data) {
            lines.push(`data_raw <- ${asRString(step.data)}`);
            lines.push('# Parse and analyze data');
        }
    }
    else if (tool.includes('plot') || tool.includes('chart') || tool.includes('visual')) {
        lines.push('library(ggplot2)');
        lines.push('');
        lines.push(...asComment(step.content));
    }
    else {
        lines.push('');
        lines.push(...asComment(step.content));
        if (step.data) {
            lines.push(`data_raw <- ${asRString(step.data)}`);
        }
    }
    return lines.join('\n');
}
239
/**
 * Serialise a notebook as a Jupyter `.ipynb` document (nbformat 4.5).
 *
 * The first cell is a Markdown title/metadata cell. Each code step (see
 * `isCodeStep`) becomes a Markdown description cell followed by a Python code
 * cell; every other step becomes a single Markdown cell.
 *
 * nbformat 4.5 requires a unique `id` on every cell, so cells are numbered
 * `cell-0`, `cell-1`, … in output order.
 *
 * @param {object} nb - Notebook with `id`, `title`, `description`, `tags`, `created`, `modified`, `steps` and optional `field`.
 * @returns {string} Pretty-printed ipynb JSON.
 */
function exportJupyter(nb) {
    const cells = [];
    // Append a cell, giving it the next sequential id.
    const addCell = (cell) => {
        cells.push({ id: `cell-${cells.length}`, ...cell });
    };
    const headingLine = (step) => `## ${step.type.charAt(0).toUpperCase() + step.type.slice(1)}\n`;
    // Title cell
    addCell({
        cell_type: 'markdown',
        metadata: {},
        source: [
            `# ${nb.title}\n`,
            `\n`,
            `${nb.description}\n`,
            `\n`,
            `**Field**: ${nb.field || 'General'}\n`,
            `**Tags**: ${nb.tags.join(', ') || 'none'}\n`,
            `**Created**: ${nb.created}\n`,
            `**Modified**: ${nb.modified}\n`,
        ],
    });
    for (const step of nb.steps) {
        if (isCodeStep(step)) {
            // Markdown description
            addCell({
                cell_type: 'markdown',
                metadata: {},
                source: [headingLine(step), `\n`, `${step.content}\n`],
            });
            // Code cell: ipynb stores source as lines, each but the last keeping its newline.
            const code = stepToPythonCode(step);
            addCell({
                cell_type: 'code',
                metadata: {},
                source: code.split('\n').map((l, i, a) => i < a.length - 1 ? l + '\n' : l),
                execution_count: null,
                outputs: [],
            });
        }
        else {
            addCell({
                cell_type: 'markdown',
                metadata: {},
                source: [
                    headingLine(step),
                    `\n`,
                    `${step.content}\n`,
                    ...(step.data ? [`\n`, '```\n', `${step.data}\n`, '```\n'] : []),
                    ...(step.toolUsed ? [`\n`, `*Tool: ${step.toolUsed}*\n`] : []),
                ],
            });
        }
    }
    const ipynb = {
        nbformat: 4,
        nbformat_minor: 5,
        metadata: {
            kernelspec: {
                display_name: 'Python 3',
                language: 'python',
                name: 'python3',
            },
            language_info: {
                name: 'python',
                version: '3.11.0',
            },
            kbot: {
                notebook_id: nb.id,
                exported: new Date().toISOString(),
            },
        },
        cells,
    };
    return JSON.stringify(ipynb, null, 2);
}
310
/**
 * Serialise a notebook as an R Markdown (`.Rmd`) document.
 *
 * Front-matter values are written as JSON string literals, which are valid
 * YAML double-quoted scalars, so quotes or backslashes in the title or field
 * cannot break the header. `html_document` options and `params` entries are
 * indented beneath their parent keys so YAML parses them as nested mappings.
 *
 * Each step becomes a `##` section; code steps (see `isCodeStep`) get an
 * `{r}` chunk, other steps with data get a plain fenced block.
 *
 * @param {object} nb - Notebook with `title`, `description`, `created`, `steps` and optional `field`.
 * @returns {string} R Markdown text.
 */
function exportRMarkdown(nb) {
    const yamlString = (value) => JSON.stringify(String(value));
    const lines = [];
    // YAML front matter
    lines.push('---');
    lines.push(`title: ${yamlString(nb.title)}`);
    lines.push(`date: ${yamlString(nb.created)}`);
    lines.push('output:');
    lines.push('  html_document:');
    lines.push('    toc: true');
    lines.push('    toc_float: true');
    if (nb.field) {
        lines.push('params:');
        lines.push(`  field: ${yamlString(nb.field)}`);
    }
    lines.push('---');
    lines.push('');
    lines.push(`# ${nb.title}`);
    lines.push('');
    lines.push(nb.description);
    lines.push('');
    for (const step of nb.steps) {
        const ts = new Date(step.timestamp).toLocaleString();
        lines.push(`## ${step.type.charAt(0).toUpperCase() + step.type.slice(1)} — ${ts}`);
        lines.push('');
        lines.push(step.content);
        lines.push('');
        if (isCodeStep(step)) {
            lines.push('```{r}');
            lines.push(stepToRCode(step));
            lines.push('```');
            lines.push('');
        }
        else if (step.data) {
            lines.push('```');
            lines.push(step.data);
            lines.push('```');
            lines.push('');
        }
        if (step.toolUsed) {
            lines.push(`*Tool: ${step.toolUsed}*`);
            lines.push('');
        }
    }
    return lines.join('\n');
}
355
/**
 * Serialise a notebook as plain Markdown: a title/metadata header, a rule,
 * then one section per step rendered by `stepToMarkdown`.
 *
 * @param {object} nb - Notebook with `title`, `description`, `tags`, `created`, `modified`, `steps` and optional `field`.
 * @returns {string} Markdown text.
 */
function exportMarkdown(nb) {
    const fieldLabel = nb.field || 'General';
    const tagLabel = nb.tags.join(', ') || 'none';
    const header = [
        `# ${nb.title}`,
        '',
        nb.description,
        '',
        `**Field**: ${fieldLabel} | **Tags**: ${tagLabel}`,
        `**Created**: ${nb.created} | **Modified**: ${nb.modified}`,
        '',
        '---',
        '',
    ];
    const sections = nb.steps.map((step) => stepToMarkdown(step));
    return [...header, ...sections].join('\n');
}
371
/**
 * Escape text for safe inclusion in HTML element content or attribute values.
 *
 * Escapes `&`, `<`, `>`, `"` and also `'`, so the result is safe inside
 * single-quoted attributes as well as double-quoted ones.
 *
 * @param {string} text - Raw text.
 * @returns {string} HTML-escaped text.
 */
function escapeHtml(text) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}
378
/**
 * Serialise a notebook as a self-contained HTML page (inline styles only).
 *
 * Every value taken from the notebook is passed through `escapeHtml` before
 * being placed in the page, including `duration_ms`, which comes from the
 * stored JSON and is not guaranteed to be a number. The accent colour is
 * looked up among the table's own keys only, so an unexpected step type can
 * never inject an inherited property into the `style` attribute.
 *
 * @param {object} nb - Notebook with `title`, `description`, `tags`, `steps` and optional `field`.
 * @returns {string} Complete HTML document.
 */
function exportHtml(nb) {
    const stepTypeColor = {
        observation: '#4a90d9',
        hypothesis: '#e8a838',
        experiment: '#7b68ee',
        analysis: '#50c878',
        result: '#ff6b6b',
        conclusion: '#6b5b95',
        note: '#999',
    };
    let stepsHtml = '';
    for (const step of nb.steps) {
        const ts = new Date(step.timestamp).toLocaleString();
        const color = Object.prototype.hasOwnProperty.call(stepTypeColor, step.type)
            ? stepTypeColor[step.type]
            : '#666';
        const label = step.type.charAt(0).toUpperCase() + step.type.slice(1);
        stepsHtml += `
 <div style="margin-bottom:24px;padding:16px;border-left:4px solid ${color};background:#fafafa;border-radius:0 6px 6px 0;">
 <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px;">
 <span style="font-weight:bold;color:${color};font-size:14px;text-transform:uppercase;letter-spacing:0.5px;">${escapeHtml(label)}</span>
 <span style="color:#888;font-size:12px;">${escapeHtml(ts)}</span>
 </div>
 <p style="margin:0 0 8px 0;line-height:1.6;">${escapeHtml(step.content)}</p>
 ${step.toolUsed ? `<div style="font-size:12px;color:#666;margin-top:4px;">Tool: <code>${escapeHtml(step.toolUsed)}</code></div>` : ''}
 ${step.data ? `<pre style="background:#f0f0f0;padding:12px;border-radius:4px;overflow-x:auto;font-size:13px;margin-top:8px;">${escapeHtml(step.data)}</pre>` : ''}
 ${step.duration_ms != null ? `<div style="font-size:11px;color:#aaa;margin-top:4px;">Duration: ${escapeHtml(String(step.duration_ms))}ms</div>` : ''}
 </div>`;
    }
    return `<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 <title>${escapeHtml(nb.title)}</title>
 <style>
 * { box-sizing: border-box; margin: 0; padding: 0; }
 body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 0 auto; padding: 40px 20px; color: #333; line-height: 1.6; }
 h1 { margin-bottom: 8px; }
 .meta { color: #666; font-size: 14px; margin-bottom: 24px; }
 .meta span { margin-right: 16px; }
 hr { border: none; border-top: 1px solid #eee; margin: 24px 0; }
 code { background: #f0f0f0; padding: 2px 6px; border-radius: 3px; font-size: 13px; }
 pre { white-space: pre-wrap; word-wrap: break-word; }
 </style>
 </head>
 <body>
 <h1>${escapeHtml(nb.title)}</h1>
 <div class="meta">
 <span><strong>Field:</strong> ${escapeHtml(nb.field || 'General')}</span>
 <span><strong>Tags:</strong> ${escapeHtml(nb.tags.join(', ') || 'none')}</span>
 <span><strong>Steps:</strong> ${nb.steps.length}</span>
 </div>
 <p>${escapeHtml(nb.description)}</p>
 <hr>
 ${stepsHtml}
 <hr>
 <div style="font-size:12px;color:#aaa;text-align:center;margin-top:24px;">
 Generated by K:BOT Research Notebook &mdash; ${new Date().toISOString()}
 </div>
 </body>
 </html>`;
}
439
/**
 * Escape text for use in LaTeX running text.
 *
 * All special characters are handled in a single pass. Replacing the
 * backslash first and the braces afterwards (as separate passes) would
 * re-escape the braces of the freshly inserted `\textbackslash{}` and yield
 * `\textbackslash\{\}`.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with `\ & % $ # _ { } ~ ^` escaped.
 */
function escapeLatex(text) {
    const named = {
        '\\': '\\textbackslash{}',
        '~': '\\textasciitilde{}',
        '^': '\\textasciicircum{}',
    };
    // Characters without a named command are escaped with a leading backslash.
    return text.replace(/[\\&%$#_{}~^]/g, (ch) => named[ch] ?? `\\${ch}`);
}
446
/**
 * Serialise a notebook as a standalone LaTeX article: preamble, title block,
 * abstract (the description), optional field/tags lines, table of contents,
 * then one `\section` per step with its data in an `lstlisting` block.
 *
 * @param {object} nb - Notebook with `title`, `description`, `created`, `tags`, `steps` and optional `field`.
 * @returns {string} LaTeX source.
 */
function exportLatex(nb) {
    const preamble = [
        '\\documentclass[12pt,a4paper]{article}',
        '\\usepackage[utf8]{inputenc}',
        '\\usepackage[T1]{fontenc}',
        '\\usepackage{geometry}',
        '\\geometry{margin=1in}',
        '\\usepackage{hyperref}',
        '\\usepackage{listings}',
        '\\usepackage{xcolor}',
        '\\usepackage{graphicx}',
        '',
        '\\lstset{',
        ' basicstyle=\\ttfamily\\small,',
        ' breaklines=true,',
        ' frame=single,',
        ' backgroundcolor=\\color{gray!10}',
        '}',
        '',
    ];
    const titleBlock = [
        `\\title{${escapeLatex(nb.title)}}`,
        `\\date{${escapeLatex(nb.created)}}`,
        '',
        '\\begin{document}',
        '\\maketitle',
        '',
        '\\begin{abstract}',
        escapeLatex(nb.description),
        '\\end{abstract}',
        '',
    ];
    // Field/tags lines appear only when at least one of them is present.
    const metaBlock = [];
    if (nb.field || nb.tags.length > 0) {
        metaBlock.push('\\noindent');
        if (nb.field) {
            metaBlock.push(`\\textbf{Field:} ${escapeLatex(nb.field)} \\\\`);
        }
        if (nb.tags.length > 0) {
            metaBlock.push(`\\textbf{Tags:} ${escapeLatex(nb.tags.join(', '))} \\\\`);
        }
        metaBlock.push('');
    }
    const renderStep = (step) => {
        const when = new Date(step.timestamp).toLocaleString();
        const heading = step.type.charAt(0).toUpperCase() + step.type.slice(1);
        const out = [
            `\\section{${escapeLatex(heading)}}`,
            `\\label{step:${step.id}}`,
            `{\\small\\textit{${escapeLatex(when)}}}`,
            '',
            escapeLatex(step.content),
            '',
        ];
        if (step.toolUsed) {
            out.push(`\\noindent\\textbf{Tool:} \\texttt{${escapeLatex(step.toolUsed)}}`, '');
        }
        if (step.data) {
            // Listing bodies are verbatim, so the data is inserted unescaped.
            out.push('\\begin{lstlisting}', step.data, '\\end{lstlisting}', '');
        }
        if (step.duration_ms != null) {
            out.push(`{\\footnotesize Duration: ${step.duration_ms}ms}`, '');
        }
        return out;
    };
    return [
        ...preamble,
        ...titleBlock,
        ...metaBlock,
        '\\tableofcontents',
        '\\newpage',
        '',
        ...nb.steps.flatMap(renderStep),
        '\\end{document}',
    ].join('\n');
}
513
/**
 * Derive a provenance graph from the order and types of a notebook's steps.
 *
 * Edge rules, applied to each step against the steps before it:
 * - experiment/analysis: linked from every earlier observation or hypothesis,
 *   and from every earlier data-carrying experiment, analysis, result or
 *   observation not already linked;
 * - result: linked from the most recent earlier experiment or analysis;
 * - conclusion: linked from every earlier result.
 *
 * @param {object} nb - Notebook whose `steps` are analysed.
 * @returns {object[]} One node per step with `stepId`, `stepType`, `tool`,
 *   `inputsFrom`, `outputsTo`, `hasData` and `hasToolArgs`.
 */
function buildProvenanceDAG(nb) {
    const PREMISE_TYPES = ['observation', 'hypothesis'];
    const COMPUTE_TYPES = ['experiment', 'analysis'];
    const DATA_TYPES = ['experiment', 'analysis', 'result', 'observation'];
    const { steps } = nb;
    const nodes = steps.map((step) => ({
        stepId: step.id,
        stepType: step.type,
        tool: step.toolUsed,
        inputsFrom: [],
        outputsTo: [],
        hasData: Boolean(step.data),
        hasToolArgs: Boolean(step.toolArgs) && Object.keys(step.toolArgs).length > 0,
    }));
    // Record an edge from the step at `from` to the step at `to`.
    const connect = (from, to) => {
        nodes[to].inputsFrom.push(steps[from].id);
        nodes[from].outputsTo.push(steps[to].id);
    };
    const producers = new Set();
    steps.forEach((step, i) => {
        // Steps that produce data
        if (step.data && DATA_TYPES.includes(step.type)) {
            producers.add(step.id);
        }
        if (COMPUTE_TYPES.includes(step.type)) {
            for (let j = i - 1; j >= 0; j--) {
                const prior = steps[j];
                if (PREMISE_TYPES.includes(prior.type)) {
                    connect(j, i);
                }
                const suppliesData = prior.data && producers.has(prior.id) && prior.id !== step.id;
                if (suppliesData && !nodes[i].inputsFrom.includes(prior.id)) {
                    connect(j, i);
                }
            }
        }
        if (step.type === 'result') {
            // Walk back to the most recent experiment/analysis, if any.
            let j = i - 1;
            while (j >= 0 && !COMPUTE_TYPES.includes(steps[j].type)) {
                j--;
            }
            if (j >= 0) {
                connect(j, i);
            }
        }
        if (step.type === 'conclusion') {
            for (let j = i - 1; j >= 0; j--) {
                if (steps[j].type === 'result') {
                    connect(j, i);
                }
            }
        }
    });
    return nodes;
}
575
+ // ── Tool Registration ────────────────────────────────────────────────────────
576
+ export function registerResearchNotebookTools() {
577
+ // ── 1. notebook_create ─────────────────────────────────────────────────
578
+ registerTool({
579
+ name: 'notebook_create',
580
+ description: 'Create a new research notebook for tracking computations, experiments, and findings. Returns a notebook ID for subsequent logging.',
581
+ parameters: {
582
+ title: { type: 'string', description: 'Title of the research notebook', required: true },
583
+ description: { type: 'string', description: 'Description of the research purpose and scope', required: true },
584
+ field: { type: 'string', description: 'Research field (e.g., biology, physics, data-science)' },
585
+ tags: { type: 'string', description: 'Comma-separated tags for categorization' },
586
+ },
587
+ tier: 'free',
588
+ async execute(args) {
589
+ const title = String(args.title).trim();
590
+ const description = String(args.description).trim();
591
+ if (!title)
592
+ return 'Error: title is required';
593
+ if (!description)
594
+ return 'Error: description is required';
595
+ const now = new Date().toISOString();
596
+ const id = randomUUID().slice(0, 8);
597
+ const nb = {
598
+ id,
599
+ title,
600
+ description,
601
+ field: args.field ? String(args.field).trim() : undefined,
602
+ tags: args.tags ? String(args.tags).split(',').map(t => t.trim()).filter(Boolean) : [],
603
+ created: now,
604
+ modified: now,
605
+ steps: [],
606
+ };
607
+ saveNotebook(nb);
608
+ return [
609
+ `## Notebook Created`,
610
+ '',
611
+ `**ID**: \`${id}\``,
612
+ `**Title**: ${title}`,
613
+ `**Description**: ${description}`,
614
+ nb.field ? `**Field**: ${nb.field}` : '',
615
+ nb.tags.length > 0 ? `**Tags**: ${nb.tags.join(', ')}` : '',
616
+ '',
617
+ `Use \`notebook_log\` with notebook_id \`${id}\` to record research steps.`,
618
+ ].filter(Boolean).join('\n');
619
+ },
620
+ });
621
+ // ── 2. notebook_log ────────────────────────────────────────────────────
622
+ registerTool({
623
+ name: 'notebook_log',
624
+ description: 'Log a research step (observation, hypothesis, experiment, analysis, result, conclusion, or note) into a notebook. Timestamps every entry and tracks tool usage for reproducibility.',
625
+ parameters: {
626
+ notebook_id: { type: 'string', description: 'Notebook ID to log into', required: true },
627
+ step_type: { type: 'string', description: 'Step type: observation, hypothesis, experiment, analysis, result, conclusion, or note', required: true },
628
+ content: { type: 'string', description: 'Description of what was done or observed', required: true },
629
+ tool_used: { type: 'string', description: 'Name of the tool used for this step (e.g., web_search, regression_analysis)' },
630
+ data: { type: 'string', description: 'Raw data, output, or results to store for reproducibility' },
631
+ },
632
+ tier: 'free',
633
+ async execute(args) {
634
+ const notebookId = String(args.notebook_id).trim();
635
+ const nb = loadNotebook(notebookId);
636
+ if (!nb)
637
+ return `Error: Notebook \`${notebookId}\` not found`;
638
+ const stepType = String(args.step_type).trim().toLowerCase();
639
+ if (!VALID_STEP_TYPES.includes(stepType)) {
640
+ return `Error: Invalid step_type "${stepType}". Valid types: ${VALID_STEP_TYPES.join(', ')}`;
641
+ }
642
+ const content = String(args.content).trim();
643
+ if (!content)
644
+ return 'Error: content is required';
645
+ const step = {
646
+ id: randomUUID().slice(0, 8),
647
+ timestamp: new Date().toISOString(),
648
+ type: stepType,
649
+ content,
650
+ toolUsed: args.tool_used ? String(args.tool_used).trim() : undefined,
651
+ data: args.data ? String(args.data) : undefined,
652
+ };
653
+ nb.steps.push(step);
654
+ nb.modified = new Date().toISOString();
655
+ saveNotebook(nb);
656
+ const typeIcon = {
657
+ observation: '[OBS]',
658
+ hypothesis: '[HYP]',
659
+ experiment: '[EXP]',
660
+ analysis: '[ANA]',
661
+ result: '[RES]',
662
+ conclusion: '[CON]',
663
+ note: '[NOTE]',
664
+ };
665
+ return [
666
+ `${typeIcon[stepType]} Step logged to **${nb.title}**`,
667
+ '',
668
+ `**Step ID**: \`${step.id}\``,
669
+ `**Type**: ${stepType}`,
670
+ `**Content**: ${content.slice(0, 200)}${content.length > 200 ? '...' : ''}`,
671
+ step.toolUsed ? `**Tool**: ${step.toolUsed}` : '',
672
+ step.data ? `**Data**: ${step.data.slice(0, 100)}${step.data.length > 100 ? '...' : ''}` : '',
673
+ '',
674
+ `Notebook now has ${nb.steps.length} step${nb.steps.length === 1 ? '' : 's'}.`,
675
+ ].filter(Boolean).join('\n');
676
+ },
677
+ });
678
+ // ── 3. notebook_list ───────────────────────────────────────────────────
679
+ registerTool({
680
+ name: 'notebook_list',
681
+ description: 'List all research notebooks. Filter by field, sort by date or title.',
682
+ parameters: {
683
+ field: { type: 'string', description: 'Filter notebooks by research field' },
684
+ sort: { type: 'string', description: 'Sort order: date (default) or title', default: 'date' },
685
+ },
686
+ tier: 'free',
687
+ async execute(args) {
688
+ let notebooks = loadAllNotebooks();
689
+ if (notebooks.length === 0) {
690
+ return 'No research notebooks found. Use `notebook_create` to start one.';
691
+ }
692
+ // Filter by field
693
+ if (args.field) {
694
+ const field = String(args.field).toLowerCase();
695
+ notebooks = notebooks.filter(nb => nb.field?.toLowerCase().includes(field));
696
+ if (notebooks.length === 0) {
697
+ return `No notebooks found in field "${args.field}".`;
698
+ }
699
+ }
700
+ // Sort
701
+ const sortBy = String(args.sort || 'date').toLowerCase();
702
+ if (sortBy === 'title') {
703
+ notebooks.sort((a, b) => a.title.localeCompare(b.title));
704
+ }
705
+ else {
706
+ notebooks.sort((a, b) => new Date(b.modified).getTime() - new Date(a.modified).getTime());
707
+ }
708
+ const lines = ['## Research Notebooks', ''];
709
+ lines.push(`| ID | Title | Steps | Field | Tags | Modified |`);
710
+ lines.push(`|----|-------|-------|-------|------|----------|`);
711
+ for (const nb of notebooks) {
712
+ const modified = new Date(nb.modified).toLocaleDateString();
713
+ lines.push(`| \`${nb.id}\` | ${nb.title} | ${nb.steps.length} | ${nb.field || '-'} | ${nb.tags.join(', ') || '-'} | ${modified} |`);
714
+ }
715
+ lines.push('');
716
+ lines.push(`**Total**: ${notebooks.length} notebook${notebooks.length === 1 ? '' : 's'}`);
717
+ return lines.join('\n');
718
+ },
719
+ });
720
+ // ── 4. notebook_view ───────────────────────────────────────────────────
721
+ registerTool({
722
+ name: 'notebook_view',
723
+ description: 'View a research notebook contents. Formats: full (all steps with data), summary (key findings only), timeline (chronological step list).',
724
+ parameters: {
725
+ notebook_id: { type: 'string', description: 'Notebook ID to view', required: true },
726
+ format: { type: 'string', description: 'View format: full, summary, or timeline', default: 'full' },
727
+ },
728
+ tier: 'free',
729
+ async execute(args) {
730
+ const notebookId = String(args.notebook_id).trim();
731
+ const nb = loadNotebook(notebookId);
732
+ if (!nb)
733
+ return `Error: Notebook \`${notebookId}\` not found`;
734
+ const format = String(args.format || 'full').toLowerCase();
735
+ const header = [
736
+ `# ${nb.title}`,
737
+ '',
738
+ nb.description,
739
+ '',
740
+ `**Field**: ${nb.field || 'General'} | **Tags**: ${nb.tags.join(', ') || 'none'}`,
741
+ `**Created**: ${nb.created} | **Steps**: ${nb.steps.length}`,
742
+ '',
743
+ '---',
744
+ '',
745
+ ].join('\n');
746
+ if (nb.steps.length === 0) {
747
+ return header + '\n*No steps recorded yet. Use `notebook_log` to add research steps.*';
748
+ }
749
+ if (format === 'summary') {
750
+ // Show only observations, results, conclusions
751
+ const keyTypes = ['observation', 'result', 'conclusion'];
752
+ const keySteps = nb.steps.filter(s => keyTypes.includes(s.type));
753
+ if (keySteps.length === 0) {
754
+ return header + '*No key findings yet (observations, results, or conclusions).*';
755
+ }
756
+ const lines = keySteps.map(step => {
757
+ const label = step.type.charAt(0).toUpperCase() + step.type.slice(1);
758
+ return `### ${label}\n${step.content}${step.data ? `\n\`\`\`\n${step.data}\n\`\`\`` : ''}\n`;
759
+ });
760
+ return header + lines.join('\n');
761
+ }
762
+ if (format === 'timeline') {
763
+ const lines = nb.steps.map((step, i) => {
764
+ const ts = new Date(step.timestamp).toLocaleString();
765
+ const label = step.type.toUpperCase().padEnd(11);
766
+ const tool = step.toolUsed ? ` [${step.toolUsed}]` : '';
767
+ return `${String(i + 1).padStart(3)}. ${ts} ${label} ${step.content.slice(0, 80)}${step.content.length > 80 ? '...' : ''}${tool}`;
768
+ });
769
+ return header + lines.join('\n');
770
+ }
771
+ // Full view
772
+ const stepsMarkdown = nb.steps.map(step => stepToMarkdown(step)).join('\n');
773
+ return header + stepsMarkdown;
774
+ },
775
+ });
776
+ // ── 5. notebook_export ─────────────────────────────────────────────────
777
+ registerTool({
778
+ name: 'notebook_export',
779
+ description: 'Export a research notebook as a reproducible document. Formats: jupyter (.ipynb with Python code cells), rmarkdown (.Rmd with R code chunks), markdown, html (self-contained), latex (journal-ready).',
780
+ parameters: {
781
+ notebook_id: { type: 'string', description: 'Notebook ID to export', required: true },
782
+ format: { type: 'string', description: 'Export format: jupyter, rmarkdown, markdown, html, or latex', required: true },
783
+ },
784
+ tier: 'free',
785
+ async execute(args) {
786
+ const notebookId = String(args.notebook_id).trim();
787
+ const nb = loadNotebook(notebookId);
788
+ if (!nb)
789
+ return `Error: Notebook \`${notebookId}\` not found`;
790
+ const format = String(args.format).toLowerCase().trim();
791
+ const validFormats = ['jupyter', 'rmarkdown', 'markdown', 'html', 'latex'];
792
+ if (!validFormats.includes(format)) {
793
+ return `Error: Invalid format "${format}". Valid formats: ${validFormats.join(', ')}`;
794
+ }
795
+ if (nb.steps.length === 0) {
796
+ return `Error: Notebook \`${notebookId}\` has no steps to export.`;
797
+ }
798
+ let content;
799
+ let ext;
800
+ switch (format) {
801
+ case 'jupyter':
802
+ content = exportJupyter(nb);
803
+ ext = 'ipynb';
804
+ break;
805
+ case 'rmarkdown':
806
+ content = exportRMarkdown(nb);
807
+ ext = 'Rmd';
808
+ break;
809
+ case 'markdown':
810
+ content = exportMarkdown(nb);
811
+ ext = 'md';
812
+ break;
813
+ case 'html':
814
+ content = exportHtml(nb);
815
+ ext = 'html';
816
+ break;
817
+ case 'latex':
818
+ content = exportLatex(nb);
819
+ ext = 'tex';
820
+ break;
821
+ default:
822
+ return `Error: Unknown format "${format}"`;
823
+ }
824
+ // Write the exported file next to the notebook
825
+ const sanitizedTitle = nb.title.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 50);
826
+ const exportPath = join(NOTEBOOKS_DIR, `${sanitizedTitle}_${nb.id}.${ext}`);
827
+ writeFileSync(exportPath, content);
828
+ return [
829
+ `## Notebook Exported`,
830
+ '',
831
+ `**Format**: ${format}`,
832
+ `**File**: \`${exportPath}\``,
833
+ `**Steps**: ${nb.steps.length}`,
834
+ `**Size**: ${(content.length / 1024).toFixed(1)} KB`,
835
+ '',
836
+ format === 'jupyter' ? 'Open with `jupyter notebook` or JupyterLab to run the Python cells.' : '',
837
+ format === 'rmarkdown' ? 'Open with RStudio or knit with `rmarkdown::render()`.' : '',
838
+ format === 'html' ? 'Open in any web browser. Self-contained, no external dependencies.' : '',
839
+ format === 'latex' ? 'Compile with `pdflatex` or `xelatex`. Suitable for journal submission.' : '',
840
+ format === 'markdown' ? 'Compatible with any Markdown viewer or converter.' : '',
841
+ ].filter(Boolean).join('\n');
842
+ },
843
+ });
844
+ // ── 6. notebook_search ─────────────────────────────────────────────────
845
registerTool({
    name: 'notebook_search',
    description: 'Search across all research notebooks for specific content, tools, or findings.',
    parameters: {
        query: { type: 'string', description: 'Search query', required: true },
        search_in: { type: 'string', description: 'Where to search: all, titles, content, tools, or tags', default: 'all' },
    },
    tier: 'free',
    /**
     * Case-insensitive substring search over every notebook returned by
     * loadAllNotebooks().
     *
     * Scopes (`search_in`):
     *   - titles  : notebook title
     *   - content : notebook description, step content and step data
     *   - tags    : notebook tags (at most one hit per notebook)
     *   - tools   : the `toolUsed` field of each step
     *   - all     : everything above (default)
     *
     * @param {{ query?: unknown, search_in?: unknown }} args
     * @returns {Promise<string>} Markdown report grouped by notebook, or a
     *   plain message when the input is invalid or nothing matched.
     */
    async execute(args) {
        const EXCERPT_LENGTH = 150;
        const HITS_PER_NOTEBOOK = 10;
        // Notebooks are parsed from JSON files on disk, so any field may be
        // absent or of the wrong type; normalise instead of throwing.
        const asText = (value) => (typeof value === 'string' ? value : '');
        const asList = (value) => (Array.isArray(value) ? value : []);
        // `?? ''` keeps a missing query from being stringified to "undefined",
        // which would slip past the required-argument check below.
        const query = String(args.query ?? '').toLowerCase().trim();
        if (!query) {
            return 'Error: query is required';
        }
        const searchIn = String(args.search_in || 'all').toLowerCase().trim();
        const validTargets = ['all', 'titles', 'content', 'tools', 'tags'];
        if (!validTargets.includes(searchIn)) {
            return `Error: Invalid search_in "${searchIn}". Valid: ${validTargets.join(', ')}`;
        }
        const notebooks = loadAllNotebooks();
        if (notebooks.length === 0) {
            return 'No notebooks to search.';
        }
        const inScope = (target) => searchIn === 'all' || searchIn === target;
        // The query is non-empty, so a non-string value (normalised to '') never matches.
        const matches = (value) => asText(value).toLowerCase().includes(query);
        const hits = [];
        for (const nb of notebooks) {
            if (nb === null || typeof nb !== 'object') {
                continue;
            }
            const title = asText(nb.title);
            const addHit = (location, match, truncated = false) => {
                hits.push({ notebookId: nb.id, notebookTitle: title, location, match, truncated });
            };
            // Records a hit whose text is cut to EXCERPT_LENGTH, remembering
            // whether anything was actually cut so the ellipsis is accurate.
            const addExcerpt = (location, fullText) => {
                addHit(location, fullText.slice(0, EXCERPT_LENGTH), fullText.length > EXCERPT_LENGTH);
            };
            if (inScope('titles') && matches(title)) {
                addHit('title', title);
            }
            if (inScope('content') && matches(nb.description)) {
                addExcerpt('description', nb.description);
            }
            if (inScope('tags')) {
                // Only the first matching tag is reported per notebook.
                const tag = asList(nb.tags).find((candidate) => matches(candidate));
                if (tag !== undefined) {
                    addHit('tag', tag);
                }
            }
            for (const step of asList(nb.steps)) {
                if (step === null || typeof step !== 'object') {
                    continue;
                }
                if (inScope('content') && matches(step.content)) {
                    addExcerpt(`step ${step.id} (${step.type})`, step.content);
                }
                if (inScope('content') && matches(step.data)) {
                    addExcerpt(`step ${step.id} data`, step.data);
                }
                if (inScope('tools') && matches(step.toolUsed)) {
                    addHit(`step ${step.id}`, `Tool: ${step.toolUsed}`);
                }
            }
        }
        if (hits.length === 0) {
            return `No results for "${query}" in ${searchIn === 'all' ? 'any notebooks' : searchIn}.`;
        }
        // Group by notebook; Map preserves first-seen order.
        const grouped = new Map();
        for (const hit of hits) {
            const bucket = grouped.get(hit.notebookId);
            if (bucket) {
                bucket.push(hit);
            }
            else {
                grouped.set(hit.notebookId, [hit]);
            }
        }
        const lines = [
            `## Search Results for "${query}"`,
            '',
            `**Found**: ${hits.length} match${hits.length === 1 ? '' : 'es'} across ${grouped.size} notebook${grouped.size === 1 ? '' : 's'}`,
            '',
        ];
        for (const [nbId, nbHits] of grouped) {
            lines.push(`### ${nbHits[0].notebookTitle} (\`${nbId}\`)`);
            for (const hit of nbHits.slice(0, HITS_PER_NOTEBOOK)) {
                lines.push(`- **${hit.location}**: ${hit.match}${hit.truncated ? '...' : ''}`);
            }
            if (nbHits.length > HITS_PER_NOTEBOOK) {
                lines.push(`- *+${nbHits.length - HITS_PER_NOTEBOOK} more matches*`);
            }
            lines.push('');
        }
        return lines.join('\n');
    },
});
961
+ // ── 7. notebook_cite ───────────────────────────────────────────────────
962
registerTool({
    name: 'notebook_cite',
    description: 'Generate citations for papers and data sources referenced in a research notebook. Scans for DOIs, PMIDs, and URLs, then formats them in the requested citation style.',
    parameters: {
        notebook_id: { type: 'string', description: 'Notebook ID to generate citations for', required: true },
        style: { type: 'string', description: 'Citation style: apa, mla, chicago, bibtex, or vancouver', required: true },
    },
    tier: 'free',
    /**
     * Builds a Markdown reference list for one notebook.
     *
     * Loads the notebook, validates the requested style, collects references
     * via extractReferences(), and renders each one with formatCitation().
     * BibTeX output is wrapped in a fenced code block with a blank line after
     * every entry; all other styles are emitted one citation per line. A
     * per-type tally (doi / pmid / url) closes the report.
     *
     * @param {{ notebook_id: unknown, style: unknown }} args
     * @returns {Promise<string>} Markdown text, or an `Error: ...` message.
     */
    async execute(args) {
        const SUPPORTED_STYLES = ['apa', 'mla', 'chicago', 'bibtex', 'vancouver'];
        const id = String(args.notebook_id).trim();
        const notebook = loadNotebook(id);
        if (!notebook) {
            return `Error: Notebook \`${id}\` not found`;
        }
        const citationStyle = String(args.style).toLowerCase().trim();
        if (!SUPPORTED_STYLES.includes(citationStyle)) {
            return `Error: Invalid style "${citationStyle}". Valid styles: ${SUPPORTED_STYLES.join(', ')}`;
        }
        const references = extractReferences(notebook);
        if (references.length === 0) {
            // Nothing to cite: explain which reference forms are detected.
            return [
                `## Citations — ${notebook.title}`,
                '',
                'No DOIs, PMIDs, or URLs found in notebook steps.',
                '',
                'To generate citations, include references in your step content or data:',
                '- DOIs: `10.1234/example.2024`',
                '- PubMed IDs: `PMID: 12345678`',
                '- URLs: `https://example.com/paper`',
            ].join('\n');
        }
        const header = [
            `## References — ${notebook.title}`,
            `**Style**: ${citationStyle.toUpperCase()} | **Sources found**: ${references.length}`,
            '',
        ];
        const formatted = references.map((ref, index) => formatCitation(ref, citationStyle, index));
        // BibTeX entries go inside a code fence, each followed by an empty line.
        const body = citationStyle === 'bibtex'
            ? ['```bibtex', ...formatted.flatMap((entry) => [entry, '']), '```']
            : formatted;
        const countOf = (kind) => references.filter((ref) => ref.type === kind).length;
        const footer = [
            '',
            '---',
            `**DOIs**: ${countOf('doi')} | **PMIDs**: ${countOf('pmid')} | **URLs**: ${countOf('url')}`,
        ];
        return [...header, ...body, ...footer].join('\n');
    },
});
1021
+ // ── 8. notebook_provenance ─────────────────────────────────────────────
1022
registerTool({
    name: 'notebook_provenance',
    description: 'Generate a complete data provenance report for a research notebook. Traces every data input, transformation, and output. Builds a DAG of data flow and flags unlogged modifications.',
    parameters: {
        notebook_id: { type: 'string', description: 'Notebook ID to trace provenance for', required: true },
    },
    tier: 'free',
    /**
     * Renders a Markdown provenance report for one notebook with these
     * sections: tool usage table, execution order, data-flow DAG, data
     * lineage for steps that carry data, and integrity flags.
     *
     * The DAG comes from buildProvenanceDAG(nb); its nodes are read here as
     * `{ stepId, stepType, inputsFrom, outputsTo }`.
     *
     * Integrity flags raised:
     *   - steps that have data but no recorded tool
     *   - experiment/analysis nodes with no inputs
     *   - conclusion nodes with no inputs
     *   - more than 24 hours between consecutive steps
     *
     * @param {{ notebook_id: unknown }} args
     * @returns {Promise<string>} Markdown report, or a message when the
     *   notebook is missing or has no steps.
     */
    async execute(args) {
        const GAP_THRESHOLD_HOURS = 24;
        const LABEL_LENGTH = 40;
        const MS_PER_HOUR = 1000 * 60 * 60;
        const quoteIds = (ids) => ids.map((id) => `\`${id}\``).join(', ');
        const notebookId = String(args.notebook_id).trim();
        const nb = loadNotebook(notebookId);
        if (!nb) {
            return `Error: Notebook \`${notebookId}\` not found`;
        }
        if (nb.steps.length === 0) {
            return `Notebook \`${notebookId}\` has no steps. Nothing to trace.`;
        }
        const dag = buildProvenanceDAG(nb);
        // Index steps and DAG nodes once so the sections below do O(1) lookups
        // instead of rescanning the arrays per item. First occurrence wins,
        // matching what Array.prototype.find would have returned.
        const stepsById = new Map();
        for (const step of nb.steps) {
            if (!stepsById.has(step.id)) {
                stepsById.set(step.id, step);
            }
        }
        const nodesByStepId = new Map();
        for (const node of dag) {
            if (!nodesByStepId.has(node.stepId)) {
                nodesByStepId.set(node.stepId, node);
            }
        }
        const lines = [
            `# Data Provenance Report`,
            `## ${nb.title}`,
            '',
            `**Notebook ID**: \`${nb.id}\``,
            `**Steps**: ${nb.steps.length}`,
            `**Period**: ${nb.created} to ${nb.modified}`,
            '',
            '---',
            '',
        ];
        // ── Tool Usage Summary ──
        // Single pass: tool name -> ids of the steps that used it, in step order.
        const stepIdsByTool = new Map();
        for (const step of nb.steps) {
            if (!step.toolUsed) {
                continue;
            }
            const ids = stepIdsByTool.get(step.toolUsed);
            if (ids) {
                ids.push(step.id);
            }
            else {
                stepIdsByTool.set(step.toolUsed, [step.id]);
            }
        }
        lines.push('## Tool Usage');
        if (stepIdsByTool.size === 0) {
            lines.push('No tools recorded in any steps.');
        }
        else {
            lines.push('| Tool | Uses | Steps |');
            lines.push('|------|------|-------|');
            // Most-used first; the sort is stable so ties keep first-seen order.
            const byUsage = [...stepIdsByTool.entries()].sort((a, b) => b[1].length - a[1].length);
            for (const [tool, ids] of byUsage) {
                lines.push(`| \`${tool}\` | ${ids.length} | ${ids.join(', ')} |`);
            }
        }
        lines.push('');
        // ── Execution Order ──
        lines.push('## Execution Order');
        for (let i = 0; i < nb.steps.length; i++) {
            const step = nb.steps[i];
            const ts = new Date(step.timestamp).toLocaleString();
            const tool = step.toolUsed ? ` via \`${step.toolUsed}\`` : '';
            const data = step.data ? ' [has data]' : '';
            const duration = step.duration_ms != null ? ` (${step.duration_ms}ms)` : '';
            lines.push(`${i + 1}. **${step.type}** \`${step.id}\` — ${ts}${tool}${data}${duration}`);
        }
        lines.push('');
        // ── Data Flow DAG ──
        lines.push('## Data Flow (DAG)');
        lines.push('```');
        for (const node of dag) {
            const step = stepsById.get(node.stepId);
            // A node without a matching step should not abort the whole report.
            const summary = step ? step.content : '(unknown step)';
            const label = `[${node.stepType}] ${summary.slice(0, LABEL_LENGTH)}${summary.length > LABEL_LENGTH ? '...' : ''}`;
            if (node.inputsFrom.length === 0 && node.outputsTo.length === 0) {
                lines.push(` (${node.stepId}) ${label} [isolated]`);
                continue;
            }
            for (const from of node.inputsFrom) {
                lines.push(` (${from}) --> (${node.stepId}) ${label}`);
            }
            // Outgoing edges are printed only for source nodes; for every other
            // node they already appear as the incoming edges of its consumers.
            if (node.outputsTo.length > 0 && node.inputsFrom.length === 0) {
                lines.push(` (${node.stepId}) ${label} --> [${node.outputsTo.join(', ')}]`);
            }
        }
        lines.push('```');
        lines.push('');
        // ── Data Lineage ──
        lines.push('## Data Lineage');
        const dataSteps = nb.steps.filter((s) => s.data);
        if (dataSteps.length === 0) {
            lines.push('No data recorded in any steps.');
        }
        else {
            for (const step of dataSteps) {
                const node = nodesByStepId.get(step.id);
                // Tolerate a step that has no DAG node: report it with no edges.
                const inputs = node?.inputsFrom ?? [];
                const outputs = node?.outputsTo ?? [];
                lines.push(`### Step \`${step.id}\` — ${step.type}`);
                lines.push(`- **Data size**: ${step.data.length} chars`);
                if (step.toolUsed) {
                    lines.push(`- **Produced by**: \`${step.toolUsed}\``);
                }
                if (inputs.length > 0) {
                    lines.push(`- **Inputs from**: ${quoteIds(inputs)}`);
                }
                if (outputs.length > 0) {
                    lines.push(`- **Consumed by**: ${quoteIds(outputs)}`);
                }
                lines.push('');
            }
        }
        // ── Integrity Flags ──
        lines.push('## Integrity Flags');
        const flags = [];
        // Steps with data but no tool recorded
        const untrackedData = nb.steps.filter((s) => s.data && !s.toolUsed);
        if (untrackedData.length > 0) {
            flags.push(`- **Untracked data source**: ${untrackedData.length} step(s) have data but no tool recorded: ${quoteIds(untrackedData.map((s) => s.id))}`);
        }
        // Experiments/analyses without input links
        const orphanedAnalyses = dag.filter((n) => ['experiment', 'analysis'].includes(n.stepType) && n.inputsFrom.length === 0);
        if (orphanedAnalyses.length > 0) {
            flags.push(`- **Orphaned analyses**: ${orphanedAnalyses.length} experiment/analysis step(s) have no traced inputs: ${quoteIds(orphanedAnalyses.map((n) => n.stepId))}`);
        }
        // Conclusions without supporting results
        const unsupportedConclusions = dag.filter((n) => n.stepType === 'conclusion' && n.inputsFrom.length === 0);
        if (unsupportedConclusions.length > 0) {
            flags.push(`- **Unsupported conclusions**: ${unsupportedConclusions.length} conclusion(s) have no linked results: ${quoteIds(unsupportedConclusions.map((n) => n.stepId))}`);
        }
        // Large time gaps between consecutive steps. Unparseable timestamps
        // yield NaN, which never exceeds the threshold and so is not flagged.
        for (let i = 1; i < nb.steps.length; i++) {
            const prev = new Date(nb.steps[i - 1].timestamp).getTime();
            const curr = new Date(nb.steps[i].timestamp).getTime();
            const gapHours = (curr - prev) / MS_PER_HOUR;
            if (gapHours > GAP_THRESHOLD_HOURS) {
                flags.push(`- **Time gap**: ${gapHours.toFixed(1)} hours between steps \`${nb.steps[i - 1].id}\` and \`${nb.steps[i].id}\` — potential unlogged work`);
            }
        }
        if (flags.length === 0) {
            lines.push('No integrity issues detected. All steps have proper provenance tracking.');
        }
        else {
            lines.push(...flags);
        }
        lines.push('');
        lines.push('---');
        lines.push(`*Provenance report generated ${new Date().toISOString()}*`);
        return lines.join('\n');
    },
});
1164
+ }
1165
+ //# sourceMappingURL=research-notebook.js.map