@barivia/barsom-mcp 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -6
- package/dist/index.d.ts +1 -1
- package/dist/index.js +418 -246
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* BARIVIA_API_URL as environment variables.
|
|
8
8
|
*
|
|
9
9
|
* Usage (in MCP client config, e.g. Cursor / Claude Desktop):
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
* {
|
|
12
12
|
* "mcpServers": {
|
|
13
13
|
* "analytics-engine": {
|
|
@@ -259,68 +259,146 @@ registerAppTool(server, "explore_som", {
|
|
|
259
259
|
download_urls: data.download_urls,
|
|
260
260
|
}),
|
|
261
261
|
});
|
|
262
|
-
const imgExt = summary.output_format ?? "
|
|
262
|
+
const imgExt = summary.output_format ?? "pdf";
|
|
263
263
|
await tryAttachImage(content, job_id, `combined.${imgExt}`);
|
|
264
264
|
return { content };
|
|
265
265
|
});
|
|
266
|
-
// ----
|
|
267
|
-
server.tool("
|
|
268
|
-
|
|
269
|
-
PREFER file_path over csv_data: when the user points to a local file, use file_path.
|
|
270
|
-
The MCP reads the file directly — no need to pass large CSV strings through the LLM.
|
|
271
|
-
|
|
272
|
-
BEST FOR: Tabular data with numeric columns (sensor readings, financial data, process
|
|
273
|
-
measurements, survey results). CSV with header row required.
|
|
274
|
-
NOT FOR: Images, text documents, or pre-trained embeddings.
|
|
275
|
-
|
|
276
|
-
TIMING: Upload is near-instant for datasets under 100MB.
|
|
266
|
+
// ---- datasets ----
|
|
267
|
+
server.tool("datasets", `Manage datasets: upload, preview, subset, or delete.
|
|
277
268
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
4. "Should any features be weighted more heavily?"
|
|
283
|
-
5. "Do any columns have very skewed distributions?" (suggest transforms)
|
|
269
|
+
action=upload: Upload a CSV for SOM analysis. Prefer file_path over csv_data so the MCP reads the file directly. Returns dataset ID and metadata. Then use datasets(action=preview) before train_som.
|
|
270
|
+
action=preview: Show columns, stats, sample rows, cyclic/datetime detections. ALWAYS preview before train_som on an unfamiliar dataset.
|
|
271
|
+
action=subset: Create a new dataset from a subset of an existing one (by row range and/or column filter). Use to train on a slice (e.g. first 2000 rows, or region=Europe) without re-uploading. Requires name and at least one of row_range or filter. row_range: [start, end] 1-based inclusive. filter: { column, op, value } with op in eq, in, gt, lt, gte, lte.
|
|
272
|
+
action=delete: Remove a dataset and free the slot.
|
|
284
273
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
274
|
+
BEST FOR: Tabular numeric data. CSV with header required. Use list(type=datasets) to see existing datasets. To train on a subset, use datasets(action=subset) then train_som on the new dataset_id, or pass row_range in train_som params.`, {
|
|
275
|
+
action: z
|
|
276
|
+
.enum(["upload", "preview", "subset", "delete"])
|
|
277
|
+
.describe("upload: add a CSV; preview: inspect before training; subset: create subset dataset; delete: remove dataset"),
|
|
278
|
+
name: z.string().optional().describe("Dataset name (required for action=upload and subset)"),
|
|
279
|
+
file_path: z.string().optional().describe("Path to local CSV (for upload; prefer over csv_data)"),
|
|
280
|
+
csv_data: z.string().optional().describe("Inline CSV string (for upload; use for small data)"),
|
|
281
|
+
dataset_id: z.string().optional().describe("Dataset ID (required for preview, subset, and delete)"),
|
|
282
|
+
n_rows: z.number().int().optional().default(5).describe("Sample rows to return (preview only)"),
|
|
283
|
+
row_range: z
|
|
284
|
+
.tuple([z.number().int(), z.number().int()])
|
|
295
285
|
.optional()
|
|
296
|
-
.describe("
|
|
297
|
-
|
|
298
|
-
.
|
|
286
|
+
.describe("For subset: [start, end] 1-based inclusive row range (e.g. [1, 2000])"),
|
|
287
|
+
filter: z
|
|
288
|
+
.object({
|
|
289
|
+
column: z.string(),
|
|
290
|
+
op: z.enum(["eq", "in", "gt", "lt", "gte", "lte"]),
|
|
291
|
+
value: z.union([z.string(), z.number(), z.array(z.union([z.string(), z.number()]))]),
|
|
292
|
+
})
|
|
299
293
|
.optional()
|
|
300
|
-
.describe("
|
|
301
|
-
}, async ({ name, file_path, csv_data }) => {
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
294
|
+
.describe("For subset: filter rows by column value (e.g. { column: 'region', op: 'eq', value: 'Europe' })"),
|
|
295
|
+
}, async ({ action, name, file_path, csv_data, dataset_id, n_rows, row_range, filter }) => {
|
|
296
|
+
if (action === "upload") {
|
|
297
|
+
if (!name)
|
|
298
|
+
throw new Error("datasets(upload) requires name");
|
|
299
|
+
let body;
|
|
300
|
+
if (file_path) {
|
|
301
|
+
const resolved = path.resolve(file_path);
|
|
302
|
+
try {
|
|
303
|
+
body = await fs.readFile(resolved, "utf-8");
|
|
304
|
+
}
|
|
305
|
+
catch (err) {
|
|
306
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
307
|
+
throw new Error(`Cannot read file "${resolved}": ${msg}`);
|
|
308
|
+
}
|
|
307
309
|
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
throw new Error(`Cannot read file "${resolved}": ${msg}`);
|
|
310
|
+
else if (csv_data && csv_data.length > 0) {
|
|
311
|
+
body = csv_data;
|
|
311
312
|
}
|
|
313
|
+
else {
|
|
314
|
+
throw new Error("datasets(upload) requires file_path or csv_data");
|
|
315
|
+
}
|
|
316
|
+
const data = await apiCall("POST", "/v1/datasets", body, {
|
|
317
|
+
"X-Dataset-Name": name,
|
|
318
|
+
"Content-Type": "text/csv",
|
|
319
|
+
});
|
|
320
|
+
return textResult(data);
|
|
312
321
|
}
|
|
313
|
-
|
|
314
|
-
|
|
322
|
+
if (action === "preview") {
|
|
323
|
+
if (!dataset_id)
|
|
324
|
+
throw new Error("datasets(preview) requires dataset_id");
|
|
325
|
+
const data = (await apiCall("GET", `/v1/datasets/${dataset_id}/preview?n_rows=${n_rows ?? 5}`));
|
|
326
|
+
const cols = data.columns ?? [];
|
|
327
|
+
const stats = data.column_stats ?? [];
|
|
328
|
+
const hints = data.cyclic_hints ?? [];
|
|
329
|
+
const samples = data.sample_rows ?? [];
|
|
330
|
+
const dtCols = data.datetime_columns ?? [];
|
|
331
|
+
const temporalSugg = data.temporal_suggestions ?? [];
|
|
332
|
+
const fmt = (v) => v === null || v === undefined ? "—" : Number(v).toFixed(3);
|
|
333
|
+
const lines = [
|
|
334
|
+
`Dataset: ${data.name} (${data.dataset_id})`,
|
|
335
|
+
`${data.total_rows} rows × ${data.total_cols} columns`,
|
|
336
|
+
``,
|
|
337
|
+
`Column Statistics:`,
|
|
338
|
+
`| Column | Min | Max | Mean | Std | Nulls | Numeric |`,
|
|
339
|
+
`|--------|-----|-----|------|-----|-------|---------|`,
|
|
340
|
+
];
|
|
341
|
+
for (const s of stats) {
|
|
342
|
+
lines.push(`| ${s.column} | ${fmt(s.min)} | ${fmt(s.max)} | ${fmt(s.mean)} | ${fmt(s.std)} | ${s.null_count ?? 0} | ${s.is_numeric !== false ? "yes" : "no"} |`);
|
|
343
|
+
}
|
|
344
|
+
if (hints.length > 0) {
|
|
345
|
+
lines.push(``, `Detected Cyclic Feature Hints:`);
|
|
346
|
+
for (const h of hints) {
|
|
347
|
+
lines.push(` • ${h.column} — period=${h.period} (${h.reason})`);
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
if (dtCols.length > 0) {
|
|
351
|
+
lines.push(``, `Detected Datetime Columns:`);
|
|
352
|
+
for (const dc of dtCols) {
|
|
353
|
+
const formats = dc.detected_formats ?? [];
|
|
354
|
+
const fmtStrs = formats
|
|
355
|
+
.map((f) => `${f.format} — ${f.description} (${(f.match_rate * 100).toFixed(0)}% match)`)
|
|
356
|
+
.join("; ");
|
|
357
|
+
lines.push(` • ${dc.column}: sample="${dc.sample}" → ${fmtStrs}`);
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
if (temporalSugg.length > 0) {
|
|
361
|
+
lines.push(``, `Temporal Feature Suggestions (require user approval):`);
|
|
362
|
+
for (const ts of temporalSugg) {
|
|
363
|
+
lines.push(` • Columns: ${ts.columns.join(" + ")} → format: "${ts.format}"`);
|
|
364
|
+
lines.push(` Available components: ${ts.available_components.join(", ")}`);
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
if (samples.length > 0) {
|
|
368
|
+
lines.push(``, `Sample Rows (first ${samples.length}):`);
|
|
369
|
+
lines.push(`| ${cols.join(" | ")} |`);
|
|
370
|
+
lines.push(`| ${cols.map(() => "---").join(" | ")} |`);
|
|
371
|
+
for (const row of samples) {
|
|
372
|
+
lines.push(`| ${cols.map((c) => String(row[c] ?? "")).join(" | ")} |`);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
376
|
+
}
|
|
377
|
+
if (action === "subset") {
|
|
378
|
+
if (!dataset_id)
|
|
379
|
+
throw new Error("datasets(subset) requires dataset_id");
|
|
380
|
+
if (!name)
|
|
381
|
+
throw new Error("datasets(subset) requires name");
|
|
382
|
+
if (row_range === undefined && filter === undefined) {
|
|
383
|
+
throw new Error("datasets(subset) requires at least one of row_range or filter");
|
|
384
|
+
}
|
|
385
|
+
const body = { name };
|
|
386
|
+
if (row_range !== undefined)
|
|
387
|
+
body.row_range = row_range;
|
|
388
|
+
if (filter !== undefined)
|
|
389
|
+
body.filter = filter;
|
|
390
|
+
const data = await apiCall("POST", `/v1/datasets/${dataset_id}/subset`, JSON.stringify(body), {
|
|
391
|
+
"Content-Type": "application/json",
|
|
392
|
+
});
|
|
393
|
+
return textResult(data);
|
|
315
394
|
}
|
|
316
|
-
|
|
317
|
-
|
|
395
|
+
if (action === "delete") {
|
|
396
|
+
if (!dataset_id)
|
|
397
|
+
throw new Error("datasets(delete) requires dataset_id");
|
|
398
|
+
const data = await apiCall("DELETE", `/v1/datasets/${dataset_id}`);
|
|
399
|
+
return textResult(data);
|
|
318
400
|
}
|
|
319
|
-
|
|
320
|
-
"X-Dataset-Name": name,
|
|
321
|
-
"Content-Type": "text/csv",
|
|
322
|
-
});
|
|
323
|
-
return textResult(data);
|
|
401
|
+
throw new Error("Invalid action");
|
|
324
402
|
});
|
|
325
403
|
// ---- train_som ----
|
|
326
404
|
server.tool("train_som", `Train a Self-Organizing Map on the dataset. Returns a job_id for polling.
|
|
@@ -343,11 +421,11 @@ BEFORE calling, ask the user:
|
|
|
343
421
|
5. Quick exploration or refined map?
|
|
344
422
|
|
|
345
423
|
PRESET TABLE:
|
|
346
|
-
| preset | grid | epochs
|
|
347
|
-
| quick | 15x15 | [
|
|
348
|
-
| standard | 25x25 | [
|
|
349
|
-
| refined | 40x40 | [
|
|
350
|
-
| high_res | 60x60 | [
|
|
424
|
+
| preset | grid | epochs | batch_size |
|
|
425
|
+
| quick | 15x15 | [15, 5] | 48 |
|
|
426
|
+
| standard | 25x25 | [30, 15] | 48 |
|
|
427
|
+
| refined | 40x40 | [50, 25] | 32 |
|
|
428
|
+
| high_res | 60x60 | [60, 40] | 32 |
|
|
351
429
|
|
|
352
430
|
TRAINING PHASES:
|
|
353
431
|
- Ordering: large neighborhoods → global structure. sigma_f controls end-radius (default 1.0).
|
|
@@ -356,7 +434,7 @@ TRAINING PHASES:
|
|
|
356
434
|
|
|
357
435
|
TRANSFORMS: Per-column preprocessing before normalization.
|
|
358
436
|
transforms: {revenue: "log", volume: "log1p", pressure: "sqrt"}
|
|
359
|
-
Suggest when
|
|
437
|
+
Suggest when datasets(action=preview) shows large value ranges or right-skewed distributions.
|
|
360
438
|
|
|
361
439
|
TEMPORAL FEATURES: NEVER auto-apply. Always ask which components to extract.
|
|
362
440
|
temporal_features: [{columns: ['Date'], format: 'dd.mm.yyyy', extract: ['day_of_year'], cyclic: true}]
|
|
@@ -375,20 +453,20 @@ COMMON MISTAKES:
|
|
|
375
453
|
QUALITY TARGETS: QE < 1.5 good, TE < 0.05 good, explained variance > 0.8 good.
|
|
376
454
|
If QE > 2 → more epochs or larger grid. If TE > 0.15 → larger grid or periodic=true.
|
|
377
455
|
|
|
378
|
-
OUTPUT: format (png/pdf/svg),
|
|
456
|
+
OUTPUT: default format pdf, default colormap coolwarm. Override with output_format (png/pdf/svg), output_dpi (standard/retina/print), colormap (e.g. viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral).
|
|
379
457
|
|
|
380
458
|
After training, use get_results → analyze(clusters) → component_planes → feature_correlation.
|
|
381
459
|
See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`, {
|
|
382
|
-
dataset_id: z.string().describe("Dataset ID from
|
|
460
|
+
dataset_id: z.string().describe("Dataset ID from datasets(action=upload) or list(type=datasets)"),
|
|
383
461
|
preset: z
|
|
384
462
|
.enum(["quick", "standard", "refined", "high_res"])
|
|
385
463
|
.optional()
|
|
386
464
|
.describe("Training preset — sets sensible defaults for grid, epochs, and batch_size. " +
|
|
387
465
|
"Explicit params override preset values. " +
|
|
388
|
-
"quick: 15×15, [
|
|
389
|
-
"standard: 25×25, [
|
|
390
|
-
"refined: 40×40, [
|
|
391
|
-
"high_res: 60×60, [
|
|
466
|
+
"quick: 15×15, [15,5], batch=48. " +
|
|
467
|
+
"standard: 25×25, [30,15], batch=48, best with GPU. " +
|
|
468
|
+
"refined: 40×40, [50,25], batch=32, best with GPU. " +
|
|
469
|
+
"high_res: 60×60, [60,40], batch=32, best with GPU."),
|
|
392
470
|
grid_x: z
|
|
393
471
|
.number()
|
|
394
472
|
.int()
|
|
@@ -519,8 +597,8 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
|
|
|
519
597
|
output_format: z
|
|
520
598
|
.enum(["png", "pdf", "svg"])
|
|
521
599
|
.optional()
|
|
522
|
-
.default("
|
|
523
|
-
.describe("Image output format.
|
|
600
|
+
.default("pdf")
|
|
601
|
+
.describe("Image output format. PDF (default) for publication-quality vector graphics, PNG for quick viewing, SVG for web embedding."),
|
|
524
602
|
output_dpi: z
|
|
525
603
|
.enum(["standard", "retina", "print"])
|
|
526
604
|
.optional()
|
|
@@ -529,13 +607,17 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
|
|
|
529
607
|
colormap: z
|
|
530
608
|
.string()
|
|
531
609
|
.optional()
|
|
532
|
-
.describe("Override default colormap for component planes
|
|
533
|
-
|
|
610
|
+
.describe("Override default colormap (coolwarm) for component planes and hit histogram. Examples: viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix always uses grays, cyclic features use twilight."),
|
|
611
|
+
row_range: z
|
|
612
|
+
.tuple([z.number().int().min(1), z.number().int().min(1)])
|
|
613
|
+
.optional()
|
|
614
|
+
.describe("Train on a subset of rows only: [start, end] 1-based inclusive. Alternative to creating a subset dataset with datasets(action=subset)."),
|
|
615
|
+
}, async ({ dataset_id, preset, grid_x, grid_y, epochs, model, periodic, columns, transforms, cyclic_features, temporal_features, feature_weights, normalize, sigma_f, learning_rate, batch_size, backend, output_format, output_dpi, colormap, row_range, }) => {
|
|
534
616
|
const PRESETS = {
|
|
535
|
-
quick: { grid: [15, 15], epochs: [
|
|
536
|
-
standard: { grid: [25, 25], epochs: [
|
|
537
|
-
refined: { grid: [40, 40], epochs: [
|
|
538
|
-
high_res: { grid: [60, 60], epochs: [
|
|
617
|
+
quick: { grid: [15, 15], epochs: [15, 5], batch_size: 48 },
|
|
618
|
+
standard: { grid: [25, 25], epochs: [30, 15], batch_size: 48, backend: "cuda" },
|
|
619
|
+
refined: { grid: [40, 40], epochs: [50, 25], batch_size: 32, backend: "cuda" },
|
|
620
|
+
high_res: { grid: [60, 60], epochs: [60, 40], batch_size: 32, backend: "cuda" },
|
|
539
621
|
};
|
|
540
622
|
const p = preset ? PRESETS[preset] : undefined;
|
|
541
623
|
const params = {
|
|
@@ -588,9 +670,7 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
|
|
|
588
670
|
else if (p?.backend) {
|
|
589
671
|
params.backend = p.backend;
|
|
590
672
|
}
|
|
591
|
-
|
|
592
|
-
params.output_format = output_format;
|
|
593
|
-
}
|
|
673
|
+
params.output_format = output_format ?? "pdf";
|
|
594
674
|
const dpiMap = { standard: 1, retina: 2, print: 4 };
|
|
595
675
|
if (output_dpi && output_dpi !== "retina") {
|
|
596
676
|
params.output_dpi = dpiMap[output_dpi] ?? 2;
|
|
@@ -598,6 +678,9 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
|
|
|
598
678
|
if (colormap) {
|
|
599
679
|
params.colormap = colormap;
|
|
600
680
|
}
|
|
681
|
+
if (row_range && row_range.length >= 2 && row_range[0] <= row_range[1]) {
|
|
682
|
+
params.row_range = row_range;
|
|
683
|
+
}
|
|
601
684
|
const data = await apiCall("POST", "/v1/jobs", { dataset_id, params });
|
|
602
685
|
return textResult(data);
|
|
603
686
|
});
|
|
@@ -615,7 +698,9 @@ When status is 'failed', show the error to the user and suggest parameter adjust
|
|
|
615
698
|
const data = (await apiCall("GET", `/v1/jobs/${job_id}`));
|
|
616
699
|
const status = data.status;
|
|
617
700
|
const progress = (data.progress ?? 0) * 100;
|
|
618
|
-
|
|
701
|
+
const label = data.label != null && data.label !== "" ? String(data.label) : null;
|
|
702
|
+
const jobDesc = label ? `Job ${label} (id: ${job_id})` : `Job ${job_id}`;
|
|
703
|
+
let text = `${jobDesc}: ${status} (${progress.toFixed(1)}%)`;
|
|
619
704
|
if (status === "completed") {
|
|
620
705
|
text += ` | Results ready. Use get_results(job_id="${job_id}") to retrieve.`;
|
|
621
706
|
}
|
|
@@ -624,6 +709,61 @@ When status is 'failed', show the error to the user and suggest parameter adjust
|
|
|
624
709
|
}
|
|
625
710
|
return { content: [{ type: "text", text }] };
|
|
626
711
|
});
|
|
712
|
+
/** Resolve get_results figures param to list of image filenames to fetch. */
|
|
713
|
+
function getResultsImagesToFetch(jobType, summary, figures, includeIndividual) {
|
|
714
|
+
const ext = summary.output_format ?? "pdf";
|
|
715
|
+
if (jobType === "transition_flow") {
|
|
716
|
+
const lag = summary.lag ?? 1;
|
|
717
|
+
return [`transition_flow_lag${lag}.${ext}`];
|
|
718
|
+
}
|
|
719
|
+
if (jobType === "project_variable") {
|
|
720
|
+
const varName = summary.variable_name ?? "variable";
|
|
721
|
+
const safe = String(varName).replace(/[^a-zA-Z0-9_]/g, "_");
|
|
722
|
+
return [`projected_${safe}.${ext}`];
|
|
723
|
+
}
|
|
724
|
+
if (jobType === "derive_variable") {
|
|
725
|
+
const varName = summary.variable_name ?? "variable";
|
|
726
|
+
const safe = String(varName).replace(/[^a-zA-Z0-9_]/g, "_");
|
|
727
|
+
return [`projected_${safe}.${ext}`];
|
|
728
|
+
}
|
|
729
|
+
// train_som
|
|
730
|
+
const features = summary.features ?? [];
|
|
731
|
+
const combinedName = `combined.${ext}`;
|
|
732
|
+
const umatrixName = `umatrix.${ext}`;
|
|
733
|
+
const hitHistName = `hit_histogram.${ext}`;
|
|
734
|
+
const correlationName = `correlation.${ext}`;
|
|
735
|
+
const componentNames = features.map((f, i) => `component_${i + 1}_${f.replace(/[^a-zA-Z0-9_]/g, "_")}.${ext}`);
|
|
736
|
+
const allList = [combinedName, umatrixName, hitHistName, correlationName, ...componentNames];
|
|
737
|
+
if (figures === undefined || figures === "default") {
|
|
738
|
+
return includeIndividual ? allList : [combinedName];
|
|
739
|
+
}
|
|
740
|
+
if (figures === "combined_only")
|
|
741
|
+
return [combinedName];
|
|
742
|
+
if (figures === "all")
|
|
743
|
+
return allList;
|
|
744
|
+
if (Array.isArray(figures)) {
|
|
745
|
+
const nameToFile = {
|
|
746
|
+
combined: combinedName,
|
|
747
|
+
umatrix: umatrixName,
|
|
748
|
+
hit_histogram: hitHistName,
|
|
749
|
+
correlation: correlationName,
|
|
750
|
+
};
|
|
751
|
+
features.forEach((_, i) => {
|
|
752
|
+
nameToFile[`component_${i + 1}`] = componentNames[i];
|
|
753
|
+
});
|
|
754
|
+
return figures
|
|
755
|
+
.map((key) => {
|
|
756
|
+
const k = key.trim().toLowerCase();
|
|
757
|
+
if (nameToFile[k])
|
|
758
|
+
return nameToFile[k];
|
|
759
|
+
if (key.includes("."))
|
|
760
|
+
return key;
|
|
761
|
+
return null;
|
|
762
|
+
})
|
|
763
|
+
.filter((f) => f != null);
|
|
764
|
+
}
|
|
765
|
+
return [combinedName];
|
|
766
|
+
}
|
|
627
767
|
// ---- get_results ----
|
|
628
768
|
server.tool("get_results", `Retrieve results of a completed SOM training, projection, or derived variable job.
|
|
629
769
|
|
|
@@ -635,8 +775,12 @@ Returns: text summary with metrics and inline images (combined view and all plot
|
|
|
635
775
|
DOWNLOAD LINKS: Links to API-domain or presigned URLs may not work when clicked (MCP holds the API key, not the browser). Images are inlined. For weights use get_job_export(export="weights"); for node stats use get_job_export(export="nodes"). If the user wants to save a file, offer to fetch via the appropriate tool.
|
|
636
776
|
|
|
637
777
|
OPTIONS:
|
|
638
|
-
-
|
|
639
|
-
|
|
778
|
+
- figures: request specific plots only. Omit for default (combined only; or all if include_individual=true).
|
|
779
|
+
- "combined_only": only the combined view.
|
|
780
|
+
- "all": combined + umatrix + hit_histogram + all component planes.
|
|
781
|
+
- Array of logical names: e.g. figures: ["umatrix"] for just the U-matrix, or figures: ["combined","hit_histogram","correlation"] or ["combined","umatrix","component_1","component_2"]. Logical names: combined, umatrix, hit_histogram, correlation, component_1, component_2, ... (component_N = Nth feature). Pass an array to fetch/save only those figures.
|
|
782
|
+
- include_individual=true: when figures is omitted, shows each component plane, U-matrix, and hit histogram
|
|
783
|
+
as separate inline images. Ignored when figures is set.
|
|
640
784
|
|
|
641
785
|
AFTER showing results, guide the user:
|
|
642
786
|
1. "The U-matrix shows [N] distinct regions. Does this match expected groupings?"
|
|
@@ -647,6 +791,7 @@ AFTER showing results, guide the user:
|
|
|
647
791
|
6. If explained variance < 0.7: suggest transforms, feature selection, or more training
|
|
648
792
|
|
|
649
793
|
WORKFLOW: get_results → analyze(clusters) → component_planes → feature_correlation.
|
|
794
|
+
Request specific figures with get_results(job_id, figures=[...]) (e.g. figures: ["umatrix"] or figures: ["combined","hit_histogram"]) or run analyze(job_id, analysis_type) for a single view.
|
|
650
795
|
Use get_job_export(export="training_log") for the learning curve (QE vs epoch — healthy=steady decline then plateau).
|
|
651
796
|
Use analyze(job_id, "quality_report") for extended metrics (trustworthiness, neighborhood preservation).
|
|
652
797
|
|
|
@@ -655,20 +800,31 @@ METRIC INTERPRETATION:
|
|
|
655
800
|
- TE < 0.05: good topology. TE > 0.15: grid too small.
|
|
656
801
|
- Explained variance > 0.8: good. < 0.7: try transforms, fewer features, or more training.`, {
|
|
657
802
|
job_id: z.string().describe("Job ID of a completed job"),
|
|
803
|
+
figures: z
|
|
804
|
+
.union([
|
|
805
|
+
z.enum(["default", "combined_only", "all"]),
|
|
806
|
+
z.array(z.string()),
|
|
807
|
+
])
|
|
808
|
+
.optional()
|
|
809
|
+
.describe("Which figures to return. Omit or 'default' for combined only (or all if include_individual=true). 'combined_only': just combined view. 'all': combined + umatrix + hit_histogram + correlation + all component planes. Or array of logical names to fetch only those: combined, umatrix, hit_histogram, correlation, component_1, component_2, ..."),
|
|
658
810
|
include_individual: z
|
|
659
811
|
.boolean()
|
|
660
812
|
.optional()
|
|
661
813
|
.default(false)
|
|
662
|
-
.describe("If true, inline each individual plot (component planes, u-matrix, hit histogram)
|
|
663
|
-
}, async ({ job_id, include_individual }) => {
|
|
814
|
+
.describe("If true and figures is omitted, inline each individual plot (component planes, u-matrix, hit histogram). Ignored when figures is set."),
|
|
815
|
+
}, async ({ job_id, figures, include_individual }) => {
|
|
664
816
|
const data = (await apiCall("GET", `/v1/results/${job_id}`));
|
|
665
817
|
const summary = (data.summary ?? {});
|
|
666
818
|
const downloadUrls = (data.download_urls ?? {});
|
|
819
|
+
const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
|
|
820
|
+
const resultsHeader = jobLabel
|
|
821
|
+
? `Results for ${jobLabel} (job_id: ${job_id})`
|
|
822
|
+
: `Results for job_id: ${job_id}`;
|
|
667
823
|
const content = [];
|
|
668
824
|
const inlinedImages = new Set();
|
|
669
825
|
const jobType = summary.job_type ?? "train_som";
|
|
670
826
|
// ── Dispatch by job type ──────────────────────────────────────────────────
|
|
671
|
-
const fmtExt = summary.output_format ?? "
|
|
827
|
+
const fmtExt = summary.output_format ?? "pdf";
|
|
672
828
|
if (jobType === "transition_flow") {
|
|
673
829
|
const lag = summary.lag ?? 1;
|
|
674
830
|
const flowImg = `transition_flow_lag${lag}.${fmtExt}`;
|
|
@@ -676,7 +832,7 @@ METRIC INTERPRETATION:
|
|
|
676
832
|
content.push({
|
|
677
833
|
type: "text",
|
|
678
834
|
text: [
|
|
679
|
-
`Transition Flow
|
|
835
|
+
`Transition Flow ${resultsHeader}`,
|
|
680
836
|
`Parent SOM: ${summary.parent_job_id ?? "N/A"} | Lag: ${lag} | Samples: ${summary.n_samples ?? 0}`,
|
|
681
837
|
``,
|
|
682
838
|
`Flow Statistics:`,
|
|
@@ -691,8 +847,10 @@ METRIC INTERPRETATION:
|
|
|
691
847
|
`Use transition_flow(lag=N) with larger N to reveal longer-term temporal structure.`,
|
|
692
848
|
].join("\n"),
|
|
693
849
|
});
|
|
694
|
-
|
|
695
|
-
|
|
850
|
+
for (const name of getResultsImagesToFetch(jobType, summary, figures, include_individual)) {
|
|
851
|
+
await tryAttachImage(content, job_id, name);
|
|
852
|
+
inlinedImages.add(name);
|
|
853
|
+
}
|
|
696
854
|
}
|
|
697
855
|
else if (jobType === "project_variable") {
|
|
698
856
|
const varName = summary.variable_name ?? "variable";
|
|
@@ -702,7 +860,7 @@ METRIC INTERPRETATION:
|
|
|
702
860
|
content.push({
|
|
703
861
|
type: "text",
|
|
704
862
|
text: [
|
|
705
|
-
`Projected Variable: ${varName} (${agg}) —
|
|
863
|
+
`Projected Variable: ${varName} (${agg}) — ${resultsHeader}`,
|
|
706
864
|
`Parent SOM: ${summary.parent_job_id ?? "N/A"} | Samples: ${summary.n_samples ?? 0}`,
|
|
707
865
|
``,
|
|
708
866
|
`Variable Statistics (per-node ${agg}):`,
|
|
@@ -715,8 +873,10 @@ METRIC INTERPRETATION:
|
|
|
715
873
|
`learned feature space, even if it wasn't used in training.`,
|
|
716
874
|
].join("\n"),
|
|
717
875
|
});
|
|
718
|
-
|
|
719
|
-
|
|
876
|
+
for (const name of getResultsImagesToFetch(jobType, summary, figures, include_individual)) {
|
|
877
|
+
await tryAttachImage(content, job_id, name);
|
|
878
|
+
inlinedImages.add(name);
|
|
879
|
+
}
|
|
720
880
|
}
|
|
721
881
|
else {
|
|
722
882
|
// ── Default: train_som results ──────────────────────────────────────────
|
|
@@ -732,7 +892,7 @@ METRIC INTERPRETATION:
|
|
|
732
892
|
const duration = summary.training_duration_seconds;
|
|
733
893
|
const ordErrors = summary.ordering_errors;
|
|
734
894
|
const textSummary = [
|
|
735
|
-
`SOM Training
|
|
895
|
+
`SOM Training ${resultsHeader}`,
|
|
736
896
|
`Grid: ${grid[0]}×${grid[1]} | Features: ${summary.n_features ?? 0} | Samples: ${summary.n_samples ?? 0}`,
|
|
737
897
|
`Model: ${summary.model ?? "SOM"} | Epochs: ${epochStr}`,
|
|
738
898
|
`Periodic: ${summary.periodic ?? true} | Normalize: ${summary.normalize ?? "auto"}`,
|
|
@@ -763,29 +923,11 @@ METRIC INTERPRETATION:
|
|
|
763
923
|
.filter((l) => l !== "")
|
|
764
924
|
.join("\n");
|
|
765
925
|
content.push({ type: "text", text: textSummary });
|
|
766
|
-
const imgExt = summary.output_format ?? "
|
|
767
|
-
const
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
const feats = summary.features ?? [];
|
|
772
|
-
const imageNames = [
|
|
773
|
-
`umatrix.${imgExt}`,
|
|
774
|
-
`hit_histogram.${imgExt}`,
|
|
775
|
-
...feats.map((f, i) => `component_${i + 1}_${f.replace(/[^a-zA-Z0-9_]/g, "_")}.${imgExt}`),
|
|
776
|
-
];
|
|
777
|
-
const results = await Promise.allSettled(imageNames.map((name) => apiRawCall(`/v1/results/${job_id}/image/${name}`).then((r) => ({ name, ...r }))));
|
|
778
|
-
for (const r of results) {
|
|
779
|
-
if (r.status === "fulfilled") {
|
|
780
|
-
content.push({
|
|
781
|
-
type: "image",
|
|
782
|
-
data: r.value.data.toString("base64"),
|
|
783
|
-
mimeType: mimeForFilename(r.value.name),
|
|
784
|
-
annotations: { audience: ["user"], priority: 0.8 },
|
|
785
|
-
});
|
|
786
|
-
inlinedImages.add(r.value.name);
|
|
787
|
-
}
|
|
788
|
-
}
|
|
926
|
+
const imgExt = summary.output_format ?? "pdf";
|
|
927
|
+
const imagesToFetch = getResultsImagesToFetch(jobType, summary, figures, include_individual);
|
|
928
|
+
for (const name of imagesToFetch) {
|
|
929
|
+
await tryAttachImage(content, job_id, name);
|
|
930
|
+
inlinedImages.add(name);
|
|
789
931
|
}
|
|
790
932
|
}
|
|
791
933
|
// Inline remaining image files; for JSON provide tool hints (no clickable URLs — auth required)
|
|
@@ -808,10 +950,109 @@ METRIC INTERPRETATION:
|
|
|
808
950
|
}
|
|
809
951
|
}
|
|
810
952
|
}
|
|
953
|
+
// List available artifacts so the LLM can offer to fetch specific views
|
|
954
|
+
if (files.length > 0) {
|
|
955
|
+
const features = summary.features ?? [];
|
|
956
|
+
const logicalNames = jobType === "train_som" || jobType === "render_variant"
|
|
957
|
+
? `Logical names: combined, umatrix, hit_histogram, correlation, ${features.map((_, i) => `component_${i + 1}`).join(", ")}. `
|
|
958
|
+
: "";
|
|
959
|
+
content.push({
|
|
960
|
+
type: "text",
|
|
961
|
+
text: `Available to fetch individually: ${files.join(", ")}. ${logicalNames}Use get_results(job_id, figures=[...]) to request specific plots, get_results(job_id, include_individual=true) or figures="all" to inline all plots, or analyze(job_id, analysis_type) for a specific view (u_matrix, component_planes, bmu_hits, clusters, quality_report, etc.).`,
|
|
962
|
+
});
|
|
963
|
+
}
|
|
964
|
+
return { content };
|
|
965
|
+
});
|
|
966
|
+
// ---- recolor_som ----
|
|
967
|
+
server.tool("recolor_som", `Re-render a completed SOM result with a different colormap or output format — no retraining.
|
|
968
|
+
|
|
969
|
+
Use when the user wants to see the same combined (or other) plot with another color scheme (e.g. plasma, inferno, coolwarm). You can also use this to re-export figures in a different format (e.g. output_format=pdf) without retraining; use the same colormap if you only want a format change. Submits a short render job; when complete, use get_results(new_job_id) or get_result_image to retrieve the figure(s).
|
|
970
|
+
|
|
971
|
+
Colormaps (default: coolwarm): e.g. viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix and cyclic panels keep fixed colormaps (grays, twilight).`, {
|
|
972
|
+
job_id: z.string().describe("Job ID of a completed SOM training job (parent)"),
|
|
973
|
+
colormap: z.string().describe("Colormap name (default: coolwarm). E.g. viridis, plasma, inferno, magma, coolwarm)"),
|
|
974
|
+
figures: z
|
|
975
|
+
.array(z.string())
|
|
976
|
+
.optional()
|
|
977
|
+
.default(["combined"])
|
|
978
|
+
.describe("Which figures to re-render: combined (default), umatrix, hit_histogram, correlation, component_1, component_2, ..."),
|
|
979
|
+
output_format: z.enum(["png", "pdf", "svg"]).optional().default("pdf"),
|
|
980
|
+
output_dpi: z.number().int().min(1).max(4).optional().default(2),
|
|
981
|
+
}, async ({ job_id, colormap, figures, output_format, output_dpi }) => {
|
|
982
|
+
const body = { colormap, figures, output_format, output_dpi };
|
|
983
|
+
const data = (await apiCall("POST", `/v1/results/${job_id}/render`, JSON.stringify(body), {
|
|
984
|
+
"Content-Type": "application/json",
|
|
985
|
+
}));
|
|
986
|
+
const newJobId = data.id;
|
|
987
|
+
const content = [
|
|
988
|
+
{
|
|
989
|
+
type: "text",
|
|
990
|
+
text: [
|
|
991
|
+
`Re-render job submitted with colormap "${colormap}".`,
|
|
992
|
+
`New job_id: ${newJobId}. Poll get_job_status(job_id="${newJobId}") until status is 'completed', then use get_results(job_id="${newJobId}") or get_result_image to retrieve the recolored plot(s). No retraining was performed.`,
|
|
993
|
+
].join("\n"),
|
|
994
|
+
},
|
|
995
|
+
];
|
|
811
996
|
return { content };
|
|
812
997
|
});
|
|
998
|
+
// ---- download_results ----
// Saves a completed job's result files into a local folder.
//
// Inputs (validated by the zod schema below):
//   job_id        job whose results are fetched from /v1/results/{job_id}
//   folder        target directory, resolved against process cwd
//   figures       "all" / "images" (every image listed in summary.files) or an explicit filename list
//   include_json  also save JSON artifacts (summary.json is added to an explicit list)
//
// Returns a single text item listing the files that were saved and, when any
// were rejected or failed to download, the files that were skipped and why.
server.tool("download_results", `Save result figures (and optionally JSON) to a folder on disk. Use so the user can open, share, or version files locally without writing their own download script.

folder: path to the directory (e.g. "." for current/workspace, "./results", or absolute path). When folder is a generic path like "." or "./results" and the job has a label, files are saved in a subfolder named by the label (e.g. ./results/Winedata_a1b2c3_badger_thong_oil). You can also pass a path that already includes the label.
figures: "all" (default) = all image files from the job; "images" = same; or pass an array of filenames to save only those (e.g. ["combined.pdf", "umatrix.pdf", "correlation.pdf"]). Default export format is PDF.
include_json: if true, also save summary.json (and other JSON artifacts) into the same folder.`, {
    job_id: z.string().describe("Job ID of a completed job"),
    folder: z.string().describe("Directory path to save files (e.g. '.' or './results'). When the job has a label, a subfolder with that label may be used. Relative paths are relative to process cwd (usually project root)."),
    figures: z
        .union([z.enum(["all", "images"]), z.array(z.string())])
        .optional()
        .default("all")
        .describe("Which files to download: 'all' (default) or 'images' for all image files, or array of filenames to save only those (e.g. ['combined.pdf', 'umatrix.pdf', 'correlation.pdf'])."),
    include_json: z.boolean().optional().default(false).describe("If true, also download summary.json and other JSON files"),
}, async ({ job_id, folder, figures, include_json }) => {
    // Encode the id so it always stays a single URL path segment.
    const jobSegment = encodeURIComponent(job_id);
    const data = (await apiCall("GET", `/v1/results/${jobSegment}`));
    const summary = (data.summary ?? {});
    const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
    const files = (summary.files ?? []).filter((f) => typeof f === "string");
    const isImage = (f) => f.endsWith(".png") || f.endsWith(".svg") || f.endsWith(".pdf");
    const isJson = (f) => f.endsWith(".json");
    // A filename must be a single path component: no separators, no NUL,
    // and not "." / "..", otherwise path.join could escape the target folder.
    const isSafeFilename = (f) => f !== "" && f !== "." && f !== ".." && !/[\\/\0]/.test(f);
    let requested;
    if (figures === "all" || figures === "images") {
        // Documented contract: images, plus JSON artifacts only when include_json is set.
        requested = files.filter((f) => isImage(f) || (include_json && isJson(f)));
    }
    else {
        requested = include_json ? [...figures, "summary.json"] : figures;
    }
    // De-duplicate while keeping the requested order.
    const toDownload = [...new Set(requested)];
    let resolvedDir = path.resolve(folder);
    const isGenericFolder = folder === "." || folder === "./results" || folder === "results";
    if (jobLabel !== null && isGenericFolder) {
        // The label comes from the API; keep it to one directory level.
        const labelDir = jobLabel.replace(/[\\/]+/g, "_");
        if (labelDir !== "." && labelDir !== "..") {
            resolvedDir = path.join(resolvedDir, labelDir);
        }
    }
    await fs.mkdir(resolvedDir, { recursive: true });
    const saved = [];
    const skipped = [];
    for (const filename of toDownload) {
        if (!isSafeFilename(filename)) {
            skipped.push(`${filename} (invalid filename)`);
            continue;
        }
        try {
            const { data: buf } = await apiRawCall(`/v1/results/${jobSegment}/image/${encodeURIComponent(filename)}`);
            await fs.writeFile(path.join(resolvedDir, filename), buf);
            saved.push(filename);
        }
        catch (err) {
            // Best effort: keep going, but remember why this file was not saved.
            skipped.push(`${filename} (${err instanceof Error ? err.message : String(err)})`);
        }
    }
    const report = [
        saved.length > 0
            ? `Saved ${saved.length} file(s) to ${resolvedDir}: ${saved.join(", ")}`
            : `No files saved (job may have no matching files or download failed). Check job_id and that the job is completed.`,
    ];
    if (skipped.length > 0) {
        report.push(`Skipped ${skipped.length} file(s): ${skipped.join("; ")}`);
    }
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
813
1053
|
// ---- analyze ----
|
|
814
1054
|
server.tool("analyze", `Run a specific analysis on SOM results. Use after get_results to drill into aspects.
|
|
1055
|
+
Request specific plots: get_results(job_id, figures=[...]) for chosen figures (e.g. figures: ["umatrix"]) or analyze(job_id, analysis_type) for a single analysis view.
|
|
815
1056
|
|
|
816
1057
|
Available analysis types and when to use them:
|
|
817
1058
|
|
|
@@ -873,7 +1114,7 @@ INTERPRETATION TIPS:
|
|
|
873
1114
|
const summary = (data.summary ?? {});
|
|
874
1115
|
const features = summary.features ?? [];
|
|
875
1116
|
const grid = summary.grid ?? [0, 0];
|
|
876
|
-
const ext = summary.output_format ?? "
|
|
1117
|
+
const ext = summary.output_format ?? "pdf";
|
|
877
1118
|
const content = [];
|
|
878
1119
|
if (analysis_type === "u_matrix") {
|
|
879
1120
|
content.push({
|
|
@@ -1211,108 +1452,6 @@ action=delete: Permanently delete a job and all S3 result files. Use to free sto
|
|
|
1211
1452
|
const data = await apiCall("DELETE", `/v1/jobs/${job_id}`);
|
|
1212
1453
|
return textResult(data);
|
|
1213
1454
|
});
|
|
1214
|
-
// ---- preview_dataset ----
// Fetches /v1/datasets/{dataset_id}/preview and renders it as plain text:
// a column-statistics table, then (when present in the response) cyclic
// hints, datetime columns, temporal suggestions and a table of sample rows.
// Every response field is treated as optional so a sparse payload still renders.
server.tool("preview_dataset", `Preview a dataset before training — shows columns, statistics, sample rows, and detections.

BEST FOR: Understanding data structure before training. ALWAYS call this before train_som
on an unfamiliar dataset.
NOT FOR: Large data exploration (returns only sample rows). Use derive_variable for computations.

TIMING: Near-instant (reads only header + sample rows from S3).

This tool detects:
1. Column types (numeric vs string) and basic stats (min/max/mean/std)
2. Cyclic feature candidates (columns named hour, weekday, angle, direction, etc.)
3. Datetime columns with format auto-detection
4. Skewed distributions (large max/min ratios suggest log transforms)

AFTER previewing, ask the user:
- "Which columns are relevant?" → columns parameter in train_som
- "I see cyclic candidates: [list]. Encode cyclically?" → cyclic_features
- "Column X ranges 0.01–50,000. Log-transform?" → transforms: {X: "log"}
- "Datetime columns found. Extract temporal features?" → temporal_features (NEVER auto-apply)
- "Are any features more important than others?" → feature_weights

COMMON MISTAKES:
- Skipping preview and training on all columns (including IDs, timestamps, irrelevant features)
- Not checking for datetime columns that could provide valuable cyclic features
- Ignoring skewed distributions that will dominate normalization

TIP: Use the prepare_training prompt for a structured walkthrough of all decisions.`, {
    dataset_id: z.string().describe("Dataset ID to preview"),
    n_rows: z
        .number()
        .int()
        .optional()
        .default(5)
        .describe("Number of sample rows to return (default 5)"),
}, async ({ dataset_id, n_rows }) => {
    // Encode the id so it cannot introduce extra path segments or query parameters.
    const data = (await apiCall("GET", `/v1/datasets/${encodeURIComponent(dataset_id)}/preview?n_rows=${n_rows ?? 5}`));
    const cols = data.columns ?? [];
    const stats = data.column_stats ?? [];
    const hints = data.cyclic_hints ?? [];
    const samples = data.sample_rows ?? [];
    const dtCols = data.datetime_columns ?? [];
    const temporalSugg = data.temporal_suggestions ?? [];
    // Missing or non-numeric statistics render as an em dash instead of "NaN".
    const fmt = (v) => {
        if (v === null || v === undefined) {
            return "—";
        }
        const num = Number(v);
        return Number.isNaN(num) ? "—" : num.toFixed(3);
    };
    const lines = [
        `Dataset: ${data.name} (${data.dataset_id})`,
        `${data.total_rows} rows × ${data.total_cols} columns`,
        ``,
        `Column Statistics:`,
        `| Column | Min | Max | Mean | Std | Nulls | Numeric |`,
        `|--------|-----|-----|------|-----|-------|---------|`,
    ];
    for (const s of stats) {
        lines.push(`| ${s.column} | ${fmt(s.min)} | ${fmt(s.max)} | ${fmt(s.mean)} | ${fmt(s.std)} | ${s.null_count ?? 0} | ${s.is_numeric !== false ? "yes" : "no"} |`);
    }
    if (hints.length > 0) {
        lines.push(``, `Detected Cyclic Feature Hints:`);
        for (const h of hints) {
            lines.push(` • ${h.column} — period=${h.period} (${h.reason})`);
        }
    }
    if (dtCols.length > 0) {
        lines.push(``, `Detected Datetime Columns:`);
        for (const dc of dtCols) {
            const formats = dc.detected_formats ?? [];
            const fmtStrs = formats
                .map((f) => `${f.format} — ${f.description} (${(f.match_rate * 100).toFixed(0)}% match)`)
                .join("; ");
            lines.push(` • ${dc.column}: sample="${dc.sample}" → ${fmtStrs}`);
            if (formats.length > 1) {
                lines.push(` ⚠ AMBIGUOUS: multiple formats match. Ask user to clarify.`);
            }
        }
    }
    if (temporalSugg.length > 0) {
        lines.push(``, `Temporal Feature Suggestions (require user approval):`);
        for (const ts of temporalSugg) {
            // Default the list fields like every other response field, so a
            // suggestion without them does not throw on .join().
            const tsColumns = ts.columns ?? [];
            const tsComponents = ts.available_components ?? [];
            lines.push(` • Columns: ${tsColumns.join(" + ")} → format: "${ts.format}"`);
            lines.push(` Available components: ${tsComponents.join(", ")}`);
            if (ts.note != null) {
                lines.push(` ${ts.note}`);
            }
        }
        lines.push(``, `To use temporal features in train_som, add:`, ` temporal_features: [{columns: [...], format: "...", extract: [...], cyclic: true}]`);
    }
    if (samples.length > 0) {
        lines.push(``, `Sample Rows (first ${samples.length}):`);
        lines.push(`| ${cols.join(" | ")} |`);
        lines.push(`| ${cols.map(() => "---").join(" | ")} |`);
        for (const row of samples) {
            lines.push(`| ${cols.map((c) => String(row[c] ?? "")).join(" | ")} |`);
        }
    }
    return {
        content: [{ type: "text", text: lines.join("\n") }],
    };
});
|
|
1309
|
-
// ---- delete_dataset ----
// Issues DELETE /v1/datasets/{dataset_id} and returns the API response via textResult.
server.tool("delete_dataset", "Delete a dataset and its stored data. Frees a dataset slot for new uploads.", {
    dataset_id: z.string().describe("Dataset ID to delete"),
}, async ({ dataset_id }) => {
    // Encode the id: this is a destructive call, and a raw value containing
    // "/" or ".." segments could otherwise point the DELETE at a different resource.
    const data = await apiCall("DELETE", `/v1/datasets/${encodeURIComponent(dataset_id)}`);
    return textResult(data);
});
|
|
1316
1455
|
// ---- list ----
|
|
1317
1456
|
server.tool("list", `List datasets or jobs.
|
|
1318
1457
|
|
|
@@ -1333,7 +1472,19 @@ type=jobs: List SOM training jobs (optionally filtered by dataset_id). Use to fi
|
|
|
1333
1472
|
const path = dataset_id
|
|
1334
1473
|
? `/v1/jobs?dataset_id=${dataset_id}`
|
|
1335
1474
|
: "/v1/jobs";
|
|
1336
|
-
const data = await apiCall("GET", path);
|
|
1475
|
+
const data = (await apiCall("GET", path));
|
|
1476
|
+
if (type === "jobs" && Array.isArray(data)) {
|
|
1477
|
+
const lines = data.map((job) => {
|
|
1478
|
+
const id = String(job.id ?? "");
|
|
1479
|
+
const status = String(job.status ?? "");
|
|
1480
|
+
const label = job.label != null && job.label !== "" ? String(job.label) : null;
|
|
1481
|
+
return label
|
|
1482
|
+
? `${label} (id: ${id}) — status: ${status}`
|
|
1483
|
+
: `id: ${id} — status: ${status}`;
|
|
1484
|
+
});
|
|
1485
|
+
const text = lines.length > 0 ? lines.join("\n") : "No jobs found.";
|
|
1486
|
+
return { content: [{ type: "text", text }] };
|
|
1487
|
+
}
|
|
1337
1488
|
return textResult(data);
|
|
1338
1489
|
});
|
|
1339
1490
|
// ---- get_job_export ----
|
|
@@ -1507,8 +1658,8 @@ HINT: If values length mismatch, suggest derive_variable for formula-based varia
|
|
|
1507
1658
|
output_format: z
|
|
1508
1659
|
.enum(["png", "pdf", "svg"])
|
|
1509
1660
|
.optional()
|
|
1510
|
-
.default("
|
|
1511
|
-
.describe("Image output format for the projection plot."),
|
|
1661
|
+
.default("pdf")
|
|
1662
|
+
.describe("Image output format for the projection plot (default: pdf)."),
|
|
1512
1663
|
output_dpi: z
|
|
1513
1664
|
.enum(["standard", "retina", "print"])
|
|
1514
1665
|
.optional()
|
|
@@ -1517,16 +1668,15 @@ HINT: If values length mismatch, suggest derive_variable for formula-based varia
|
|
|
1517
1668
|
colormap: z
|
|
1518
1669
|
.string()
|
|
1519
1670
|
.optional()
|
|
1520
|
-
.describe("Override colormap for the projection plot (default:
|
|
1671
|
+
.describe("Override colormap for the projection plot (default: coolwarm). Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral."),
|
|
1521
1672
|
}, async ({ job_id, variable_name, values, aggregation, output_format, output_dpi, colormap }) => {
|
|
1522
1673
|
const dpiMap = { standard: 1, retina: 2, print: 4 };
|
|
1523
1674
|
const body = {
|
|
1524
1675
|
variable_name,
|
|
1525
1676
|
values,
|
|
1526
1677
|
aggregation: aggregation ?? "mean",
|
|
1678
|
+
output_format: output_format ?? "pdf",
|
|
1527
1679
|
};
|
|
1528
|
-
if (output_format && output_format !== "png")
|
|
1529
|
-
body.output_format = output_format;
|
|
1530
1680
|
if (output_dpi && output_dpi !== "retina")
|
|
1531
1681
|
body.output_dpi = dpiMap[output_dpi] ?? 2;
|
|
1532
1682
|
if (colormap)
|
|
@@ -1553,7 +1703,7 @@ HINT: If values length mismatch, suggest derive_variable for formula-based varia
|
|
|
1553
1703
|
].join("\n"),
|
|
1554
1704
|
});
|
|
1555
1705
|
const safeName = variable_name.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
1556
|
-
const imgExt = summary.output_format ?? output_format ?? "
|
|
1706
|
+
const imgExt = summary.output_format ?? output_format ?? "pdf";
|
|
1557
1707
|
await tryAttachImage(content, projJobId, `projected_${safeName}.${imgExt}`);
|
|
1558
1708
|
return { content };
|
|
1559
1709
|
}
|
|
@@ -1615,8 +1765,8 @@ After showing results, ask:
|
|
|
1615
1765
|
output_format: z
|
|
1616
1766
|
.enum(["png", "pdf", "svg"])
|
|
1617
1767
|
.optional()
|
|
1618
|
-
.default("
|
|
1619
|
-
.describe("Image output format for the flow plot."),
|
|
1768
|
+
.default("pdf")
|
|
1769
|
+
.describe("Image output format for the flow plot (default: pdf)."),
|
|
1620
1770
|
output_dpi: z
|
|
1621
1771
|
.enum(["standard", "retina", "print"])
|
|
1622
1772
|
.optional()
|
|
@@ -1624,9 +1774,7 @@ After showing results, ask:
|
|
|
1624
1774
|
.describe("Resolution: standard (1x), retina (2x), print (4x)."),
|
|
1625
1775
|
}, async ({ job_id, lag, output_format, output_dpi }) => {
|
|
1626
1776
|
const dpiMap = { standard: 1, retina: 2, print: 4 };
|
|
1627
|
-
const body = { lag: lag ?? 1 };
|
|
1628
|
-
if (output_format && output_format !== "png")
|
|
1629
|
-
body.output_format = output_format;
|
|
1777
|
+
const body = { lag: lag ?? 1, output_format: output_format ?? "pdf" };
|
|
1630
1778
|
if (output_dpi && output_dpi !== "retina")
|
|
1631
1779
|
body.output_dpi = dpiMap[output_dpi] ?? 2;
|
|
1632
1780
|
const data = (await apiCall("POST", `/v1/results/${job_id}/transition-flow`, body));
|
|
@@ -1649,7 +1797,7 @@ After showing results, ask:
|
|
|
1649
1797
|
` Mean magnitude: ${stats.mean_magnitude !== undefined ? Number(stats.mean_magnitude).toFixed(4) : "N/A"}`,
|
|
1650
1798
|
].join("\n"),
|
|
1651
1799
|
});
|
|
1652
|
-
const imgExt = output_format ?? "
|
|
1800
|
+
const imgExt = output_format ?? "pdf";
|
|
1653
1801
|
await tryAttachImage(content, flowJobId, `transition_flow_lag${lag ?? 1}.${imgExt}`);
|
|
1654
1802
|
return { content };
|
|
1655
1803
|
}
|
|
@@ -1757,8 +1905,8 @@ COMMON MISTAKES:
|
|
|
1757
1905
|
output_format: z
|
|
1758
1906
|
.enum(["png", "pdf", "svg"])
|
|
1759
1907
|
.optional()
|
|
1760
|
-
.default("
|
|
1761
|
-
.describe("Image format for projection visualization
|
|
1908
|
+
.default("pdf")
|
|
1909
|
+
.describe("Image format for projection visualization when project_onto_job is set (default: pdf)."),
|
|
1762
1910
|
output_dpi: z
|
|
1763
1911
|
.enum(["standard", "retina", "print"])
|
|
1764
1912
|
.optional()
|
|
@@ -1767,7 +1915,7 @@ COMMON MISTAKES:
|
|
|
1767
1915
|
colormap: z
|
|
1768
1916
|
.string()
|
|
1769
1917
|
.optional()
|
|
1770
|
-
.describe("Colormap for projection visualization (default: plasma
|
|
1918
|
+
.describe("Colormap for projection visualization (default: coolwarm). Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral."),
|
|
1771
1919
|
}, async ({ dataset_id, name, expression, project_onto_job, aggregation, options, output_format, output_dpi, colormap, }) => {
|
|
1772
1920
|
const dpiMap = { standard: 1, retina: 2, print: 4 };
|
|
1773
1921
|
if (project_onto_job) {
|
|
@@ -1776,11 +1924,10 @@ COMMON MISTAKES:
|
|
|
1776
1924
|
name,
|
|
1777
1925
|
expression,
|
|
1778
1926
|
aggregation: aggregation ?? "mean",
|
|
1927
|
+
output_format: output_format ?? "pdf",
|
|
1779
1928
|
};
|
|
1780
1929
|
if (options)
|
|
1781
1930
|
body.options = options;
|
|
1782
|
-
if (output_format && output_format !== "png")
|
|
1783
|
-
body.output_format = output_format;
|
|
1784
1931
|
if (output_dpi && output_dpi !== "retina")
|
|
1785
1932
|
body.output_dpi = dpiMap[output_dpi] ?? 2;
|
|
1786
1933
|
if (colormap)
|
|
@@ -1811,7 +1958,7 @@ COMMON MISTAKES:
|
|
|
1811
1958
|
.join("\n"),
|
|
1812
1959
|
});
|
|
1813
1960
|
const safeName = name.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
1814
|
-
const imgExt = summary.output_format ?? output_format ?? "
|
|
1961
|
+
const imgExt = summary.output_format ?? output_format ?? "pdf";
|
|
1815
1962
|
await tryAttachImage(content, deriveJobId, `projected_${safeName}.${imgExt}`);
|
|
1816
1963
|
return { content };
|
|
1817
1964
|
}
|
|
@@ -1852,7 +1999,7 @@ COMMON MISTAKES:
|
|
|
1852
1999
|
`Min: ${summary.min ?? "?"} | Max: ${summary.max ?? "?"} | Mean: ${summary.mean ?? "?"}`,
|
|
1853
2000
|
``,
|
|
1854
2001
|
`The column is now available in the dataset. Include it in train_som`,
|
|
1855
|
-
`via the 'columns' parameter, or use
|
|
2002
|
+
`via the 'columns' parameter, or use datasets(action=preview) to verify.`,
|
|
1856
2003
|
]
|
|
1857
2004
|
.filter((l) => l !== "")
|
|
1858
2005
|
.join("\n"),
|
|
@@ -1940,24 +2087,49 @@ Use this BEFORE submitting large jobs to:
|
|
|
1940
2087
|
// ---------------------------------------------------------------------------
|
|
1941
2088
|
// Prompts
|
|
1942
2089
|
// ---------------------------------------------------------------------------
|
|
1943
|
-
server.prompt("
|
|
1944
|
-
"train_som. Walks through column selection, transforms, cyclic features, " +
|
|
1945
|
-
"temporal features, weighting, derived variables, and grid sizing.", { dataset_id: z.string().describe("Dataset ID to prepare for training") }, ({ dataset_id }) => ({
|
|
2090
|
+
// Prompt "info": takes no arguments and returns one user-role text message
// asking the assistant to relay a short overview of the MCP
// (what it is, the main workflow, and the key tools).
server.prompt("info", "Brief overview of the Barivia SOM MCP: what it does, main workflow, and key tools. Use when the user asks what this MCP can do, how to get started, or what the process is.", {}, () => {
    // One entry per output line; empty strings become blank separator lines.
    const overviewLines = [
        "Briefly inform the user using this overview:",
        "",
        "**What it is:** Barivia MCP connects you to a Self-Organizing Map (SOM) analytics engine. SOMs learn a 2D map from your data for visualization, clustering, and pattern discovery.",
        "",
        "**Main workflow:** (1) Upload CSV with datasets(upload) or list existing with list(type=datasets). (2) Inspect data with datasets(action=preview). (3) Optionally create a subset with datasets(action=subset) or use prepare_training for a guided checklist. (4) Train with train_som (grid size, epochs, columns, transforms, cyclic/temporal features). (5) Check progress with get_job_status; get results and figures with get_results. (6) Analyze with analyze (u_matrix, component_planes, clusters, quality_report, etc.), recolor with recolor_som, export with get_job_export or download_results. Output format (default pdf) and colormap (default coolwarm) can be set at training or changed later via recolor_som without retraining.",
        "",
        "**Key tools:** datasets (upload/preview/subset), list (datasets/jobs), train_som, get_job_status, get_results, analyze, recolor_som, download_results, project_variable, compare_runs, system_info.",
        "",
        "Keep the reply short and scannable.",
    ];
    const overviewMessage = {
        role: "user",
        content: {
            type: "text",
            text: overviewLines.join("\n"),
        },
    };
    return { messages: [overviewMessage] };
});
|
|
2111
|
+
server.prompt("prepare_training", "Guided pre-training checklist. Use after uploading a dataset and before calling train_som. " +
|
|
2112
|
+
"Walks through column selection, transforms, cyclic and temporal features, weighting, derived variables, and grid sizing.", { dataset_id: z.string().describe("Dataset ID to prepare for training") }, ({ dataset_id }) => ({
|
|
2113
|
+
messages: [
|
|
2114
|
+
{
|
|
2115
|
+
role: "user",
|
|
2116
|
+
content: {
|
|
2117
|
+
type: "text",
|
|
2118
|
+
text: [
|
|
2119
|
+
`Guide me through preparing dataset ${dataset_id} for SOM training.`,
|
|
2120
|
+
"",
|
|
2121
|
+
"For each step below, show the relevant data (use datasets(action=preview, dataset_id=\"" + dataset_id + "\") first), then ask for my choices. When we have all choices, summarize the train_som params and offer to submit.",
|
|
2122
|
+
"",
|
|
2123
|
+
"1. COLUMN SELECTION — Which columns to include? (Exclude IDs, free text, or redundant columns. Use train_som param: columns.)",
|
|
2124
|
+
"2. TRANSFORMS — Any right-skewed or heavy-tailed columns? Consider log, log1p, sqrt, or rank. (Use train_som param: transforms.)",
|
|
2125
|
+
"3. CYCLIC FEATURES — Any periodic variables (hour 0–24, weekday, angle 0–360)? These need cyclic encoding. (Use train_som param: cyclic_features, e.g. [{ feature: \"hour\", period: 24 }].)",
|
|
2126
|
+
"4. TEMPORAL FEATURES — Any datetime columns? We can extract day_of_year, month, hour_of_day, etc. (Use train_som param: temporal_features; preview shows suggested format.)",
|
|
2127
|
+
"5. FEATURE WEIGHTS — Should any feature count more or less? (Use train_som param: feature_weights, e.g. { temperature: 2.0 }.)",
|
|
2128
|
+
"6. DERIVED VARIABLES — Any new columns from expressions (e.g. ratio, difference)? (Use derive_variable before or after training; or add later with project_variable.)",
|
|
2129
|
+
"7. GRID & MODEL — Grid size (e.g. 20×20 or 25×25), epochs (e.g. [30, 15]), and model (SOM, RSOM, or SOFT variants). Use preset=quick|standard|refined or set grid_x, grid_y, epochs explicitly. Output format (default pdf) and colormap (default coolwarm) can be set here or changed later via recolor_som without retraining.",
|
|
2130
|
+
"",
|
|
2131
|
+
"Start by calling datasets(action=preview, dataset_id=\"" + dataset_id + "\") to show me the columns and statistics, then go through the steps.",
|
|
2132
|
+
].join("\n"),
|
|
1961
2133
|
},
|
|
1962
2134
|
},
|
|
1963
2135
|
],
|