npm - @barivia/barsom-mcp - Versions diffs - 0.2.5 → 0.2.7 - Mend

@barivia/barsom-mcp 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js CHANGED Viewed

@@ -7,7 +7,7 @@
  * BARIVIA_API_URL as environment variables.
  *
  * Usage (in MCP client config, e.g. Cursor / Claude Desktop):
- *
  *   {
  *     "mcpServers": {
  *       "analytics-engine": {
@@ -259,68 +259,146 @@ registerAppTool(server, "explore_som", {
             download_urls: data.download_urls,
         }),
     });
-    const imgExt = summary.output_format ?? "png";
+    const imgExt = summary.output_format ?? "pdf";
     await tryAttachImage(content, job_id, `combined.${imgExt}`);
     return { content };
 });
-// ---- upload_dataset ----
-server.tool("upload_dataset", `Upload a CSV dataset for SOM analysis. Returns dataset metadata including ID.
-PREFER file_path over csv_data: when the user points to a local file, use file_path.
-The MCP reads the file directly — no need to pass large CSV strings through the LLM.
-BEST FOR: Tabular data with numeric columns (sensor readings, financial data, process
-measurements, survey results). CSV with header row required.
-NOT FOR: Images, text documents, or pre-trained embeddings.
-TIMING: Upload is near-instant for datasets under 100MB.
+// ---- datasets ----
+server.tool("datasets", `Manage datasets: upload, preview, subset, or delete.
-AFTER uploading, ask the user these questions to guide the analysis:
-1. "What are you trying to discover in this data?" (clustering, anomalies, temporal patterns)
-2. "Are any columns cyclic/periodic?" (hour=24, weekday=7, wind direction=360)
-3. "Are any columns irrelevant or should be excluded?"
-4. "Should any features be weighted more heavily?"
-5. "Do any columns have very skewed distributions?" (suggest transforms)
+action=upload: Upload a CSV for SOM analysis. Prefer file_path over csv_data so the MCP reads the file directly. Returns dataset ID and metadata. Then use datasets(action=preview) before train_som.
+action=preview: Show columns, stats, sample rows, cyclic/datetime detections. ALWAYS preview before train_som on an unfamiliar dataset.
+action=subset: Create a new dataset from a subset of an existing one (by row range and/or column filter). Use to train on a slice (e.g. first 2000 rows, or region=Europe) without re-uploading. Requires name and at least one of row_range or filter. row_range: [start, end] 1-based inclusive. filter: { column, op, value } with op in eq, in, gt, lt, gte, lte.
+action=delete: Remove a dataset and free the slot.
-COMMON MISTAKES:
-- Uploading without previewing first — always use preview_dataset before train_som
-- Including ID columns or row indices — these add noise without meaning
-- Forgetting to check for datetime columns that could provide temporal features
-Show the column names from the response so the user can identify features.
-TIP: Use the prepare_training prompt for a structured preprocessing checklist.`, {
-    name: z.string().describe("Human-readable dataset name"),
-    file_path: z
-        .string()
+BEST FOR: Tabular numeric data. CSV with header required. Use list(type=datasets) to see existing datasets. To train on a subset, use datasets(action=subset) then train_som on the new dataset_id, or pass row_range in train_som params.`, {
+    action: z
+        .enum(["upload", "preview", "subset", "delete"])
+        .describe("upload: add a CSV; preview: inspect before training; subset: create subset dataset; delete: remove dataset"),
+    name: z.string().optional().describe("Dataset name (required for action=upload and subset)"),
+    file_path: z.string().optional().describe("Path to local CSV (for upload; prefer over csv_data)"),
+    csv_data: z.string().optional().describe("Inline CSV string (for upload; use for small data)"),
+    dataset_id: z.string().optional().describe("Dataset ID (required for preview, subset, and delete)"),
+    n_rows: z.number().int().optional().default(5).describe("Sample rows to return (preview only)"),
+    row_range: z
+        .tuple([z.number().int(), z.number().int()])
         .optional()
-        .describe("Path to a local CSV file. Use this when the user has a file on disk — the MCP reads it directly. Absolute or relative to the MCP process CWD (often the project root)."),
-    csv_data: z
-        .string()
+        .describe("For subset: [start, end] 1-based inclusive row range (e.g. [1, 2000])"),
+    filter: z
+        .object({
+        column: z.string(),
+        op: z.enum(["eq", "in", "gt", "lt", "gte", "lte"]),
+        value: z.union([z.string(), z.number(), z.array(z.union([z.string(), z.number()]))]),
+    })
         .optional()
-        .describe("CSV data with header row. Use for small inline data (<10KB). Prefer file_path for larger files."),
-}, async ({ name, file_path, csv_data }) => {
-    let body;
-    if (file_path) {
-        const resolved = path.resolve(file_path);
-        try {
-            body = await fs.readFile(resolved, "utf-8");
+        .describe("For subset: filter rows by column value (e.g. { column: 'region', op: 'eq', value: 'Europe' })"),
+}, async ({ action, name, file_path, csv_data, dataset_id, n_rows, row_range, filter }) => {
+    if (action === "upload") {
+        if (!name)
+            throw new Error("datasets(upload) requires name");
+        let body;
+        if (file_path) {
+            const resolved = path.resolve(file_path);
+            try {
+                body = await fs.readFile(resolved, "utf-8");
+            }
+            catch (err) {
+                const msg = err instanceof Error ? err.message : String(err);
+                throw new Error(`Cannot read file "${resolved}": ${msg}`);
+            }
         }
-        catch (err) {
-            const msg = err instanceof Error ? err.message : String(err);
-            throw new Error(`Cannot read file "${resolved}": ${msg}`);
+        else if (csv_data && csv_data.length > 0) {
+            body = csv_data;
         }
+        else {
+            throw new Error("datasets(upload) requires file_path or csv_data");
+        }
+        const data = await apiCall("POST", "/v1/datasets", body, {
+            "X-Dataset-Name": name,
+            "Content-Type": "text/csv",
+        });
+        return textResult(data);
     }
-    else if (csv_data && csv_data.length > 0) {
-        body = csv_data;
+    if (action === "preview") {
+        if (!dataset_id)
+            throw new Error("datasets(preview) requires dataset_id");
+        const data = (await apiCall("GET", `/v1/datasets/${dataset_id}/preview?n_rows=${n_rows ?? 5}`));
+        const cols = data.columns ?? [];
+        const stats = data.column_stats ?? [];
+        const hints = data.cyclic_hints ?? [];
+        const samples = data.sample_rows ?? [];
+        const dtCols = data.datetime_columns ?? [];
+        const temporalSugg = data.temporal_suggestions ?? [];
+        const fmt = (v) => v === null || v === undefined ? "—" : Number(v).toFixed(3);
+        const lines = [
+            `Dataset: ${data.name} (${data.dataset_id})`,
+            `${data.total_rows} rows × ${data.total_cols} columns`,
+            ``,
+            `Column Statistics:`,
+            `| Column | Min | Max | Mean | Std | Nulls | Numeric |`,
+            `|--------|-----|-----|------|-----|-------|---------|`,
+        ];
+        for (const s of stats) {
+            lines.push(`| ${s.column} | ${fmt(s.min)} | ${fmt(s.max)} | ${fmt(s.mean)} | ${fmt(s.std)} | ${s.null_count ?? 0} | ${s.is_numeric !== false ? "yes" : "no"} |`);
+        }
+        if (hints.length > 0) {
+            lines.push(``, `Detected Cyclic Feature Hints:`);
+            for (const h of hints) {
+                lines.push(`  • ${h.column} — period=${h.period} (${h.reason})`);
+            }
+        }
+        if (dtCols.length > 0) {
+            lines.push(``, `Detected Datetime Columns:`);
+            for (const dc of dtCols) {
+                const formats = dc.detected_formats ?? [];
+                const fmtStrs = formats
+                    .map((f) => `${f.format} — ${f.description} (${(f.match_rate * 100).toFixed(0)}% match)`)
+                    .join("; ");
+                lines.push(`  • ${dc.column}: sample="${dc.sample}" → ${fmtStrs}`);
+            }
+        }
+        if (temporalSugg.length > 0) {
+            lines.push(``, `Temporal Feature Suggestions (require user approval):`);
+            for (const ts of temporalSugg) {
+                lines.push(`  • Columns: ${ts.columns.join(" + ")} → format: "${ts.format}"`);
+                lines.push(`    Available components: ${ts.available_components.join(", ")}`);
+            }
+        }
+        if (samples.length > 0) {
+            lines.push(``, `Sample Rows (first ${samples.length}):`);
+            lines.push(`| ${cols.join(" | ")} |`);
+            lines.push(`| ${cols.map(() => "---").join(" | ")} |`);
+            for (const row of samples) {
+                lines.push(`| ${cols.map((c) => String(row[c] ?? "")).join(" | ")} |`);
+            }
+        }
+        return { content: [{ type: "text", text: lines.join("\n") }] };
+    }
+    if (action === "subset") {
+        if (!dataset_id)
+            throw new Error("datasets(subset) requires dataset_id");
+        if (!name)
+            throw new Error("datasets(subset) requires name");
+        if (row_range === undefined && filter === undefined) {
+            throw new Error("datasets(subset) requires at least one of row_range or filter");
+        }
+        const body = { name };
+        if (row_range !== undefined)
+            body.row_range = row_range;
+        if (filter !== undefined)
+            body.filter = filter;
+        const data = await apiCall("POST", `/v1/datasets/${dataset_id}/subset`, JSON.stringify(body), {
+            "Content-Type": "application/json",
+        });
+        return textResult(data);
     }
-    else {
-        throw new Error("Provide either file_path or csv_data");
+    if (action === "delete") {
+        if (!dataset_id)
+            throw new Error("datasets(delete) requires dataset_id");
+        const data = await apiCall("DELETE", `/v1/datasets/${dataset_id}`);
+        return textResult(data);
     }
-    const data = await apiCall("POST", "/v1/datasets", body, {
-        "X-Dataset-Name": name,
-        "Content-Type": "text/csv",
-    });
-    return textResult(data);
+    throw new Error("Invalid action");
 });
 // ---- train_som ----
 server.tool("train_som", `Train a Self-Organizing Map on the dataset. Returns a job_id for polling.
@@ -343,11 +421,11 @@ BEFORE calling, ask the user:
 5. Quick exploration or refined map?
 PRESET TABLE:
-| preset   | grid  | epochs   | batch_size |
-| quick    | 15x15 | [10, 0]  | 64         |
-| standard | 25x25 | [20, 10] | 64         |
-| refined  | 40x40 | [40, 20] | 32         |
-| high_res | 60x60 | [50, 30] | 32         |
+| preset   | grid  | epochs    | batch_size |
+| quick    | 15x15 | [15, 5]   | 48         |
+| standard | 25x25 | [30, 15]  | 48         |
+| refined  | 40x40 | [50, 25]  | 32         |
+| high_res | 60x60 | [60, 40]  | 32         |
 TRAINING PHASES:
 - Ordering: large neighborhoods → global structure. sigma_f controls end-radius (default 1.0).
@@ -356,7 +434,7 @@ TRAINING PHASES:
 TRANSFORMS: Per-column preprocessing before normalization.
   transforms: {revenue: "log", volume: "log1p", pressure: "sqrt"}
-  Suggest when preview_dataset shows large value ranges or right-skewed distributions.
+  Suggest when datasets(action=preview) shows large value ranges or right-skewed distributions.
 TEMPORAL FEATURES: NEVER auto-apply. Always ask which components to extract.
   temporal_features: [{columns: ['Date'], format: 'dd.mm.yyyy', extract: ['day_of_year'], cyclic: true}]
@@ -375,20 +453,20 @@ COMMON MISTAKES:
 QUALITY TARGETS: QE < 1.5 good, TE < 0.05 good, explained variance > 0.8 good.
 If QE > 2 → more epochs or larger grid. If TE > 0.15 → larger grid or periodic=true.
-OUTPUT: format (png/pdf/svg), dpi (standard/retina/print), colormap (viridis/plasma/inferno).
+OUTPUT: default format pdf, default colormap coolwarm. Override with output_format (png/pdf/svg), output_dpi (standard/retina/print), colormap (e.g. viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral).
 After training, use get_results → analyze(clusters) → component_planes → feature_correlation.
 See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`, {
-    dataset_id: z.string().describe("Dataset ID from upload_dataset"),
+    dataset_id: z.string().describe("Dataset ID from datasets(action=upload) or list(type=datasets)"),
     preset: z
         .enum(["quick", "standard", "refined", "high_res"])
         .optional()
         .describe("Training preset — sets sensible defaults for grid, epochs, and batch_size. " +
         "Explicit params override preset values. " +
-        "quick: 15×15, [10,0], batch=64. " +
-        "standard: 25×25, [20,10], batch=64, best with GPU. " +
-        "refined: 40×40, [40,20], batch=32, best with GPU. " +
-        "high_res: 60×60, [50,30], batch=32, best with GPU."),
+        "quick: 15×15, [15,5], batch=48. " +
+        "standard: 25×25, [30,15], batch=48, best with GPU. " +
+        "refined: 40×40, [50,25], batch=32, best with GPU. " +
+        "high_res: 60×60, [60,40], batch=32, best with GPU."),
     grid_x: z
         .number()
         .int()
@@ -519,8 +597,8 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
     output_format: z
         .enum(["png", "pdf", "svg"])
         .optional()
-        .default("png")
-        .describe("Image output format. PNG for quick viewing (default), PDF for publication-quality vector graphics, SVG for web embedding."),
+        .default("pdf")
+        .describe("Image output format. PDF (default) for publication-quality vector graphics, PNG for quick viewing, SVG for web embedding."),
     output_dpi: z
         .enum(["standard", "retina", "print"])
         .optional()
@@ -529,13 +607,17 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
     colormap: z
         .string()
         .optional()
-        .describe("Override default colormap for component planes (e.g. viridis, plasma, inferno, coolwarm). U-matrix always uses grays, cyclic features use twilight."),
-}, async ({ dataset_id, preset, grid_x, grid_y, epochs, model, periodic, columns, transforms, cyclic_features, temporal_features, feature_weights, normalize, sigma_f, learning_rate, batch_size, backend, output_format, output_dpi, colormap, }) => {
+        .describe("Override default colormap (coolwarm) for component planes and hit histogram. Examples: viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix always uses grays, cyclic features use twilight."),
+    row_range: z
+        .tuple([z.number().int().min(1), z.number().int().min(1)])
+        .optional()
+        .describe("Train on a subset of rows only: [start, end] 1-based inclusive. Alternative to creating a subset dataset with datasets(action=subset)."),
+}, async ({ dataset_id, preset, grid_x, grid_y, epochs, model, periodic, columns, transforms, cyclic_features, temporal_features, feature_weights, normalize, sigma_f, learning_rate, batch_size, backend, output_format, output_dpi, colormap, row_range, }) => {
     const PRESETS = {
-        quick: { grid: [15, 15], epochs: [10, 0], batch_size: 64 },
-        standard: { grid: [25, 25], epochs: [20, 10], batch_size: 64, backend: "cuda" },
-        refined: { grid: [40, 40], epochs: [40, 20], batch_size: 32, backend: "cuda" },
-        high_res: { grid: [60, 60], epochs: [50, 30], batch_size: 32, backend: "cuda" },
+        quick: { grid: [15, 15], epochs: [15, 5], batch_size: 48 },
+        standard: { grid: [25, 25], epochs: [30, 15], batch_size: 48, backend: "cuda" },
+        refined: { grid: [40, 40], epochs: [50, 25], batch_size: 32, backend: "cuda" },
+        high_res: { grid: [60, 60], epochs: [60, 40], batch_size: 32, backend: "cuda" },
     };
     const p = preset ? PRESETS[preset] : undefined;
     const params = {
@@ -588,9 +670,7 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
     else if (p?.backend) {
         params.backend = p.backend;
     }
-    if (output_format && output_format !== "png") {
-        params.output_format = output_format;
-    }
+    params.output_format = output_format ?? "pdf";
     const dpiMap = { standard: 1, retina: 2, print: 4 };
     if (output_dpi && output_dpi !== "retina") {
         params.output_dpi = dpiMap[output_dpi] ?? 2;
@@ -598,6 +678,9 @@ See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`,
     if (colormap) {
         params.colormap = colormap;
     }
+    if (row_range && row_range.length >= 2 && row_range[0] <= row_range[1]) {
+        params.row_range = row_range;
+    }
     const data = await apiCall("POST", "/v1/jobs", { dataset_id, params });
     return textResult(data);
 });
@@ -615,7 +698,9 @@ When status is 'failed', show the error to the user and suggest parameter adjust
     const data = (await apiCall("GET", `/v1/jobs/${job_id}`));
     const status = data.status;
     const progress = (data.progress ?? 0) * 100;
-    let text = `Job ${job_id}: ${status} (${progress.toFixed(1)}%)`;
+    const label = data.label != null && data.label !== "" ? String(data.label) : null;
+    const jobDesc = label ? `Job ${label} (id: ${job_id})` : `Job ${job_id}`;
+    let text = `${jobDesc}: ${status} (${progress.toFixed(1)}%)`;
     if (status === "completed") {
         text += ` | Results ready. Use get_results(job_id="${job_id}") to retrieve.`;
     }
@@ -624,6 +709,61 @@ When status is 'failed', show the error to the user and suggest parameter adjust
     }
     return { content: [{ type: "text", text }] };
 });
+/**
+ * Resolve the get_results `figures` param to the list of image filenames to fetch.
+ *
+ * Every filename uses `summary.output_format` as its extension, falling back to
+ * "pdf" when that field is null/undefined.
+ *
+ * @param jobType - Job type string. "transition_flow", "project_variable" and
+ *   "derive_variable" each yield exactly one filename and ignore `figures` and
+ *   `includeIndividual`; any other value takes the train_som path.
+ * @param summary - Result summary object. Fields read here: output_format, lag
+ *   (default 1), variable_name (default "variable"), features (default []).
+ * @param figures - undefined, "default", "combined_only", "all", or an array of
+ *   strings. Array entries are matched (trimmed, lower-cased) against the logical
+ *   names combined, umatrix, hit_histogram, correlation, component_N; an entry
+ *   that does not match but contains "." is returned verbatim as a filename;
+ *   anything else is dropped.
+ * @param includeIndividual - Only consulted when `figures` is undefined or
+ *   "default": truthy selects the full list, falsy selects the combined image only.
+ * @returns Array of image filenames. Can be empty when `figures` is an array with
+ *   no recognisable entries; any other unrecognised `figures` value yields the
+ *   combined image only.
+ */
+function getResultsImagesToFetch(jobType, summary, figures, includeIndividual) {
+    const ext = summary.output_format ?? "pdf"; // extension shared by every filename built below
+    if (jobType === "transition_flow") {
+        const lag = summary.lag ?? 1;
+        return [`transition_flow_lag${lag}.${ext}`];
+    }
+    if (jobType === "project_variable") {
+        const varName = summary.variable_name ?? "variable";
+        const safe = String(varName).replace(/[^a-zA-Z0-9_]/g, "_"); // every char outside [a-zA-Z0-9_] becomes "_"
+        return [`projected_${safe}.${ext}`];
+    }
+    if (jobType === "derive_variable") { // builds the same projected_<name> filename as project_variable
+        const varName = summary.variable_name ?? "variable";
+        const safe = String(varName).replace(/[^a-zA-Z0-9_]/g, "_");
+        return [`projected_${safe}.${ext}`];
+    }
+    // train_som (and any other job type not handled above)
+    const features = summary.features ?? [];
+    const combinedName = `combined.${ext}`;
+    const umatrixName = `umatrix.${ext}`;
+    const hitHistName = `hit_histogram.${ext}`;
+    const correlationName = `correlation.${ext}`;
+    const componentNames = features.map((f, i) => `component_${i + 1}_${f.replace(/[^a-zA-Z0-9_]/g, "_")}.${ext}`); // component_<1-based index>_<sanitised feature name>; calls f.replace, so presumably each feature is a string — TODO confirm
+    const allList = [combinedName, umatrixName, hitHistName, correlationName, ...componentNames]; // the "all" set: combined, umatrix, hit_histogram, correlation, then one entry per feature
+    if (figures === undefined || figures === "default") {
+        return includeIndividual ? allList : [combinedName];
+    }
+    if (figures === "combined_only")
+        return [combinedName];
+    if (figures === "all")
+        return allList;
+    if (Array.isArray(figures)) {
+        const nameToFile = { // logical name -> filename
+            combined: combinedName,
+            umatrix: umatrixName,
+            hit_histogram: hitHistName,
+            correlation: correlationName,
+        };
+        features.forEach((_, i) => {
+            nameToFile[`component_${i + 1}`] = componentNames[i]; // component_N addresses the Nth feature (1-based)
+        });
+        return figures
+            .map((key) => {
+            const k = key.trim().toLowerCase(); // logical-name lookup ignores case and surrounding whitespace
+            if (nameToFile[k]) // NOTE(review): plain-object lookup also matches inherited keys (e.g. "constructor") and would return a non-string — confirm callers never pass such names
+                return nameToFile[k];
+            if (key.includes(".")) // anything containing "." is passed through as a literal filename, using the original (untrimmed) key
+                return key;
+            return null; // unknown logical name: removed by the filter below
+        })
+            .filter((f) => f != null);
+    }
+    return [combinedName]; // fallback for any other figures value
+}
 // ---- get_results ----
 server.tool("get_results", `Retrieve results of a completed SOM training, projection, or derived variable job.
@@ -635,8 +775,12 @@ Returns: text summary with metrics and inline images (combined view and all plot
 DOWNLOAD LINKS: Links to API-domain or presigned URLs may not work when clicked (MCP holds the API key, not the browser). Images are inlined. For weights use get_job_export(export="weights"); for node stats use get_job_export(export="nodes"). If the user wants to save a file, offer to fetch via the appropriate tool.
 OPTIONS:
-- include_individual=true: shows each component plane, U-matrix, and hit histogram
-  as separate inline images. Best for side-by-side feature comparison.
+- figures: request specific plots only. Omit for default (combined only; or all if include_individual=true).
+  - "combined_only": only the combined view.
+  - "all": combined + umatrix + hit_histogram + all component planes.
+  - Array of logical names: e.g. figures: ["umatrix"] for just the U-matrix, or figures: ["combined","hit_histogram","correlation"] or ["combined","umatrix","component_1","component_2"]. Logical names: combined, umatrix, hit_histogram, correlation, component_1, component_2, ... (component_N = Nth feature). Pass an array to fetch/save only those figures.
+- include_individual=true: when figures is omitted, shows each component plane, U-matrix, and hit histogram
+  as separate inline images. Ignored when figures is set.
 AFTER showing results, guide the user:
 1. "The U-matrix shows [N] distinct regions. Does this match expected groupings?"
@@ -647,6 +791,7 @@ AFTER showing results, guide the user:
 6. If explained variance < 0.7: suggest transforms, feature selection, or more training
 WORKFLOW: get_results → analyze(clusters) → component_planes → feature_correlation.
+Request specific figures with get_results(job_id, figures=[...]) (e.g. figures: ["umatrix"] or figures: ["combined","hit_histogram"]) or run analyze(job_id, analysis_type) for a single view.
 Use get_job_export(export="training_log") for the learning curve (QE vs epoch — healthy=steady decline then plateau).
 Use analyze(job_id, "quality_report") for extended metrics (trustworthiness, neighborhood preservation).
@@ -655,20 +800,31 @@ METRIC INTERPRETATION:
 - TE < 0.05: good topology. TE > 0.15: grid too small.
 - Explained variance > 0.8: good. < 0.7: try transforms, fewer features, or more training.`, {
     job_id: z.string().describe("Job ID of a completed job"),
+    figures: z
+        .union([
+        z.enum(["default", "combined_only", "all"]),
+        z.array(z.string()),
+    ])
+        .optional()
+        .describe("Which figures to return. Omit or 'default' for combined only (or all if include_individual=true). 'combined_only': just combined view. 'all': combined + umatrix + hit_histogram + correlation + all component planes. Or array of logical names to fetch only those: combined, umatrix, hit_histogram, correlation, component_1, component_2, ..."),
     include_individual: z
         .boolean()
         .optional()
         .default(false)
-        .describe("If true, inline each individual plot (component planes, u-matrix, hit histogram) separately instead of just the combined view. Useful for side-by-side feature comparison or publication-quality individual figures."),
-}, async ({ job_id, include_individual }) => {
+        .describe("If true and figures is omitted, inline each individual plot (component planes, u-matrix, hit histogram). Ignored when figures is set."),
+}, async ({ job_id, figures, include_individual }) => {
     const data = (await apiCall("GET", `/v1/results/${job_id}`));
     const summary = (data.summary ?? {});
     const downloadUrls = (data.download_urls ?? {});
+    const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
+    const resultsHeader = jobLabel
+        ? `Results for ${jobLabel} (job_id: ${job_id})`
+        : `Results for job_id: ${job_id}`;
     const content = [];
     const inlinedImages = new Set();
     const jobType = summary.job_type ?? "train_som";
     // ── Dispatch by job type ──────────────────────────────────────────────────
-    const fmtExt = summary.output_format ?? "png";
+    const fmtExt = summary.output_format ?? "pdf";
     if (jobType === "transition_flow") {
         const lag = summary.lag ?? 1;
         const flowImg = `transition_flow_lag${lag}.${fmtExt}`;
@@ -676,7 +832,7 @@ METRIC INTERPRETATION:
         content.push({
             type: "text",
             text: [
-                `Transition Flow Results (job: ${job_id})`,
+                `Transition Flow ${resultsHeader}`,
                 `Parent SOM: ${summary.parent_job_id ?? "N/A"} | Lag: ${lag} | Samples: ${summary.n_samples ?? 0}`,
                 ``,
                 `Flow Statistics:`,
@@ -691,8 +847,10 @@ METRIC INTERPRETATION:
                 `Use transition_flow(lag=N) with larger N to reveal longer-term temporal structure.`,
             ].join("\n"),
         });
-        await tryAttachImage(content, job_id, flowImg);
-        inlinedImages.add(flowImg);
+        for (const name of getResultsImagesToFetch(jobType, summary, figures, include_individual)) {
+            await tryAttachImage(content, job_id, name);
+            inlinedImages.add(name);
+        }
     }
     else if (jobType === "project_variable") {
         const varName = summary.variable_name ?? "variable";
@@ -702,7 +860,7 @@ METRIC INTERPRETATION:
         content.push({
             type: "text",
             text: [
-                `Projected Variable: ${varName} (${agg}) — job: ${job_id}`,
+                `Projected Variable: ${varName} (${agg}) — ${resultsHeader}`,
                 `Parent SOM: ${summary.parent_job_id ?? "N/A"} | Samples: ${summary.n_samples ?? 0}`,
                 ``,
                 `Variable Statistics (per-node ${agg}):`,
@@ -715,8 +873,10 @@ METRIC INTERPRETATION:
                 `learned feature space, even if it wasn't used in training.`,
             ].join("\n"),
         });
-        await tryAttachImage(content, job_id, projImg);
-        inlinedImages.add(projImg);
+        for (const name of getResultsImagesToFetch(jobType, summary, figures, include_individual)) {
+            await tryAttachImage(content, job_id, name);
+            inlinedImages.add(name);
+        }
     }
     else {
         // ── Default: train_som results ──────────────────────────────────────────
@@ -732,7 +892,7 @@ METRIC INTERPRETATION:
         const duration = summary.training_duration_seconds;
         const ordErrors = summary.ordering_errors;
         const textSummary = [
-            `SOM Training Results (job: ${job_id})`,
+            `SOM Training ${resultsHeader}`,
             `Grid: ${grid[0]}×${grid[1]} | Features: ${summary.n_features ?? 0} | Samples: ${summary.n_samples ?? 0}`,
             `Model: ${summary.model ?? "SOM"} | Epochs: ${epochStr}`,
             `Periodic: ${summary.periodic ?? true} | Normalize: ${summary.normalize ?? "auto"}`,
@@ -763,29 +923,11 @@ METRIC INTERPRETATION:
             .filter((l) => l !== "")
             .join("\n");
         content.push({ type: "text", text: textSummary });
-        const imgExt = summary.output_format ?? "png";
-        const combinedName = `combined.${imgExt}`;
-        await tryAttachImage(content, job_id, combinedName);
-        inlinedImages.add(combinedName);
-        if (include_individual) {
-            const feats = summary.features ?? [];
-            const imageNames = [
-                `umatrix.${imgExt}`,
-                `hit_histogram.${imgExt}`,
-                ...feats.map((f, i) => `component_${i + 1}_${f.replace(/[^a-zA-Z0-9_]/g, "_")}.${imgExt}`),
-            ];
-            const results = await Promise.allSettled(imageNames.map((name) => apiRawCall(`/v1/results/${job_id}/image/${name}`).then((r) => ({ name, ...r }))));
-            for (const r of results) {
-                if (r.status === "fulfilled") {
-                    content.push({
-                        type: "image",
-                        data: r.value.data.toString("base64"),
-                        mimeType: mimeForFilename(r.value.name),
-                        annotations: { audience: ["user"], priority: 0.8 },
-                    });
-                    inlinedImages.add(r.value.name);
-                }
-            }
+        const imgExt = summary.output_format ?? "pdf";
+        const imagesToFetch = getResultsImagesToFetch(jobType, summary, figures, include_individual);
+        for (const name of imagesToFetch) {
+            await tryAttachImage(content, job_id, name);
+            inlinedImages.add(name);
         }
     }
     // Inline remaining image files; for JSON provide tool hints (no clickable URLs — auth required)
@@ -808,10 +950,109 @@ METRIC INTERPRETATION:
             }
         }
     }
+    // List available artifacts so the LLM can offer to fetch specific views
+    if (files.length > 0) {
+        const features = summary.features ?? [];
+        const logicalNames = jobType === "train_som" || jobType === "render_variant"
+            ? `Logical names: combined, umatrix, hit_histogram, correlation, ${features.map((_, i) => `component_${i + 1}`).join(", ")}. `
+            : "";
+        content.push({
+            type: "text",
+            text: `Available to fetch individually: ${files.join(", ")}. ${logicalNames}Use get_results(job_id, figures=[...]) to request specific plots, get_results(job_id, include_individual=true) or figures="all" to inline all plots, or analyze(job_id, analysis_type) for a specific view (u_matrix, component_planes, bmu_hits, clusters, quality_report, etc.).`,
+        });
+    }
+    return { content };
+});
+// ---- recolor_som ----
+// Registers the "recolor_som" tool. The handler POSTs a render request for an
+// existing job and replies with the id of the newly created render job; it does
+// not wait for the render to finish and does not fetch any image itself.
+server.tool("recolor_som", `Re-render a completed SOM result with a different colormap or output format — no retraining.
+Use when the user wants to see the same combined (or other) plot with another color scheme (e.g. plasma, inferno, coolwarm). You can also use this to re-export figures in a different format (e.g. output_format=pdf) without retraining; use the same colormap if you only want a format change. Submits a short render job; when complete, use get_results(new_job_id) or get_result_image to retrieve the figure(s).
+Colormaps (default: coolwarm): e.g. viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix and cyclic panels keep fixed colormaps (grays, twilight).`, {
+    job_id: z.string().describe("Job ID of a completed SOM training job (parent)"),
+    // The description advertises coolwarm as the default, so the schema now applies
+    // that default instead of rejecting calls that omit the colormap.
+    colormap: z
+        .string()
+        .optional()
+        .default("coolwarm")
+        .describe("Colormap name (default: coolwarm). E.g. viridis, plasma, inferno, magma, coolwarm."),
+    figures: z
+        .array(z.string())
+        .optional()
+        .default(["combined"])
+        .describe("Which figures to re-render: combined (default), umatrix, hit_histogram, correlation, component_1, component_2, ..."),
+    output_format: z.enum(["png", "pdf", "svg"]).optional().default("pdf"),
+    output_dpi: z.number().int().min(1).max(4).optional().default(2),
+}, async ({ job_id, colormap, figures, output_format, output_dpi }) => {
+    const body = { colormap, figures, output_format, output_dpi };
+    // NOTE(review): the body is serialized here and sent with an explicit
+    // Content-Type header, while other POST calls in this file hand apiCall the
+    // plain object — confirm apiCall accepts a pre-serialized string.
+    const data = (await apiCall("POST", `/v1/results/${job_id}/render`, JSON.stringify(body), {
+        "Content-Type": "application/json",
+    }));
+    const newJobId = data?.id;
+    // Without an id there is nothing to poll; say so instead of telling the caller
+    // to poll a job called "undefined".
+    if (newJobId == null || newJobId === "") {
+        return {
+            content: [
+                {
+                    type: "text",
+                    text: `Re-render request for job ${job_id} was sent, but the response did not include a new job id. Use list(type=jobs) to look for the render job before retrying.`,
+                },
+            ],
+        };
+    }
+    const content = [
+        {
+            type: "text",
+            text: [
+                `Re-render job submitted with colormap "${colormap}".`,
+                `New job_id: ${newJobId}. Poll get_job_status(job_id="${newJobId}") until status is 'completed', then use get_results(job_id="${newJobId}") or get_result_image to retrieve the recolored plot(s). No retraining was performed.`,
+            ].join("\n"),
+        },
+    ];
     return { content };
 });
+// ---- download_results ----
+// Registers the "download_results" tool. The handler reads the job's result
+// summary, decides which files to fetch, creates the target directory and writes
+// each fetched file into it. Downloads are best-effort: one failing file does not
+// abort the rest, and the reply lists both what was saved and what was skipped.
+server.tool("download_results", `Save result figures (and optionally JSON) to a folder on disk. Use so the user can open, share, or version files locally without writing their own download script.
+folder: path to the directory (e.g. "." for current/workspace, "./results", or absolute path). When folder is a generic path like "." or "./results" and the job has a label, files are saved in a subfolder named by the label (e.g. ./results/Winedata_a1b2c3_badger_thong_oil). You can also pass a path that already includes the label.
+figures: "all" (default) = all image files from the job; "images" = same; or pass an array of filenames to save only those (e.g. ["combined.pdf", "umatrix.pdf", "correlation.pdf"]). Default export format is PDF.
+include_json: if true, also save summary.json (and other JSON artifacts) into the same folder.`, {
+    job_id: z.string().describe("Job ID of a completed job"),
+    folder: z.string().describe("Directory path to save files (e.g. '.' or './results'). When the job has a label, a subfolder with that label may be used. Relative paths are relative to process cwd (usually project root)."),
+    figures: z
+        .union([z.enum(["all", "images"]), z.array(z.string())])
+        .optional()
+        .default("all")
+        .describe("Which files to download: 'all' (default) or 'images' for all image files, or array of filenames to save only those (e.g. ['combined.pdf', 'umatrix.pdf', 'correlation.pdf'])."),
+    include_json: z.boolean().optional().default(false).describe("If true, also download summary.json and other JSON files"),
+}, async ({ job_id, folder, figures, include_json }) => {
+    const data = (await apiCall("GET", `/v1/results/${job_id}`));
+    const summary = (data.summary ?? {});
+    const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
+    const files = summary.files ?? [];
+    const isImage = (f) => f.endsWith(".png") || f.endsWith(".svg") || f.endsWith(".pdf");
+    // True only for a bare file/folder name with no directory part. Names come from
+    // the caller (figures) or from the API (summary.files, label), so anything that
+    // could climb out of the target directory ("../x", "/etc/x", "a/b") is refused.
+    const isPlainName = (n) => typeof n === "string" && n !== "" && n !== "." && n !== ".." && path.basename(n) === n;
+    let toDownload;
+    if (figures === "all" || figures === "images") {
+        // With include_json every listed artifact is taken; otherwise images only.
+        toDownload = include_json ? files : files.filter(isImage);
+    }
+    else {
+        toDownload = figures;
+        if (include_json && !toDownload.includes("summary.json")) {
+            toDownload = [...toDownload, "summary.json"];
+        }
+    }
+    let resolvedDir = path.resolve(folder);
+    // Only the generic folders get a per-job subfolder, and only when the label is
+    // usable as a single path segment; otherwise files go straight into folder.
+    const useLabelSubfolder = jobLabel !== null &&
+        isPlainName(jobLabel) &&
+        (folder === "." || folder === "./results" || folder === "results");
+    if (useLabelSubfolder) {
+        resolvedDir = path.join(resolvedDir, jobLabel);
+    }
+    await fs.mkdir(resolvedDir, { recursive: true });
+    const saved = [];
+    const skipped = [];
+    for (const filename of toDownload) {
+        if (!isPlainName(filename)) {
+            skipped.push(`${filename} (rejected: not a plain filename)`);
+            continue;
+        }
+        try {
+            // NOTE(review): JSON artifacts are requested through the same /image/
+            // route as figures — confirm the API serves non-image files there.
+            const { data: buf } = await apiRawCall(`/v1/results/${job_id}/image/${filename}`);
+            const outPath = path.join(resolvedDir, filename);
+            await fs.writeFile(outPath, buf);
+            saved.push(filename);
+        }
+        catch (err) {
+            // Keep going with the remaining files, but remember the reason so the
+            // reply can tell the user what is missing.
+            skipped.push(`${filename} (${err instanceof Error ? err.message : String(err)})`);
+        }
+    }
+    const lines = [
+        saved.length > 0
+            ? `Saved ${saved.length} file(s) to ${resolvedDir}: ${saved.join(", ")}`
+            : `No files saved (job may have no matching files or download failed). Check job_id and that the job is completed.`,
+    ];
+    if (skipped.length > 0) {
+        lines.push(`Skipped ${skipped.length} file(s): ${skipped.join("; ")}`);
+    }
+    return { content: [{ type: "text", text: lines.join("\n") }] };
+});
 // ---- analyze ----
 server.tool("analyze", `Run a specific analysis on SOM results. Use after get_results to drill into aspects.
+Request specific plots: get_results(job_id, figures=[...]) for chosen figures (e.g. figures: ["umatrix"]) or analyze(job_id, analysis_type) for a single analysis view.
 Available analysis types and when to use them:
@@ -873,7 +1114,7 @@ INTERPRETATION TIPS:
     const summary = (data.summary ?? {});
     const features = summary.features ?? [];
     const grid = summary.grid ?? [0, 0];
-    const ext = summary.output_format ?? "png";
+    const ext = summary.output_format ?? "pdf";
     const content = [];
     if (analysis_type === "u_matrix") {
         content.push({
@@ -1211,108 +1452,6 @@ action=delete: Permanently delete a job and all S3 result files. Use to free sto
     const data = await apiCall("DELETE", `/v1/jobs/${job_id}`);
     return textResult(data);
 });
-// ---- preview_dataset ----
-server.tool("preview_dataset", `Preview a dataset before training — shows columns, statistics, sample rows, and detections.
-BEST FOR: Understanding data structure before training. ALWAYS call this before train_som
-on an unfamiliar dataset.
-NOT FOR: Large data exploration (returns only sample rows). Use derive_variable for computations.
-TIMING: Near-instant (reads only header + sample rows from S3).
-This tool detects:
-1. Column types (numeric vs string) and basic stats (min/max/mean/std)
-2. Cyclic feature candidates (columns named hour, weekday, angle, direction, etc.)
-3. Datetime columns with format auto-detection
-4. Skewed distributions (large max/min ratios suggest log transforms)
-AFTER previewing, ask the user:
-- "Which columns are relevant?" → columns parameter in train_som
-- "I see cyclic candidates: [list]. Encode cyclically?" → cyclic_features
-- "Column X ranges 0.01–50,000. Log-transform?" → transforms: {X: "log"}
-- "Datetime columns found. Extract temporal features?" → temporal_features (NEVER auto-apply)
-- "Are any features more important than others?" → feature_weights
-COMMON MISTAKES:
-- Skipping preview and training on all columns (including IDs, timestamps, irrelevant features)
-- Not checking for datetime columns that could provide valuable cyclic features
-- Ignoring skewed distributions that will dominate normalization
-TIP: Use the prepare_training prompt for a structured walkthrough of all decisions.`, {
-    dataset_id: z.string().describe("Dataset ID to preview"),
-    n_rows: z
-        .number()
-        .int()
-        .optional()
-        .default(5)
-        .describe("Number of sample rows to return (default 5)"),
-}, async ({ dataset_id, n_rows }) => {
-    const data = (await apiCall("GET", `/v1/datasets/${dataset_id}/preview?n_rows=${n_rows ?? 5}`));
-    const cols = data.columns ?? [];
-    const stats = data.column_stats ?? [];
-    const hints = data.cyclic_hints ?? [];
-    const samples = data.sample_rows ?? [];
-    const dtCols = data.datetime_columns ?? [];
-    const temporalSugg = data.temporal_suggestions ?? [];
-    const fmt = (v) => v === null || v === undefined ? "—" : Number(v).toFixed(3);
-    const lines = [
-        `Dataset: ${data.name} (${data.dataset_id})`,
-        `${data.total_rows} rows × ${data.total_cols} columns`,
-        ``,
-        `Column Statistics:`,
-        `| Column | Min | Max | Mean | Std | Nulls | Numeric |`,
-        `|--------|-----|-----|------|-----|-------|---------|`,
-    ];
-    for (const s of stats) {
-        lines.push(`| ${s.column} | ${fmt(s.min)} | ${fmt(s.max)} | ${fmt(s.mean)} | ${fmt(s.std)} | ${s.null_count ?? 0} | ${s.is_numeric !== false ? "yes" : "no"} |`);
-    }
-    if (hints.length > 0) {
-        lines.push(``, `Detected Cyclic Feature Hints:`);
-        for (const h of hints) {
-            lines.push(`  • ${h.column} — period=${h.period} (${h.reason})`);
-        }
-    }
-    if (dtCols.length > 0) {
-        lines.push(``, `Detected Datetime Columns:`);
-        for (const dc of dtCols) {
-            const formats = dc.detected_formats ?? [];
-            const fmtStrs = formats
-                .map((f) => `${f.format} — ${f.description} (${(f.match_rate * 100).toFixed(0)}% match)`)
-                .join("; ");
-            lines.push(`  • ${dc.column}: sample="${dc.sample}" → ${fmtStrs}`);
-            if (formats.length > 1) {
-                lines.push(`    ⚠ AMBIGUOUS: multiple formats match. Ask user to clarify.`);
-            }
-        }
-    }
-    if (temporalSugg.length > 0) {
-        lines.push(``, `Temporal Feature Suggestions (require user approval):`);
-        for (const ts of temporalSugg) {
-            lines.push(`  • Columns: ${ts.columns.join(" + ")} → format: "${ts.format}"`);
-            lines.push(`    Available components: ${ts.available_components.join(", ")}`);
-            lines.push(`    ${ts.note}`);
-        }
-        lines.push(``, `To use temporal features in train_som, add:`, `  temporal_features: [{columns: [...], format: "...", extract: [...], cyclic: true}]`);
-    }
-    if (samples.length > 0) {
-        lines.push(``, `Sample Rows (first ${samples.length}):`);
-        lines.push(`| ${cols.join(" | ")} |`);
-        lines.push(`| ${cols.map(() => "---").join(" | ")} |`);
-        for (const row of samples) {
-            lines.push(`| ${cols.map((c) => String(row[c] ?? "")).join(" | ")} |`);
-        }
-    }
-    return {
-        content: [{ type: "text", text: lines.join("\n") }],
-    };
-});
-// ---- delete_dataset ----
-server.tool("delete_dataset", "Delete a dataset and its stored data. Frees a dataset slot for new uploads.", {
-    dataset_id: z.string().describe("Dataset ID to delete"),
-}, async ({ dataset_id }) => {
-    const data = await apiCall("DELETE", `/v1/datasets/${dataset_id}`);
-    return textResult(data);
-});
 // ---- list ----
 server.tool("list", `List datasets or jobs.
@@ -1333,7 +1472,19 @@ type=jobs: List SOM training jobs (optionally filtered by dataset_id). Use to fi
     const path = dataset_id
         ? `/v1/jobs?dataset_id=${dataset_id}`
         : "/v1/jobs";
-    const data = await apiCall("GET", path);
+    const data = (await apiCall("GET", path));
+    if (type === "jobs" && Array.isArray(data)) {
+        const lines = data.map((job) => {
+            const id = String(job.id ?? "");
+            const status = String(job.status ?? "");
+            const label = job.label != null && job.label !== "" ? String(job.label) : null;
+            return label
+                ? `${label} (id: ${id}) — status: ${status}`
+                : `id: ${id} — status: ${status}`;
+        });
+        const text = lines.length > 0 ? lines.join("\n") : "No jobs found.";
+        return { content: [{ type: "text", text }] };
+    }
     return textResult(data);
 });
 // ---- get_job_export ----
@@ -1507,8 +1658,8 @@ HINT: If values length mismatch, suggest derive_variable for formula-based varia
     output_format: z
         .enum(["png", "pdf", "svg"])
         .optional()
-        .default("png")
-        .describe("Image output format for the projection plot."),
+        .default("pdf")
+        .describe("Image output format for the projection plot (default: pdf)."),
     output_dpi: z
         .enum(["standard", "retina", "print"])
         .optional()
@@ -1517,16 +1668,15 @@ HINT: If values length mismatch, suggest derive_variable for formula-based varia
     colormap: z
         .string()
         .optional()
-        .describe("Override colormap for the projection plot (default: plasma)."),
+        .describe("Override colormap for the projection plot (default: coolwarm). Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral."),
 }, async ({ job_id, variable_name, values, aggregation, output_format, output_dpi, colormap }) => {
     const dpiMap = { standard: 1, retina: 2, print: 4 };
     const body = {
         variable_name,
         values,
         aggregation: aggregation ?? "mean",
+        output_format: output_format ?? "pdf",
     };
-    if (output_format && output_format !== "png")
-        body.output_format = output_format;
     if (output_dpi && output_dpi !== "retina")
         body.output_dpi = dpiMap[output_dpi] ?? 2;
     if (colormap)
@@ -1553,7 +1703,7 @@ HINT: If values length mismatch, suggest derive_variable for formula-based varia
             ].join("\n"),
         });
         const safeName = variable_name.replace(/[^a-zA-Z0-9_]/g, "_");
-        const imgExt = summary.output_format ?? output_format ?? "png";
+        const imgExt = summary.output_format ?? output_format ?? "pdf";
         await tryAttachImage(content, projJobId, `projected_${safeName}.${imgExt}`);
         return { content };
     }
@@ -1615,8 +1765,8 @@ After showing results, ask:
     output_format: z
         .enum(["png", "pdf", "svg"])
         .optional()
-        .default("png")
-        .describe("Image output format for the flow plot."),
+        .default("pdf")
+        .describe("Image output format for the flow plot (default: pdf)."),
     output_dpi: z
         .enum(["standard", "retina", "print"])
         .optional()
@@ -1624,9 +1774,7 @@ After showing results, ask:
         .describe("Resolution: standard (1x), retina (2x), print (4x)."),
 }, async ({ job_id, lag, output_format, output_dpi }) => {
     const dpiMap = { standard: 1, retina: 2, print: 4 };
-    const body = { lag: lag ?? 1 };
-    if (output_format && output_format !== "png")
-        body.output_format = output_format;
+    const body = { lag: lag ?? 1, output_format: output_format ?? "pdf" };
     if (output_dpi && output_dpi !== "retina")
         body.output_dpi = dpiMap[output_dpi] ?? 2;
     const data = (await apiCall("POST", `/v1/results/${job_id}/transition-flow`, body));
@@ -1649,7 +1797,7 @@ After showing results, ask:
                 `  Mean magnitude:     ${stats.mean_magnitude !== undefined ? Number(stats.mean_magnitude).toFixed(4) : "N/A"}`,
             ].join("\n"),
         });
-        const imgExt = output_format ?? "png";
+        const imgExt = output_format ?? "pdf";
         await tryAttachImage(content, flowJobId, `transition_flow_lag${lag ?? 1}.${imgExt}`);
         return { content };
     }
@@ -1757,8 +1905,8 @@ COMMON MISTAKES:
     output_format: z
         .enum(["png", "pdf", "svg"])
         .optional()
-        .default("png")
-        .describe("Image format for projection visualization (only when project_onto_job is set)"),
+        .default("pdf")
+        .describe("Image format for projection visualization when project_onto_job is set (default: pdf)."),
     output_dpi: z
         .enum(["standard", "retina", "print"])
         .optional()
@@ -1767,7 +1915,7 @@ COMMON MISTAKES:
     colormap: z
         .string()
         .optional()
-        .describe("Colormap for projection visualization (default: plasma)"),
+        .describe("Colormap for projection visualization (default: coolwarm). Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral."),
 }, async ({ dataset_id, name, expression, project_onto_job, aggregation, options, output_format, output_dpi, colormap, }) => {
     const dpiMap = { standard: 1, retina: 2, print: 4 };
     if (project_onto_job) {
@@ -1776,11 +1924,10 @@ COMMON MISTAKES:
             name,
             expression,
             aggregation: aggregation ?? "mean",
+            output_format: output_format ?? "pdf",
         };
         if (options)
             body.options = options;
-        if (output_format && output_format !== "png")
-            body.output_format = output_format;
         if (output_dpi && output_dpi !== "retina")
             body.output_dpi = dpiMap[output_dpi] ?? 2;
         if (colormap)
@@ -1811,7 +1958,7 @@ COMMON MISTAKES:
                     .join("\n"),
             });
             const safeName = name.replace(/[^a-zA-Z0-9_]/g, "_");
-            const imgExt = summary.output_format ?? output_format ?? "png";
+            const imgExt = summary.output_format ?? output_format ?? "pdf";
             await tryAttachImage(content, deriveJobId, `projected_${safeName}.${imgExt}`);
             return { content };
         }
@@ -1852,7 +1999,7 @@ COMMON MISTAKES:
                             `Min: ${summary.min ?? "?"} | Max: ${summary.max ?? "?"} | Mean: ${summary.mean ?? "?"}`,
                             ``,
                             `The column is now available in the dataset. Include it in train_som`,
-                            `via the 'columns' parameter, or use preview_dataset to verify.`,
+                            `via the 'columns' parameter, or use datasets(action=preview) to verify.`,
                         ]
                             .filter((l) => l !== "")
                             .join("\n"),
@@ -1940,24 +2087,49 @@ Use this BEFORE submitting large jobs to:
 // ---------------------------------------------------------------------------
 // Prompts
 // ---------------------------------------------------------------------------
-server.prompt("prepare_training", "Guided pre-training checklist. Use after uploading a dataset and before calling " +
-    "train_som. Walks through column selection, transforms, cyclic features, " +
-    "temporal features, weighting, derived variables, and grid sizing.", { dataset_id: z.string().describe("Dataset ID to prepare for training") }, ({ dataset_id }) => ({
+server.prompt("info", "Brief overview of the Barivia SOM MCP: what it does, main workflow, and key tools. Use when the user asks what this MCP can do, how to get started, or what the process is.", {}, () => ({
     messages: [
         {
             role: "user",
             content: {
                 type: "text",
-                text: `Guide me through preparing dataset ${dataset_id} for SOM training. ` +
-                    `For each step, show the relevant data and ask me to decide:\n` +
-                    `1. COLUMN SELECTION: Which columns to include/exclude?\n` +
-                    `2. TRANSFORMS: Any columns need log, sqrt, or rank transforms? (check for skewed distributions)\n` +
-                    `3. CYCLIC FEATURES: Any periodic columns (hour, weekday, angle, direction)?\n` +
-                    `4. TEMPORAL FEATURES: Any datetime columns to extract components from?\n` +
-                    `5. FEATURE WEIGHTS: Should any features be emphasized or de-emphasized?\n` +
-                    `6. DERIVED VARIABLES: Any new columns to compute from existing ones? (e.g., ratios, differences)\n` +
-                    `7. GRID & MODEL: What grid size and model type?\n\n` +
-                    `Start by calling preview_dataset to show me the columns and statistics.`,
+                text: [
+                    "Briefly inform the user using this overview:",
+                    "",
+                    "**What it is:** Barivia MCP connects you to a Self-Organizing Map (SOM) analytics engine. SOMs learn a 2D map from your data for visualization, clustering, and pattern discovery.",
+                    "",
+                    "**Main workflow:** (1) Upload CSV with datasets(upload) or list existing with list(type=datasets). (2) Inspect data with datasets(action=preview). (3) Optionally create a subset with datasets(action=subset) or use prepare_training for a guided checklist. (4) Train with train_som (grid size, epochs, columns, transforms, cyclic/temporal features). (5) Check progress with get_job_status; get results and figures with get_results. (6) Analyze with analyze (u_matrix, component_planes, clusters, quality_report, etc.), recolor with recolor_som, export with get_job_export or download_results. Output format (default pdf) and colormap (default coolwarm) can be set at training or changed later via recolor_som without retraining.",
+                    "",
+                    "**Key tools:** datasets (upload/preview/subset), list (datasets/jobs), train_som, get_job_status, get_results, analyze, recolor_som, download_results, project_variable, compare_runs, system_info.",
+                    "",
+                    "Keep the reply short and scannable.",
+                ].join("\n"),
+            },
+        },
+    ],
+}));
+server.prompt("prepare_training", "Guided pre-training checklist. Use after uploading a dataset and before calling train_som. " +
+    "Walks through column selection, transforms, cyclic and temporal features, weighting, derived variables, and grid sizing.", { dataset_id: z.string().describe("Dataset ID to prepare for training") }, ({ dataset_id }) => ({
+    messages: [
+        {
+            role: "user",
+            content: {
+                type: "text",
+                text: [
+                    `Guide me through preparing dataset ${dataset_id} for SOM training.`,
+                    "",
+                    "For each step below, show the relevant data (use datasets(action=preview, dataset_id=\"" + dataset_id + "\") first), then ask for my choices. When we have all choices, summarize the train_som params and offer to submit.",
+                    "",
+                    "1. COLUMN SELECTION — Which columns to include? (Exclude IDs, free text, or redundant columns. Use train_som param: columns.)",
+                    "2. TRANSFORMS — Any right-skewed or heavy-tailed columns? Consider log, log1p, sqrt, or rank. (Use train_som param: transforms.)",
+                    "3. CYCLIC FEATURES — Any periodic variables (hour 0–24, weekday, angle 0–360)? These need cyclic encoding. (Use train_som param: cyclic_features, e.g. [{ feature: \"hour\", period: 24 }].)",
+                    "4. TEMPORAL FEATURES — Any datetime columns? We can extract day_of_year, month, hour_of_day, etc. (Use train_som param: temporal_features; preview shows suggested format.)",
+                    "5. FEATURE WEIGHTS — Should any feature count more or less? (Use train_som param: feature_weights, e.g. { temperature: 2.0 }.)",
+                    "6. DERIVED VARIABLES — Any new columns from expressions (e.g. ratio, difference)? (Use derive_variable before or after training; or add later with project_variable.)",
+                    "7. GRID & MODEL — Grid size (e.g. 20×20 or 25×25), epochs (e.g. [30, 15]), and model (SOM, RSOM, or SOFT variants). Use preset=quick|standard|refined or set grid_x, grid_y, epochs explicitly. Output format (default pdf) and colormap (default coolwarm) can be set here or changed later via recolor_som without retraining.",
+                    "",
+                    "Start by calling datasets(action=preview, dataset_id=\"" + dataset_id + "\") to show me the columns and statistics, then go through the steps.",
+                ].join("\n"),
             },
         },
     ],