@barivia/barsom-mcp 0.9.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -97,11 +97,12 @@ Call at the **start of mapping work** (or when the user asks what the MCP can do
97
97
 
98
98
  | Action | Use when |
99
99
  |--------|----------|
100
- | `train_map` | Submitting a new map training job — full control: model type, grid, epochs, cyclic/temporal features, transforms. Returns `job_id`; poll with `jobs(action=status, job_id=...)`. |
100
+ | `train_map` | Submitting a new map training job — full control: model type, grid, epochs, cyclic/temporal features, transforms. Returns `job_id`; poll with `jobs(action=status, job_id=...)`. Complete-case data only (no NaNs in training columns). |
101
+ | `train_impute` | Sparse training data: accelerated missSOM / SIOM missSOM — trains a map and imputes missing cells in one job. Defaults: `model=auto`, `cv_folds=5`. Returns map artifacts + `imputed.csv`, `imputation_mask.csv`, optional `quality.csv` / `imputation_uncertainty.csv`. Plain numeric columns only. Valid parent for `inference(impute_column)`. |
101
102
  | `train_siom_map` | Submitting a self-interacting map training job — same grid-map workflow plus SIOM controls such as `gamma`, `siom_decay`, and penalty selection. |
102
103
  | `train_floop_siom` | Submitting a FLooP-SIOM job — growing node-budget manifold; default `topology=free` (CHL); optional `topology=chain` for strict 1D linked list. |
103
104
  | `train_floop_chain` | Deprecated alias for `train_floop_siom` — same behavior. |
104
- | `status` | Polling after any async job — every 10–15s |
105
+ | `status` | Polling after any async job — every 10–15s; map jobs expose `progress_phase` (ordering, convergence, cv, artifacts) |
105
106
  | `list` | Finding job IDs, checking pipeline state |
106
107
  | `compare` | Picking the best run from a set (QE, TE, silhouette table) |
107
108
  | `cancel` | Stopping a running job |
@@ -127,7 +128,7 @@ All actions use a frozen trained map — no retraining. Derived columns use **`d
127
128
  | Action | Output | Timing |
128
129
  |--------|--------|--------|
129
130
  | `predict` | Score rows against the trained map. **Inputs:** `dataset_id` (defaults to the parent training dataset) **or** inline `rows` (≤500). **Output style** (`output` param): `"compact"` → `predictions.csv` (row_id, bmu_x/y, bmu_node_index, cluster_id [+ QE / qe_p95 / potential_anomaly when scoring **new** data]); `"annotated"` → `annotated.csv` (original CSV + BMU columns appended). **Regime auto-detected:** when the resolved dataset matches the training dataset, QE columns are intentionally omitted in compact output (training-set fit ≠ generalisation; the p95 anomaly flag would be circular). Prefer `dataset_id` for batches and SIOM/irregular maps. | 5–120s |
130
- | `impute_column` | Fill a numeric **target_column** not used in training: **requires** `dataset_id` + `target_column`. Dataset must contain all training features plus the target. Pools observed target values from rows mapped to this row's BMU and topology neighbors (BMU + neighbors, often 7 nodes on hex interior; fewer on borders unless the map is periodic). `only_missing` (default true); `impute_aggregation`: mean or median. **Not** held-out validated — map-local estimate. Output **`imputed.csv`**. | 5–120s |
131
+ | `impute_column` | Fill a numeric **target_column** not used in training: **requires** `dataset_id` + `target_column`. Parent job: completed **train_map** or **train_impute**. Dataset must contain all training features plus the target. Pools observed target values from rows mapped to this row's BMU and topology neighbors (BMU + neighbors, often 7 nodes on hex interior; fewer on borders unless the map is periodic). `only_missing` (default true); `impute_aggregation`: mean or median. **Not** held-out validated — map-local estimate. Output **`imputed.csv`**. For holes across many training columns, prefer **jobs(train_impute)** first. | 5–120s |
131
132
  | `compare` | density-diff heatmap + top gained/lost nodes — drift, A/B, cohort | 30–120s |
132
133
  | `project_columns` | Project one or more dataset columns onto the trained map (component planes) | async |
133
134
  | `report` | Report **manifest** (figure names, download URLs, metrics, cluster summary) — sync; use with `results(download)` on the training `job_id` for `report.pdf` when present; build custom PDFs in Quarto/Jupyter | immediate |
@@ -1,5 +1,5 @@
1
1
  import { apiCall } from "./shared.js";
2
- const FALLBACK_PREFIX = "Please run datasets(action=preview, dataset_id=\"{id}\") to inspect columns, then datasets(action=analyze, dataset_id=\"{id}\") to see which columns and temporal periods are most informative. Then choose the training path: jobs(action=train_map, dataset_id=\"{id}\", ...) for a standard fixed-grid SOM, jobs(action=train_siom_map, dataset_id=\"{id}\", ...) for a fixed-grid SIOM, or jobs(action=train_floop_siom, dataset_id=\"{id}\", ...) for FLooP-SIOM (default topology=free / CHL; optional topology=chain).";
2
+ const FALLBACK_PREFIX = "Before upload: for jobs(action=train_map) ensure training columns have no NaNs; for jobs(action=train_impute) missing cells in training columns are OK (plain numeric only). Please run datasets(action=preview, dataset_id=\"{id}\") to inspect columns, then datasets(action=analyze, dataset_id=\"{id}\") to see which columns and temporal periods are most informative. Then choose the training path: jobs(action=train_map, dataset_id=\"{id}\", ...) for complete-case data, jobs(action=train_impute, dataset_id=\"{id}\", ...) for sparse matrices, jobs(action=train_siom_map, dataset_id=\"{id}\", ...) for a fixed-grid SIOM, or jobs(action=train_floop_siom, dataset_id=\"{id}\", ...) for FLooP-SIOM (default topology=free / CHL; optional topology=chain).";
3
3
  /** Used by the `prepare_training` MCP prompt; prefers tier-scoped text from the API when online. */
4
4
  export async function resolvePrepareTrainingPromptText(datasetId) {
5
5
  let promptText = FALLBACK_PREFIX.replaceAll("{id}", datasetId);
package/dist/shared.js CHANGED
@@ -19,7 +19,7 @@ export const RETRYABLE_STATUS = new Set([502, 503, 504]);
19
19
  * X-Barsom-Client-Version so the server can annotate tool guidance with the
20
20
  * wrapper version each action requires. Keep in sync with package.json on bump.
21
21
  */
22
- export const CLIENT_VERSION = "0.9.0";
22
+ export const CLIENT_VERSION = "0.10.2";
23
23
  /** User-facing links; keep aligned with barivia.se / api.barivia.se. */
24
24
  export const PUBLIC_SITE_ORIGIN = "https://barivia.se";
25
25
  /** Poll window for datasets(add_expression) / derive jobs (server-side work can exceed 30s). */
@@ -19,8 +19,8 @@ export function registerDatasetsTool(server) {
19
19
 
20
20
  action=upload: PREFER file_path — server reads from workspace root (token-efficient; no file content in context). Use csv_data only for small inline pastes (e.g. <10KB). Returns dataset ID. Then use datasets(action=preview) before jobs(action=train_map).
21
21
 
22
- Step 0 (before upload): (1) CSV with header; one row per observation. (2) Columns you will use for training must be numeric (or datetime if you will use temporal extraction). (3) No NaNs/missing in those columns (engine does not impute). (4) Categoricals: encode (e.g. one-hot, label) or exclude — do not use raw categorical columns as training features; preview shows which columns are non-numeric. (5) Optional: if you plan to use transition_flow, sort rows chronologically before upload.
23
- Step 1 (after upload): Upload, then always datasets(action=preview) to verify column types and spot cyclics/datetime. Add derived columns with datasets(action=add_expression, dataset_id=..., name=..., expression=...); fix or subset before jobs(action=train_map).
22
+ Step 0 (before upload): (1) CSV with header; one row per observation. (2) Columns you will use for training must be numeric (or datetime if you will use temporal extraction). (3) Complete-case path (jobs action=train_map): no NaNs in training columns. Sparse path (jobs action=train_impute): missing cells in training columns are OK — the job trains missSOM and returns dense imputed.csv; plain numeric columns only. (4) Categoricals: encode (e.g. one-hot, label) or exclude — do not use raw categorical columns as training features; preview shows which columns are non-numeric. (5) Optional: if you plan to use transition_flow, sort rows chronologically before upload.
23
+ Step 1 (after upload): Upload, then always datasets(action=preview) to verify column types and spot cyclics/datetime. Add derived columns with datasets(action=add_expression, dataset_id=..., name=..., expression=...); choose train_map (complete-case) or train_impute (sparse matrix) before submit.
24
24
 
25
25
  action=preview: Show columns, stats, sample rows, cyclic/datetime detections. ALWAYS preview before jobs(action=train_map) on an unfamiliar dataset.
26
26
  action=analyze: Pre-training analysis on numeric columns — Pearson correlation, autocorrelation periodicity scores, and column recommendations (train, consider_dropping, project_later, low_variance). Use to choose which columns to include in training and which to project onto the map after training.
@@ -42,7 +42,7 @@ action=delete: Remove a dataset and all S3 data permanently.
42
42
 
43
43
  BEST FOR: Tabular numeric data. CSV with header required.
44
44
  NOT FOR: Real-time data streams or binary files — upload a snapshot CSV instead.
45
- ESCALATION: If upload fails with column errors, open the file locally and verify the header row. If preview shows unexpected nulls, the user must clean the CSV before training.`, {
45
+ ESCALATION: If upload fails with column errors, open the file locally and verify the header row. If preview shows nulls in training columns: use jobs(action=train_impute) for sparse data, or clean/subset for jobs(action=train_map).`, {
46
46
  action: z
47
47
  .enum(["upload", "preview", "analyze", "list", "subset", "delete", "add_expression", "reduce_spectral"])
48
48
  .describe("upload: add CSV; preview: inspect columns/stats; analyze: pre-training correlation and periodicity; list: see all datasets; subset: create filtered subset; delete: remove dataset; add_expression: add derived column from expression; reduce_spectral: collapse a long ordered numeric block (e.g. spectrum, time series) into a small per-row feature set via PCA / log_sample / uniform_sample / stats"),
@@ -256,10 +256,12 @@ ESCALATION: If upload fails with column errors, open the file locally and verify
256
256
  const considerDropping = rec.consider_dropping ?? [];
257
257
  const projectLater = rec.project_later ?? [];
258
258
  const lowVariance = rec.low_variance ?? [];
259
+ const periodicityCaveat = data.periodicity_caveat;
259
260
  const lines = [
260
261
  `Pre-training analysis: ${data.name} (${data.dataset_id})`,
261
262
  `${data.total_rows} rows × ${columns.length} numeric columns analyzed`,
262
263
  ``,
264
+ ...(periodicityCaveat ? [`Note: ${periodicityCaveat}`, ``] : []),
263
265
  `Column recommendations:`,
264
266
  ` Train (use in jobs(action=train_map)): ${trainList.length ? trainList.join(", ") : "—"}`,
265
267
  ` Consider dropping (highly correlated with another): ${considerDropping.length ? considerDropping.join(", ") : "—"}`,
@@ -8,6 +8,7 @@ export const JOBS_DESCRIPTION_BASE = `Manage and inspect jobs.
8
8
  | list | Finding job IDs, checking what is pending/completed, reviewing hyperparameters. Response includes job_type (train_map, report, recolor, project, transition_flow, compare, predict, impute_column, annotated_dataset, reduce_spectral) to filter or display. |
9
9
  | compare | Picking the best training run from a set of completed jobs |
10
10
  | train_map | Submitting a new map training job — returns job_id for polling |
11
+ | train_impute | Sparse training data: train a map AND impute missing cells in one job (accelerated missSOM) — returns map + imputed.csv |
11
12
  | train_siom_map | Submitting a self-interacting map training job — same map flow with SIOM coverage control |
12
13
  | train_floop_siom | Submitting a FLooP-SIOM job (growing manifold; default topology=free / CHL) — requires Premium or Enterprise plan (all_algorithms) |
13
14
  | train_floop_chain | Deprecated alias for train_floop_siom — same behavior; prefer train_floop_siom |
@@ -28,9 +29,10 @@ ESCALATION (action=status):
28
29
  - failed → error message and optional failure_stage (e.g. preprocessing, training, metrics, visualization, upload) indicate which phase broke:
29
30
  - memory/allocation error: reduce batch_size or grid size and retrain
30
31
  - column missing: verify with datasets(action=preview)
31
- - NaN error: user must clean the dataset
32
+ - NaN error on train_map: use jobs(action=train_impute) for sparse training columns, or clean the CSV for complete-case train_map
32
33
 
33
34
  action=train_map / train_siom_map: Submits a grid-map training job. Returns job_id — poll with jobs(action=status, job_id=...).
35
+ action=train_impute: Submits missing-tolerant map training (accelerated missSOM / SIOM missSOM). Use when many cells are missing across training columns; use inference(impute_column) when you already have a complete-case map and need to fill one held-out column. Plain numeric columns only (no cyclic/temporal/categorical). Defaults: model=auto (SIOM on som_siom+ plans), cv_folds=5 → quality.csv (header feature). status returns progress_phase (ordering/convergence/cv/artifacts). High-dim auto policy: >40 features caps viz; >100 fast metrics; >200 GPU when entitled. Params: viz_mode, viz_top_components, emit_cell_uncertainty, quality_metrics. Artifacts: imputed.csv, imputation_mask.csv, optional imputation_uncertainty.csv. Completed train_impute job_id is a valid parent for inference(impute_column).
34
36
  Presets: quick | standard | refined | high_res — use preset=... for grid/epochs/batch defaults; call training_guidance for details.
35
37
  Presets refined/high_res may use GPU. On CPU-only hosts pass backend=cpu. API expects strings "cpu" | "gpu" | "gpu_graphs" (no colon). Future backends (e.g. non-CUDA) may be added under the same contract.
36
38
  normalize: "auto" (default) = scale only non-cyclic features; "all" = scale every feature. Use "auto" when using cyclic_features.
@@ -224,7 +226,7 @@ export function buildTrainMapParams(args, presets) {
224
226
  export function registerJobsTool(server, description) {
225
227
  server.tool("jobs", description, {
226
228
  action: z
227
- .enum(["status", "list", "compare", "cancel", "delete", "train_map", "train_siom_map", "train_floop_siom", "train_floop_chain", "batch_predict", "run_baseline_study"])
229
+ .enum(["status", "list", "compare", "cancel", "delete", "train_map", "train_impute", "train_siom_map", "train_floop_siom", "train_floop_chain", "batch_predict", "run_baseline_study"])
228
230
  .describe("status: check progress; list: see all jobs; compare: metrics table; cancel: stop job; delete: remove job + files; train_map: submit standard map training; train_siom_map: submit SIOM map training; train_floop_siom: submit FLooP-SIOM (preferred); train_floop_chain: deprecated alias for train_floop_siom; batch_predict: submit multiple predict jobs at once; run_baseline_study: auto-configure and train a baseline SOM"),
229
231
  job_id: z
230
232
  .string()
@@ -267,7 +269,7 @@ export function registerJobsTool(server, description) {
267
269
  }
268
270
  return v;
269
271
  }, z.union([z.number().int(), z.array(z.number().int()).length(2)]).optional()),
270
- model: z.enum(["SOM", "RSOM", "SOM-SOFT", "RSOM-SOFT"]).optional().default("SOM"),
272
+ model: z.enum(["auto", "SOM", "SIOM", "RSOM", "SOM-SOFT", "RSOM-SOFT"]).optional(),
271
273
  periodic: z.boolean().optional().default(true),
272
274
  columns: z.array(z.string()).optional(),
273
275
  cyclic_features: z.array(z.object({
@@ -306,6 +308,14 @@ export function registerJobsTool(server, description) {
306
308
  output_dpi: z.enum(["standard", "retina", "print"]).optional().default("retina"),
307
309
  colormap: z.string().optional(),
308
310
  row_range: z.tuple([z.number().int().min(1), z.number().int().min(1)]).optional(),
311
+ cv_folds: z.number().int().min(0).max(20).optional()
312
+ .describe("train_impute only: held-out MAE/RMSE/R2 per column (0=off, default 5) → quality.csv"),
313
+ viz_mode: z.enum(["full", "summary", "summary_plus_top"]).optional()
314
+ .describe("Visualization density; auto-capped when feature count > 40"),
315
+ viz_top_components: z.number().int().min(0).max(64).optional()
316
+ .describe("With viz_mode=summary_plus_top: upload top-N component maps by variance (default 8)"),
317
+ emit_cell_uncertainty: z.boolean().optional()
318
+ .describe("train_impute: write imputation_uncertainty.csv (pool_std per imputed cell)"),
309
319
  gamma: z.preprocess((v) => (v !== undefined && v !== null && typeof v === "string") ? parseFloat(v) : v, z.number().optional()),
310
320
  gamma_f: z.preprocess((v) => (v !== undefined && v !== null && typeof v === "string") ? parseFloat(v) : v, z.number().optional()),
311
321
  siom_decay: z.preprocess((v) => (v !== undefined && v !== null && typeof v === "string") ? parseFloat(v) : v, z.number().optional()),
@@ -361,8 +371,8 @@ export function registerJobsTool(server, description) {
361
371
  const jid = String(data.id ?? "");
362
372
  return { content: [{ type: "text", text: `Baseline study submitted. Job ID: ${jid}\nGrid size: ${side}x${side}\nNormalization: MAD\n\nPoll with jobs(action=status, job_id="${jid}") until complete, then retrieve with results(action=get, job_id="${jid}"). Optional: training_monitor(job_id="${jid}") for a visual panel—not required.` }] };
363
373
  }
364
- if (action === "train_map" || action === "train_siom_map") {
365
- const { preset, grid_x, grid_y, epochs, model, periodic, columns, cyclic_features, temporal_features, feature_weights, transforms, auto_log_transforms, time_delay_embeddings, categorical_features, normalize, sigma_f, learning_rate, batch_size, quality_metrics, backend, output_format, output_dpi, colormap, row_range, gamma, gamma_f, siom_decay, siom_penalty, penalty_alpha, reset_per_epoch, siom_feature_geometry, siom_qe_backend, siom_qe_batch_size, label, } = args;
374
+ if (action === "train_map" || action === "train_siom_map" || action === "train_impute") {
375
+ const { preset, grid_x, grid_y, epochs, model, periodic, columns, cyclic_features, temporal_features, feature_weights, transforms, auto_log_transforms, time_delay_embeddings, categorical_features, normalize, sigma_f, learning_rate, batch_size, quality_metrics, backend, output_format, output_dpi, colormap, row_range, gamma, gamma_f, siom_decay, siom_penalty, penalty_alpha, reset_per_epoch, siom_feature_geometry, siom_qe_backend, siom_qe_batch_size, label, cv_folds, viz_mode, viz_top_components, emit_cell_uncertainty, } = args;
366
376
  let PRESETS = {};
367
377
  try {
368
378
  PRESETS = await fetchTrainingPresets();
@@ -375,7 +385,7 @@ export function registerJobsTool(server, description) {
375
385
  }
376
386
  }
377
387
  if (!dataset_id)
378
- throw new Error("jobs(train_map) requires dataset_id");
388
+ throw new Error(`jobs(${action}) requires dataset_id`);
379
389
  const { params, paramSummary, effectiveGrid } = buildTrainMapParams({
380
390
  preset, grid_x, grid_y, epochs, model, periodic, columns, cyclic_features,
381
391
  temporal_features, feature_weights, transforms, auto_log_transforms,
@@ -383,6 +393,24 @@ export function registerJobsTool(server, description) {
383
393
  normalize, sigma_f, learning_rate, batch_size, quality_metrics, backend,
384
394
  output_format, output_dpi, colormap, row_range,
385
395
  }, PRESETS);
396
+ if (action === "train_impute") {
397
+ params._job_type = "train_impute";
398
+ params.model = model ?? "auto";
399
+ params.cv_folds = cv_folds ?? 5;
400
+ if (viz_mode !== undefined)
401
+ params.viz_mode = viz_mode;
402
+ if (viz_top_components !== undefined)
403
+ params.viz_top_components = viz_top_components;
404
+ if (emit_cell_uncertainty !== undefined)
405
+ params.emit_cell_uncertainty = emit_cell_uncertainty;
406
+ // Plain numeric path only — strip unsupported keys if caller passed them
407
+ delete params.cyclic_features;
408
+ delete params.temporal_features;
409
+ delete params.categorical_features;
410
+ delete params.transforms;
411
+ delete params.auto_log_transforms;
412
+ delete params.time_delay_embeddings;
413
+ }
386
414
  if (action === "train_siom_map") {
387
415
  params._job_type = "train_siom";
388
416
  if (gamma !== undefined)
@@ -415,7 +443,9 @@ export function registerJobsTool(server, description) {
415
443
  submitBody.label = label;
416
444
  const data = (await apiCall("POST", "/v1/jobs", submitBody));
417
445
  const newJobId = data.id;
418
- const variantPrefix = action === "train_siom_map" ? "variant=siom" : "variant=som";
446
+ const variantPrefix = action === "train_siom_map" ? "variant=siom"
447
+ : action === "train_impute" ? "variant=train_impute"
448
+ : "variant=som";
419
449
  data.effective_params = `${variantPrefix}, ${paramSummary}`;
420
450
  try {
421
451
  const sys = (await apiCall("GET", "/v1/system/info"));
@@ -426,7 +456,7 @@ export function registerJobsTool(server, description) {
426
456
  let msg = `Job submitted (${variantPrefix}, ${paramSummary}). `;
427
457
  if (waitMinutes > 1)
428
458
  msg += `You are #${pending + 1} in queue. Estimated wait: ~${waitMinutes} min. `;
429
- msg += `Poll with jobs(action=status, job_id="${newJobId}"). When status is completed, use results(action=get, job_id="${newJobId}") to view the map and metrics.`;
459
+ msg += `Poll with jobs(action=status, job_id="${newJobId}"). When status is completed, use results(action=get, job_id="${newJobId}") to view the map${action === "train_impute" ? " and imputed.csv" : ""} and metrics.`;
430
460
  msg += ` Optional: training_monitor(job_id="${newJobId}") for charts—not required.`;
431
461
  if (effectiveGrid && totalRows > 0 && effectiveGrid[0] * effectiveGrid[1] > totalRows * 0.75) {
432
462
  msg += ` Note: Grid may be large for ${totalRows} rows (consider grid=auto for fewer dead nodes).`;
@@ -434,7 +464,7 @@ export function registerJobsTool(server, description) {
434
464
  data.message = msg;
435
465
  }
436
466
  catch {
437
- let msg = `Job submitted (${variantPrefix}, ${paramSummary}). Poll with jobs(action=status, job_id="${newJobId}"). When status is completed, use results(action=get, job_id="${newJobId}") to view the map and metrics.`;
467
+ let msg = `Job submitted (${variantPrefix}, ${paramSummary}). Poll with jobs(action=status, job_id="${newJobId}"). When status is completed, use results(action=get, job_id="${newJobId}") to view the map${action === "train_impute" ? " and imputed.csv" : ""} and metrics.`;
438
468
  msg += ` Optional: training_monitor(job_id="${newJobId}") for charts—not required.`;
439
469
  if (effectiveGrid && totalRows > 0 && effectiveGrid[0] * effectiveGrid[1] > totalRows * 0.75) {
440
470
  msg += ` Note: Grid may be large for ${totalRows} rows (consider grid=auto for fewer dead nodes).`;
@@ -549,6 +579,10 @@ export function registerJobsTool(server, description) {
549
579
  const label = data.label != null && data.label !== "" ? String(data.label) : null;
550
580
  const jobDesc = label ? `Job ${label} (id: ${job_id})` : `Job ${job_id}`;
551
581
  let text = `${jobDesc}: ${status} (${progress.toFixed(1)}%)`;
582
+ const phase = data.progress_phase != null && data.progress_phase !== "" ? String(data.progress_phase) : null;
583
+ if (phase && status === "running") {
584
+ text += ` — phase: ${phase}`;
585
+ }
552
586
  if (status === "completed") {
553
587
  text += ` | Results ready. Use results(action=get, job_id="${job_id}") to retrieve.`;
554
588
  }
@@ -29,7 +29,7 @@ action=export: Structured data exports. Use export_type= to choose what to expor
29
29
  - export_type=nodes: per-node hit count + feature stats. Profile clusters and operating modes.
30
30
 
31
31
  action=download: Save figures to disk. Use so user can open, share, or version files locally.
32
- - folder: e.g. "." or "./results". Interpreted relative to the client's current working directory (or workspace). Files are always written into a per-job subfolder (the job label, else the job_id) under this folder, so downloading several jobs into one folder never overwrites the shared filenames every job emits (e.g. summary.json).
32
+ - folder: e.g. "." or "./results". Interpreted relative to the client's current working directory (or workspace). Files are written into a per-job subfolder named job_type + label (or job_id) under this folder, so downloading several jobs (even with the same label across types) never overwrites shared filenames like summary.json.
33
33
  - figures: "all" (default) or array of filenames.
34
34
  - include_json: also save summary.json.
35
35
 
@@ -247,6 +247,44 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
247
247
  "Map-local pool estimates — not held-out validated predictions.",
248
248
  ].join("\n") });
249
249
  }
250
+ else if (jobType === "train_impute") {
251
+ const imp = summary.imputation ?? {};
252
+ const cv = imp.cv_quality ?? null;
253
+ const policy = summary.effective_policy ?? {};
254
+ const hitStats = summary.hit_stats ?? {};
255
+ const siom = summary.siom ?? undefined;
256
+ const fmt = (v) => v !== null && v !== undefined ? Number(v).toFixed(4) : "N/A";
257
+ const lines = [
258
+ `missSOM train+impute (${summary.model ?? imp.variant ?? "SOM"}) — ${resultsHeader}`,
259
+ `Grid: ${(summary.grid ?? [0, 0]).join("×")} | Features: ${summary.n_features ?? 0} | Samples: ${summary.n_samples ?? 0}`,
260
+ `Missing cells: ${imp.n_missing_cells ?? "?"} (${imp.missing_fraction !== undefined ? (Number(imp.missing_fraction) * 100).toFixed(1) + "%" : "?"})`,
261
+ `Map quality — QE: ${fmt(summary.quantization_error)} | TE: ${fmt(summary.topographic_error)} | EV: ${fmt(summary.explained_variance)}`,
262
+ `Observed-dimension QE: ${fmt(imp.observed_quantization_error)}`,
263
+ ...(siom ? [`SIOM — utilization: ${fmt(siom.utilization)} | dead_fraction: ${fmt(siom.dead_fraction)} | siom_qe: ${fmt(siom.siom_qe)}`] : []),
264
+ hitStats.grid_suggestion ? `Grid hint: ${String(hitStats.grid_suggestion)}` : "",
265
+ ...(policy.auto_rules_applied && Array.isArray(policy.auto_rules_applied) && policy.auto_rules_applied.length > 0
266
+ ? [`Auto policy: ${policy.auto_rules_applied.join(", ")} (${policy.viz_mode ?? "viz"}, metrics=${policy.quality_metrics ?? "?"})`]
267
+ : []),
268
+ cv ? [
269
+ "",
270
+ `Held-out imputation accuracy (cv_folds=${cv.cv_folds ?? "?"}, method=${cv.cv_method ?? "held_out_prototype_on_trained_map"}):`,
271
+ cv.columns_sampled ? ` (sampled ${cv.columns_evaluated ?? "?"} columns for speed)` : "",
272
+ cv.aggregate ? ` Aggregate MAE: ${fmt(cv.aggregate.mae)} | RMSE: ${fmt(cv.aggregate.rmse)}` : "",
273
+ " See quality.csv (header column: feature).",
274
+ ].filter(Boolean).join("\n") : "\nHeld-out accuracy: skipped (cv_folds=0). Set cv_folds=5 to validate imputation.",
275
+ "",
276
+ `Artifacts: ${(summary.files ?? []).filter((f) => f !== "summary.json").join(", ")}`,
277
+ "Next: results(action=download) for imputed.csv / quality.csv; inference(impute_column) with this job_id as parent for held-out columns.",
278
+ ].filter((l) => l !== "");
279
+ content.push({ type: "text", text: lines.join("\n") });
280
+ for (const name of getResultsImagesToFetch(jobType, summary, figures, include_individual)) {
281
+ const cap = getCaptionForImage(name);
282
+ if (cap)
283
+ content.push({ type: "text", text: cap });
284
+ await tryAttachImage(content, job_id, name);
285
+ inlinedImages.add(name);
286
+ }
287
+ }
250
288
  else if (jobType === "reduce_spectral") {
251
289
  const method = String(summary.method ?? "?");
252
290
  const sourceCols = summary.source_columns ?? [];
@@ -393,7 +431,7 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
393
431
  return "";
394
432
  return `Columns not used in training: ${excluded.join(", ")}. You can project them onto this map with inference(action=project_columns, job_id=${job_id}, dataset_id=<dataset_id>, columns=[${excluded.map((c) => `"${c}"`).join(", ")}]) to see how they distribute across the topology. If you ran datasets(action=analyze) before training, any columns it recommended as "project later" are especially good candidates.`;
395
433
  })(),
396
- ...((jobType === "train_som" || jobType === "train_siom") ? ["", `Next: results(action=export, export_type=training_log) for learning curve; results(action=download) to save figures; jobs(action=compare, job_ids=[...]) to compare runs; inference(action=predict) to score new data; inference(action=project_columns) to project other variables onto the map.`] : []),
434
+ ...((jobType === "train_som" || jobType === "train_siom" || jobType === "train_impute") ? ["", `Next: results(action=export, export_type=training_log) for learning curve; results(action=download) to save figures${jobType === "train_impute" ? " (includes imputed.csv)" : ""}; jobs(action=compare, job_ids=[...]) to compare runs; inference(action=predict) to score new data; inference(action=project_columns) to project other variables onto the map.`] : []),
397
435
  ].filter((l) => l !== "").join("\n");
398
436
  content.push({ type: "text", text: textSummary });
399
437
  const imagesToFetch = getResultsImagesToFetch(jobType, summary, figures, include_individual);
@@ -581,7 +619,8 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
581
619
  // Always namespace each job's files into its own subfolder so that
582
620
  // downloading multiple jobs (or job types) into the same folder never
583
621
  // overwrites the shared filenames every job emits (e.g. summary.json).
584
- const jobSubfolder = (jobLabel ?? job_id).replace(/[^a-zA-Z0-9_.-]/g, "_");
622
+ // Prefix job_type so the same label on train vs predict/impute/flow jobs cannot collide.
623
+ const jobSubfolder = `${jobType}_${jobLabel ?? job_id}`.replace(/[^a-zA-Z0-9_.-]/g, "_");
585
624
  resolvedDir = path.join(resolvedDir, jobSubfolder);
586
625
  if (jobType === "render_variant" && summary.colormap) {
587
626
  const colormapDir = String(summary.colormap).replace(/[^a-zA-Z0-9_-]/g, "_");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@barivia/barsom-mcp",
3
- "version": "0.9.0",
3
+ "version": "0.10.2",
4
4
  "description": "barSOM MCP proxy — connect any MCP client to the barSOM cloud API for Self-Organizing Map analytics",
5
5
  "keywords": [
6
6
  "mcp",