@barivia/barsom-mcp 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -126,7 +126,8 @@ All actions use a frozen trained map — no retraining. Derived columns use **`d
126
126
 
127
127
  | Action | Output | Timing |
128
128
  |--------|--------|--------|
129
- | `predict` | Score rows against the trained map. **Inputs:** `dataset_id` (defaults to the parent training dataset) **or** inline `rows` (≤500). **Output style** (`output` param): `"compact"` → `predictions.csv` (row_id, bmu_x/y, bmu_node_index, cluster_id [+ QE / qe_p95 / potential_anomaly when scoring **new** data]); `"annotated"` → `enriched.csv` (original CSV + BMU columns appended). **Regime auto-detected:** when the resolved dataset matches the training dataset, QE columns are intentionally omitted in compact output (training-set fit ≠ generalisation; the p95 anomaly flag would be circular). Prefer `dataset_id` for batches and SIOM/irregular maps. | 5–120s |
129
+ | `predict` | Score rows against the trained map. **Inputs:** `dataset_id` (defaults to the parent training dataset) **or** inline `rows` (≤500). **Output style** (`output` param): `"compact"` → `predictions.csv` (row_id, bmu_x/y, bmu_node_index, cluster_id [+ QE / qe_p95 / potential_anomaly when scoring **new** data]); `"annotated"` → `annotated.csv` (original CSV + BMU columns appended). **Regime auto-detected:** when the resolved dataset matches the training dataset, QE columns are intentionally omitted in compact output (training-set fit ≠ generalisation; the p95 anomaly flag would be circular). Prefer `dataset_id` for batches and SIOM/irregular maps. | 5–120s |
130
+ | `impute_column` | Fill a numeric **target_column** that was not used in training. **Requires** `dataset_id` + `target_column`; the dataset must contain all training features plus the target. For each row, pools observed target values from rows mapped to that row's BMU and its topology neighbors (BMU + neighbors, often 7 nodes on hex interior; fewer on borders unless the map is periodic). `only_missing` (default true) keeps observed values; `impute_aggregation`: mean or median of the pool. This is a map-local estimate, **not** a held-out-validated prediction. Output: **`imputed.csv`**. | 5–120s |
130
131
  | `compare` | density-diff heatmap + top gained/lost nodes — drift, A/B, cohort | 30–120s |
131
132
  | `project_columns` | Project one or more dataset columns onto the trained map (component planes) | async |
132
133
  | `report` | Report **manifest** (figure names, download URLs, metrics, cluster summary) — sync; use with `results(download)` on the training `job_id` for `report.pdf` when present; build custom PDFs in Quarto/Jupyter | immediate |
@@ -168,7 +169,6 @@ The right viewer depends on **(MCP App support)** **and** **(can the human reach
168
169
  ### Migration notes
169
170
 
170
171
  - **`explore_map` → `results_explorer`:** Update Cursor, Claude Desktop, or other MCP configs that still reference `explore_map`. The alias remains for backward compatibility.
171
- - **`inference(action=enrich)` → `inference(action=predict, output="annotated")`:** the `enrich` action has been removed in favor of regime-aware `predict`. Calling `predict` with `output="annotated"` (and the default training dataset) returns the same `enriched.csv` artifact. Calling `predict` on the training dataset with the default `output="compact"` now correctly omits QE / `qe_p95` / `potential_anomaly` fields — those are fitting errors on training data, not generalisation metrics.
172
172
  - **Shorter `info` prompt:** Clients that relied on the old long `info` text should use **`guide_barsom_workflow`** or server **instructions** for the full story.
173
173
 
174
174
  ### `send_feedback`
package/dist/index.js CHANGED
@@ -1,2 +1,2 @@
1
1
  #!/usr/bin/env node
2
- import{McpServer as e}from"@modelcontextprotocol/sdk/server/mcp.js";import{StdioServerTransport as t}from"@modelcontextprotocol/sdk/server/stdio.js";import{z as o}from"zod";import{getUiCapability as r,registerAppResource as n,RESOURCE_MIME_TYPE as a}from"@modelcontextprotocol/ext-apps/server";import{startVizServer as s}from"./viz-server.js";import{API_KEY as i,apiCall as l,apiRawCall as p,loadViewHtml as c,setVizPort as m,setClientSupportsMcpApps as d}from"./shared.js";import{registerDatasetsTool as u}from"./tools/datasets.js";import{registerJobsTool as f,JOBS_DESCRIPTION_BASE as g}from"./tools/jobs.js";import{registerResultsTool as _}from"./tools/results.js";import{registerExploreMapTool as h,RESULTS_EXPLORER_URI as b}from"./tools/explore_map.js";import{registerAccountTool as w}from"./tools/account.js";import{registerInferenceTool as y}from"./tools/inference.js";import{registerGuideBarsomTool as j}from"./tools/guide_barsom.js";import{registerTrainingGuidanceTool as v}from"./tools/training_guidance.js";import{registerFeedbackTool as P}from"./tools/feedback.js";import{registerTrainingPrepTools as x,TRAINING_PREP_URI as I}from"./tools/training_prep.js";import{registerTrainingMonitorTool as k,TRAINING_MONITOR_URI as M}from"./tools/training_monitor.js";import{resolvePrepareTrainingPromptText as O}from"./prepare_training_prompt.js";i||(console.error("Error: BARIVIA_API_KEY not set. 
Set it in your MCP client config."),process.exit(1));const S=new e({name:"analytics-engine",version:"0.8.0",instructions:'# Barivia Mapping Analytics Engine\n\nSelf-organizing map (SOM) analytics: project high-dimensional data to a 2D grid for clusters, gradients, and anomalies.\n\n## Workflow (short)\n\nUpload (`datasets(upload)`) → `datasets(preview)` and `datasets(analyze)` before train → submit one of `jobs(train_map)`, `jobs(train_siom_map)`, or `jobs(train_floop_siom)` (only if plan allows FLooP) → poll `jobs(status)` every 10–15s until `completed` → `results(get)` for metrics and figures (there is no separate analyze tool). Then `jobs(compare)`, `results(download/recolor/transition_flow)`, or `inference` as needed.\n\n**Full detail:** Call `guide_barsom_workflow` for plan-scoped tool map, training modes, async rules, optional MCP App UIs, and step-by-step SOP (from the Barivia API when online).\n\n## Tool map (compact)\n\n| Area | Tool | Notes |\n|------|------|--------|\n| Data | `datasets` | upload, preview, analyze, list, subset, add_expression, reduce_spectral (pca/log_sample/uniform_sample/stats for long ordered numeric blocks), delete |\n| Jobs | `jobs` | train_map, train_siom_map, train_floop_siom (entitled), status, list, compare, cancel, delete, batch_predict, run_baseline_study; `train_floop_chain` = deprecated alias for train_floop_siom |\n| Results | `results` | get (figures="none" for metrics-only), export, download, recolor (async), transition_flow (async; time-ordered rows only) |\n| Inference | `inference` | predict (regime-aware; output="compact"|"annotated"; "annotated" replaces the removed `enrich`), compare, project_columns, report |\n| Account | `account` | status, burst/compute actions, history, add_funds |\n| Bootstrap | `guide_barsom_workflow` | orientation + SOP |\n| Parameters | `training_guidance` | presets and field hints (API-scoped) |\n| Prep | `prepare_training` prompt, `training_prep` + `submit_prepared_training` | checklist / 
interactive UI |\n| Explore | `results_explorer`, `training_monitor` | optional MCP Apps; `explore_map` = deprecated alias of `results_explorer`; `jobs(status)` and `results(get)` suffice without them |\n| Other | `send_feedback` | only after user agrees |\n\n## Async pattern\n\n- **Manual poll:** Training submits return `job_id` immediately — poll `jobs(status)` every 10–15s. **Running is not failed**; large grids or FLooP-SIOM can take many minutes. `max_nodes` (FLooP) is a total node budget, not grid side length.\n- **Often auto-polled:** `inference` actions, `results(recolor)`, `results(transition_flow)` may wait in-proxy; if you get a `job_id`, poll `jobs(status)` the same way.\n\nCredits: jobs consume compute credits; check `account(status)` before big runs. Slow networks: users can raise `BARIVIA_FETCH_TIMEOUT_MS`.\n\n## Constraints\n\n- Prep ladder: `prepare_training` prompt = narrative checklist; `training_guidance` = structured hints; `training_prep` = UI + guarded submit. Do not guess tiers or FLooP entitlement.\n- `inference(predict)`: prefer `dataset_id` for batch and for SIOM/irregular maps; single-row `rows` uses a fast path that can fail on some topologies — retry with `dataset_id`. FLooP-SIOM: if predict jobs fail while grid SIOM works, capture errors + `job_id`.\n- Column names are case-sensitive — match `datasets(preview)`.\n- Default training path is numeric/cyclic/temporal; use explicit `categorical_features` for baseline categoricals. `predict` must match the model contract.\n- After `recolor`, `transition_flow`, or `project_columns`, use the **new** `job_id` returned for follow-up `results` if applicable.'});n(S,b,b,{mimeType:a},async()=>{const e=await c("results-explorer");return{contents:[{uri:b,mimeType:a,text:e??"<html><body>Results Explorer view not built yet. 
Run: npm run build:views</body></html>"}]}}),n(S,I,I,{mimeType:a},async()=>{const e=await c("training-prep");return{contents:[{uri:I,mimeType:a,text:e??"<html><body>Training Preparation view not built yet.</body></html>"}]}}),n(S,M,M,{mimeType:a},async()=>{const e=await c("training-monitor");return{contents:[{uri:M,mimeType:a,text:e??"<html><body>Training Monitor view not built yet.</body></html>"}]}}),j(S),h(S),x(S),k(S),u(S),f(S,g),_(S),w(S),y(S),v(S),P(S),S.prompt("info","Short orientation for the Barivia Mapping MCP. For full plan-scoped workflow, tool map, and SOP, the model should call guide_barsom_workflow. Use when the user asks what this MCP can do or how to get started.",{},()=>({messages:[{role:"user",content:{type:"text",text:["Give a concise, scannable answer (headers + bullets):","","**What it is:** MCP client to the Barivia mapping engine (2D SOM / SIOM / FLooP-SIOM when entitled) over HTTPS.","","**First step:** Call `guide_barsom_workflow` for plan-scoped bootstrap (full tool list, async rules, training modes, optional MCP Apps, SOP).","","**Core path:** `datasets(upload)` → `datasets(preview)` + `datasets(analyze)` → choose training action → poll `jobs(status)` every 10–15s until completed → `results(get)` (all main figures/metrics; no separate analyze tool).","",'**Key tools:** `datasets` (data; reduce_spectral for spectra/long blocks), `jobs` (train/poll/compare/…; train_map accepts an optional `label` for readable compare rows), `results` (get/download/export/recolor/transition_flow; figures="none" for metrics-only), `inference` (predict regime-aware with output="compact"|"annotated"; replaces enrich, compare, project_columns, report), `account` (status/credits/queue).',"","**Prep help:** `prepare_training` prompt (checklist) · `training_guidance` (presets/JSON hints) · `training_prep` + `submit_prepared_training` (interactive UI).","","**Optional UI:** `results_explorer`, `training_monitor` — nice for browsing; not required if you use 
`results` + `jobs(status)`.","","**After training:** `jobs(compare)` across runs, `results(recolor)`, `inference(project_columns)` for variables not in training, `transition_flow` only if rows are time-ordered.","","**Rules:** Running ≠ failed. Column names must match `datasets(preview)` exactly. Do not call `_fetch_figure` from chat (host/UI only); use `results(get)` or `results_explorer`.","","Offer `send_feedback` only after asking the user."].join("\n")}}]})),S.prompt("prepare_training","Narrative pre-training checklist (prompt). Use after upload and before train. Content is tier-scoped from the API when online. Prep ladder: this prompt = story checklist; training_guidance tool = JSON presets/parameter hints; training_prep tool = interactive UI + submit_prepared_training.",{dataset_id:o.string().describe("Dataset ID to prepare for training")},async({dataset_id:e})=>({messages:[{role:"user",content:{type:"text",text:await O(e)}}]}));const A=new t;(async function(){try{const e=await s(l,p,c);m(e)}catch(e){process.env.BARIVIA_VIZ_PORT&&console.error("Barivia viz server failed to start:",e)}const e=S.server;e.oninitialized=()=>{const t=e.getClientCapabilities(),o=r(t);d(!!o?.mimeTypes?.includes(a))},await S.connect(A)})().catch(console.error);
2
+ import{McpServer as e}from"@modelcontextprotocol/sdk/server/mcp.js";import{StdioServerTransport as t}from"@modelcontextprotocol/sdk/server/stdio.js";import{z as o}from"zod";import{getUiCapability as r,registerAppResource as n,RESOURCE_MIME_TYPE as s}from"@modelcontextprotocol/ext-apps/server";import{startVizServer as a}from"./viz-server.js";import{API_KEY as i,apiCall as l,apiRawCall as p,loadViewHtml as c,setVizPort as m,setClientSupportsMcpApps as d,CLIENT_VERSION as u}from"./shared.js";import{registerDatasetsTool as f}from"./tools/datasets.js";import{registerJobsTool as g,JOBS_DESCRIPTION_BASE as _}from"./tools/jobs.js";import{registerResultsTool as b}from"./tools/results.js";import{registerExploreMapTool as h,RESULTS_EXPLORER_URI as y}from"./tools/explore_map.js";import{registerAccountTool as w}from"./tools/account.js";import{registerInferenceTool as j}from"./tools/inference.js";import{registerGuideBarsomTool as v}from"./tools/guide_barsom.js";import{registerTrainingGuidanceTool as P}from"./tools/training_guidance.js";import{registerFeedbackTool as x}from"./tools/feedback.js";import{registerTrainingPrepTools as I,TRAINING_PREP_URI as k}from"./tools/training_prep.js";import{registerTrainingMonitorTool as M,TRAINING_MONITOR_URI as O}from"./tools/training_monitor.js";import{resolvePrepareTrainingPromptText as S}from"./prepare_training_prompt.js";i||(console.error("Error: BARIVIA_API_KEY not set. 
Set it in your MCP client config."),process.exit(1));const A=new e({name:"analytics-engine",version:u,instructions:'# Barivia Mapping Analytics Engine\n\nSelf-organizing map (SOM) analytics: project high-dimensional data to a 2D grid for clusters, gradients, and anomalies.\n\n## Workflow (short)\n\nUpload (`datasets(upload)`) → `datasets(preview)` and `datasets(analyze)` before train → submit one of `jobs(train_map)`, `jobs(train_siom_map)`, or `jobs(train_floop_siom)` (only if plan allows FLooP) → poll `jobs(status)` every 10–15s until `completed` → `results(get)` for metrics and figures (there is no separate analyze tool). Then `jobs(compare)`, `results(download/recolor/transition_flow)`, or `inference` as needed.\n\n**Full detail:** Call `guide_barsom_workflow` for plan-scoped tool map, training modes, async rules, optional MCP App UIs, and step-by-step SOP (from the Barivia API when online).\n\n## Tool map (compact)\n\n| Area | Tool | Notes |\n|------|------|--------|\n| Data | `datasets` | upload, preview, analyze, list, subset, add_expression, reduce_spectral (pca/log_sample/uniform_sample/stats for long ordered numeric blocks), delete |\n| Jobs | `jobs` | train_map, train_siom_map, train_floop_siom (entitled), status, list, compare, cancel, delete, batch_predict, run_baseline_study; `train_floop_chain` = deprecated alias for train_floop_siom |\n| Results | `results` | get (figures="none" for metrics-only), export, download, recolor (async), transition_flow (async; time-ordered rows only) |\n| Inference | `inference` | predict (regime-aware; output="compact"|"annotated"), impute_column (neighbor-pool fill for a non-training column), compare, project_columns, report |\n| Account | `account` | status, burst/compute actions, history, add_funds |\n| Bootstrap | `guide_barsom_workflow` | orientation + SOP |\n| Parameters | `training_guidance` | presets and field hints (API-scoped) |\n| Prep | `prepare_training` prompt, `training_prep` + `submit_prepared_training` 
| checklist / interactive UI |\n| Explore | `results_explorer`, `training_monitor` | optional MCP Apps; `explore_map` = deprecated alias of `results_explorer`; `jobs(status)` and `results(get)` suffice without them |\n| Other | `send_feedback` | only after user agrees |\n\n## Async pattern\n\n- **Manual poll:** Training submits return `job_id` immediately — poll `jobs(status)` every 10–15s. **Running is not failed**; large grids or FLooP-SIOM can take many minutes. `max_nodes` (FLooP) is a total node budget, not grid side length.\n- **Often auto-polled:** `inference` actions, `results(recolor)`, `results(transition_flow)` may wait in-proxy; if you get a `job_id`, poll `jobs(status)` the same way.\n\nCredits: jobs consume compute credits; check `account(status)` before big runs. Slow networks: users can raise `BARIVIA_FETCH_TIMEOUT_MS`.\n\n## Constraints\n\n- Prep ladder: `prepare_training` prompt = narrative checklist; `training_guidance` = structured hints; `training_prep` = UI + guarded submit. Do not guess tiers or FLooP entitlement.\n- `inference(predict)`: prefer `dataset_id` for batch and for SIOM/irregular maps; single-row `rows` uses a fast path that can fail on some topologies — retry with `dataset_id`. FLooP-SIOM: if predict jobs fail while grid SIOM works, capture errors + `job_id`.\n- Column names are case-sensitive — match `datasets(preview)`.\n- Default training path is numeric/cyclic/temporal; use explicit `categorical_features` for baseline categoricals. `predict` must match the model contract.\n- After `recolor`, `transition_flow`, or `project_columns`, use the **new** `job_id` returned for follow-up `results` if applicable.'});n(A,y,y,{mimeType:s},async()=>{const e=await c("results-explorer");return{contents:[{uri:y,mimeType:s,text:e??"<html><body>Results Explorer view not built yet. 
Run: npm run build:views</body></html>"}]}}),n(A,k,k,{mimeType:s},async()=>{const e=await c("training-prep");return{contents:[{uri:k,mimeType:s,text:e??"<html><body>Training Preparation view not built yet.</body></html>"}]}}),n(A,O,O,{mimeType:s},async()=>{const e=await c("training-monitor");return{contents:[{uri:O,mimeType:s,text:e??"<html><body>Training Monitor view not built yet.</body></html>"}]}}),v(A),h(A),I(A),M(A),f(A),g(A,_),b(A),w(A),j(A),P(A),x(A),A.prompt("info","Short orientation for the Barivia Mapping MCP. For full plan-scoped workflow, tool map, and SOP, the model should call guide_barsom_workflow. Use when the user asks what this MCP can do or how to get started.",{},()=>({messages:[{role:"user",content:{type:"text",text:["Give a concise, scannable answer (headers + bullets):","","**What it is:** MCP client to the Barivia mapping engine (2D SOM / SIOM / FLooP-SIOM when entitled) over HTTPS.","","**First step:** Call `guide_barsom_workflow` for plan-scoped bootstrap (full tool list, async rules, training modes, optional MCP Apps, SOP).","","**Core path:** `datasets(upload)` → `datasets(preview)` + `datasets(analyze)` → choose training action → poll `jobs(status)` every 10–15s until completed → `results(get)` (all main figures/metrics; no separate analyze tool).","",'**Key tools:** `datasets` (data; reduce_spectral for spectra/long blocks), `jobs` (train/poll/compare/…; train_map accepts an optional `label` for readable compare rows), `results` (get/download/export/recolor/transition_flow; figures="none" for metrics-only), `inference` (predict; impute_column for topology-neighbor pool fill; compare; project_columns; report), `account` (status/credits/queue).',"","**Prep help:** `prepare_training` prompt (checklist) · `training_guidance` (presets/JSON hints) · `training_prep` + `submit_prepared_training` (interactive UI).","","**Optional UI:** `results_explorer`, `training_monitor` — nice for browsing; not required if you use `results` + 
`jobs(status)`.","","**After training:** `jobs(compare)` across runs, `results(recolor)`, `inference(project_columns)` for variables not in training, `transition_flow` only if rows are time-ordered.","","**Rules:** Running ≠ failed. Column names must match `datasets(preview)` exactly. Do not call `_fetch_figure` from chat (host/UI only); use `results(get)` or `results_explorer`.","","Offer `send_feedback` only after asking the user."].join("\n")}}]})),A.prompt("prepare_training","Narrative pre-training checklist (prompt). Use after upload and before train. Content is tier-scoped from the API when online. Prep ladder: this prompt = story checklist; training_guidance tool = JSON presets/parameter hints; training_prep tool = interactive UI + submit_prepared_training.",{dataset_id:o.string().describe("Dataset ID to prepare for training")},async({dataset_id:e})=>({messages:[{role:"user",content:{type:"text",text:await S(e)}}]}));const T=new t;(async function(){try{const e=await a(l,p,c);m(e)}catch(e){process.env.BARIVIA_VIZ_PORT&&console.error("Barivia viz server failed to start:",e)}const e=A.server;e.oninitialized=()=>{const t=e.getClientCapabilities(),o=r(t);d(!!o?.mimeTypes?.includes(s))},await A.connect(T)})().catch(console.error);
package/dist/shared.js CHANGED
@@ -14,6 +14,12 @@ export const API_KEY = process.env.BARIVIA_API_KEY ?? process.env.BARSOM_API_KEY
14
14
  export const FETCH_TIMEOUT_MS = parseInt(process.env.BARIVIA_FETCH_TIMEOUT_MS ?? "30000", 10);
15
15
  export const MAX_RETRIES = 2;
16
16
  export const RETRYABLE_STATUS = new Set([502, 503, 504]);
17
+ /**
18
+ * Single source of truth for the proxy version. Sent to the API as
19
+ * X-Barsom-Client-Version so the server can annotate tool guidance with the
20
+ * wrapper version each action requires. Keep in sync with package.json on bump.
21
+ */
22
+ export const CLIENT_VERSION = "0.9.0";
17
23
  /** User-facing links; keep aligned with barivia.se / api.barivia.se. */
18
24
  export const PUBLIC_SITE_ORIGIN = "https://barivia.se";
19
25
  /** Poll window for datasets(add_expression) / derive jobs (server-side work can exceed 30s). */
@@ -256,12 +262,18 @@ export async function apiCall(method, path, body, extraHeaders, requestTimeoutMs
256
262
  Authorization: `Bearer ${API_KEY}`,
257
263
  "Content-Type": contentType,
258
264
  "X-Request-ID": requestId,
265
+ "X-Barsom-Client-Version": CLIENT_VERSION,
259
266
  ...extraHeaders,
260
267
  };
261
268
  let serializedBody;
262
269
  if (body !== undefined) {
263
- serializedBody =
264
- contentType === "application/json" ? JSON.stringify(body) : String(body);
270
+ if (body instanceof Uint8Array) {
271
+ serializedBody = body; // pre-encoded bytes (e.g. gzipped CSV upload)
272
+ }
273
+ else {
274
+ serializedBody =
275
+ contentType === "application/json" ? JSON.stringify(body) : String(body);
276
+ }
265
277
  }
266
278
  const effectiveTimeout = requestTimeoutMs ?? FETCH_TIMEOUT_MS;
267
279
  const t0 = Date.now();
@@ -1,5 +1,6 @@
1
1
  import path from "node:path";
2
2
  import fs from "node:fs/promises";
3
+ import { gzipSync } from "node:zlib";
3
4
  import { z } from "zod";
4
5
  import { apiCall, getWorkspaceRootAsync, resolveFilePathForUpload, textResult, pollUntilComplete, POLL_DERIVE_MAX_MS, UPLOAD_DATASET_TIMEOUT_MS, } from "../shared.js";
5
6
  export function registerDatasetsTool(server) {
@@ -33,7 +34,7 @@ action=subset: Create a new dataset from a subset of an existing one. Requires n
33
34
  - Single filter object is also accepted (auto-wrapped).
34
35
  action=reduce_spectral: Run a pre-training reducer over an ordered block of numeric columns. All four methods produce one feature vector per row (rows in = rows out; only the column dimension is collapsed) and append derived columns to the dataset. Choose by data shape:
35
36
  - pca: top-k principal components — general first try when many columns are correlated (spectroscopy, gene panels, sensor arrays). Returns explained_variance_ratio.
36
- - log_sample: keep k columns at log-spaced indices — SAXS/scattering, audio frequency bands, attenuation curves (anywhere the index axis is logarithmically informative).
37
+ - log_sample: keep k columns at log-spaced indices — SAXS/WAXS & powder diffraction, log-frequency / octave-like audio, attenuation vs wavelength (UV–Vis–IR stacks), depth profiling, chromatography retention ladders — anywhere column order is exponential, logarithmic, or perceptually log-spaced.
37
38
  - uniform_sample: keep k columns at evenly-spaced indices — regularly-sampled time series, frame-by-frame features, evenly-binned histograms.
38
39
  - stats: 6 fixed per-row statistics (mean, std, min, max, skew, integral) — cheap baseline for any sequenced numeric block; k is ignored.
39
40
  Required params: name (prefix for derived columns), method, columns_block (ordered source column names ≥ 2), k (≥ 1, < length(columns_block); ignored for stats).
@@ -114,7 +115,7 @@ ESCALATION: If upload fails with column errors, open the file locally and verify
114
115
  if (ext !== ".csv" && ext !== ".tsv") {
115
116
  throw new Error("Only .csv and .tsv files can be uploaded as datasets.");
116
117
  }
117
- const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; // 100 MB
118
+ const MAX_UPLOAD_BYTES = 256 * 1024 * 1024; // 256 MB (gzip keeps the wire payload small)
118
119
  try {
119
120
  const stat = await fs.stat(resolved);
120
121
  if (stat.size > MAX_UPLOAD_BYTES) {
@@ -135,10 +136,19 @@ ESCALATION: If upload fails with column errors, open the file locally and verify
135
136
  else {
136
137
  throw new Error("datasets(upload) requires file_path or csv_data. Prefer file_path for token efficiency.");
137
138
  }
138
- const data = (await apiCall("POST", "/v1/datasets", body, {
139
+ // gzip large CSVs to keep the wire payload small (and under the API's
140
+ // compressed-body cap); the API transparently decompresses. Small bodies stay plain.
141
+ const GZIP_THRESHOLD = 1024 * 1024; // 1 MB
142
+ const uploadHeaders = {
139
143
  "X-Dataset-Name": name,
140
144
  "Content-Type": "text/csv",
141
- }, UPLOAD_DATASET_TIMEOUT_MS));
145
+ };
146
+ let uploadBody = body;
147
+ if (Buffer.byteLength(body, "utf-8") > GZIP_THRESHOLD) {
148
+ uploadBody = gzipSync(Buffer.from(body, "utf-8"));
149
+ uploadHeaders["Content-Encoding"] = "gzip";
150
+ }
151
+ const data = (await apiCall("POST", "/v1/datasets", uploadBody, uploadHeaders, UPLOAD_DATASET_TIMEOUT_MS));
142
152
  const id = data.id ?? data.dataset_id;
143
153
  if (id != null)
144
154
  data.suggested_next_step = `Suggested next step: datasets(action=preview, dataset_id=${id}) to inspect columns before training.`;
@@ -35,12 +35,13 @@ export function registerInferenceTool(server) {
35
35
  | Action | Use when | Timing |
36
36
  |--------|----------|--------|
37
37
  | predict | Scoring rows against the trained map (new data OR the training set itself) | 5–120s |
38
+ | impute_column | Fill a numeric column (not used in training) by pooling observed values on the BMU plus topology neighbors (typically 6 on hex; periodic maps wrap) | 5–120s |
38
39
  | compare | Comparing hit distributions of a second dataset against training (drift, A/B) | 30–120s |
39
40
  | project_columns | Project one or more dataset columns onto the map (component planes); dataset can be training set or partial-feature set | 10–90s |
40
41
  | report | Get a report manifest (artifact keys + URLs) to build your own report in Quarto/Notebook/script | Immediate (sync) |
41
42
 
42
- Sync/async: predict and compare are async jobs. The proxy auto-polls and usually returns when the job completes. If it returns a job_id instead (e.g. timeout), poll jobs(action=status, job_id=...) then results(action=download, job_id=...) to retrieve the artifact.
43
- Artifacts: When complete, use results(action=download, job_id=<returned_job_id>) to get: predict (output="compact") → predictions.csv; predict (output="annotated") → enriched.csv; compare → density-diff figure (e.g. density_diff.png).
43
+ Sync/async: predict, impute_column, and compare are async jobs. The proxy auto-polls and usually returns when the job completes. If it returns a job_id instead (e.g. timeout), poll jobs(action=status, job_id=...) then results(action=download, job_id=...) to retrieve the artifact.
44
+ Artifacts: When complete, use results(action=download, job_id=<returned_job_id>) to get: predict (output="compact") → predictions.csv; predict (output="annotated") → annotated.csv; impute_column → imputed.csv; compare → density-diff figure (e.g. density_diff.png).
44
45
  report is the only synchronous inference action — returns manifest immediately; no job to poll.
45
46
  NOT FOR: Retraining or changing the map — all actions treat the trained map as frozen.
46
47
  ESCALATION: If any action returns "missing column", verify column names with datasets(action=preview). Column names are case-sensitive and must match the training feature set exactly.
@@ -51,7 +52,7 @@ action=predict: Score rows against the trained map.
51
52
  - rows (≤500 inline). Always treated as new data.
52
53
  Output style (output param, default "compact"):
53
54
  - "compact" → predictions.csv (row_id, bmu_x, bmu_y, bmu_node_index, cluster_id [, quantization_error, potential_anomaly]).
54
- - "annotated" → enriched.csv (the full source CSV with bmu_x, bmu_y, bmu_node_index, cluster_id appended). Requires a dataset (no inline rows). Replaces the previous inference(action=enrich) — migrate by passing output="annotated".
55
+ - "annotated" → annotated.csv (full source CSV with bmu_x, bmu_y, bmu_node_index, cluster_id appended). Requires a dataset (no inline rows).
55
56
  Regime auto-detected:
56
57
  - If the resolved dataset matches the parent training dataset, regime="training" and QE / qe_p95 / potential_anomaly fields are omitted from the compact output. QE on training data is fitting error, not a generalisation metric, and the p95 anomaly flag would be circular. Use a held-out dataset for quality assessment.
57
58
  - Otherwise regime="new" and the full QE columns are returned.
@@ -59,22 +60,32 @@ action=predict: Score rows against the trained map.
59
60
  Routing: prefer dataset_id for many rows or whenever the map uses irregular SIOM / GeneralTopology layouts — the async worker path is the supported batch scorer. A single inline row takes a fast stateless path that may return invalid_inference_input on some topologies; if so, retry with dataset_id (a one-row dataset is fine). For FLooP-SIOM maps, use predict with dataset_id first.
60
61
  When the scored set has at most ${PREDICT_PREVIEW_ROW_CAP} rows, completed responses include a short per-line preview in the tool text for chat agents.
61
62
 
63
+ action=impute_column: Map-local imputation as read-only post-processing (the trained map is frozen; not a held-out validity claim). Requires dataset_id + target_column. The dataset must contain all training features (same names and cyclic expansion as predict) plus the target column. target_column must NOT have been in jobs(train_map) columns — train without it, then impute. Pools finite target values from rows whose BMU is this row's BMU or one of its topology neighbors (BMU + neighbors, often 7 nodes on hex interior; fewer on borders if the parent map is non-periodic; periodic hex maps wrap around), aggregated with neighbourhood-distance weighting by default (weighting="distance" — closer nodes count more; weighting="uniform" for a flat pool). Excludes the current row from its own pool. only_missing (default true): keep observed values. impute_aggregation: mean or median of the pool. Optional cv_folds (2-20) writes quality.csv (held-out MAE/RMSE/R2); target_column_kind handles categorical (mode) / cumulative (warns). Output imputed.csv: row_id, target_original, target_imputed, impute_source (observed | imputed | insufficient_data), bmu_node_index, n_patch_nodes, n_pool_rows, pool_std, pool_p5, pool_p95.
64
+
62
65
  action=compare: dataset_id must refer to a dataset with the same feature set as training (same column names and preprocessing, including cyclic expansion). A = training dataset; B = cohort to compare. Density-diff: positive = B gained vs A; negative = A had more. Returns density-diff heatmap (e.g. density_diff.png).
63
66
  action=project_columns: Project one or more columns from a dataset onto the trained map. Pass dataset_id (the dataset containing the columns) and columns (array of column names). Uses cached BMUs when dataset is the training set; supports partial-feature mapping when dataset has only a subset of training features. Returns one component plane image per column. Get files via results(action=download, job_id=<returned_job_id>).
64
67
  action=report: Returns a report manifest for the given job_id (job must be completed). Includes figure_manifest (logical names → filenames), download_urls for all artifacts, cluster_summary when available, and summary metrics. Stakeholder report PDF (if generated) is available via results(action=download, job_id=<training_job_id>), filename e.g. report.pdf.`, {
65
68
  action: z
66
- .enum(["predict", "compare", "project_columns", "report"])
67
- .describe("predict: score rows; compare: drift/cohort diff heatmap; project_columns: project dataset columns onto map; report: manifest of primitives for custom report. (Note: the previous 'enrich' action is now predict with output=\"annotated\".)"),
69
+ .enum(["predict", "impute_column", "compare", "project_columns", "report"])
70
+ .describe("predict: score rows; impute_column: topology-neighbor pool imputation for a column not in training; compare: drift/cohort diff heatmap; project_columns: project dataset columns onto map; report: manifest of primitives for custom report."),
68
71
  job_id: z.string().describe("Job ID of a completed map training job"),
69
- dataset_id: z.string().optional().describe("action=predict/compare/project_columns: Dataset ID. predict=data to score (defaults to the training dataset when omitted); compare=dataset B; project_columns=dataset with columns to project."),
72
+ dataset_id: z.string().optional().describe("action=predict/impute_column/compare/project_columns: Dataset ID. predict=data to score (defaults to the training dataset when omitted); impute_column=dataset with training features + target_column; compare=dataset B; project_columns=dataset with columns to project."),
70
73
  columns: z.array(z.string()).optional().describe("action=project_columns: column names to project onto the map (must exist in the dataset)."),
71
74
  rows: z.array(z.record(z.string(), z.union([z.number(), z.string()]))).optional().describe("action=predict: inline rows to score (max 500). For a single inline row, raw categorical strings are allowed for baseline categorical_features models. Batch rows should remain numeric and match the training schema."),
72
- output: z.enum(["compact", "annotated"]).optional().default("compact").describe("action=predict: output style. compact = predictions.csv (default); annotated = enriched.csv (original CSV + BMU columns). Use annotated to get the training set with BMU labels appended (the former inference(action=enrich) workflow)."),
75
+ output: z.enum(["compact", "annotated"]).optional().default("compact").describe("action=predict: output style. compact = predictions.csv (default); annotated = annotated.csv (original rows plus bmu_x, bmu_y, bmu_node_index, cluster_id)."),
73
76
  colormap: z.string().optional().describe("action=compare: colormap for diff heatmap (default: balance). action=report: n/a."),
74
77
  output_format: z.enum(["png", "pdf", "svg"]).optional().default("png").describe("action=compare: output format for heatmap (default: png)"),
75
78
  output_dpi: z.enum(["standard", "retina", "print"]).optional().default("retina").describe("Resolution: standard (1x), retina (2x, default), print (4x)"),
76
79
  top_n: z.number().int().min(1).max(50).optional().default(10).describe("action=compare: number of top gained/lost nodes to report (default: 10)"),
77
- }, async ({ action, job_id, dataset_id, columns, rows, output, colormap, output_format, output_dpi, top_n }) => {
80
+ target_column: z.string().optional().describe("action=impute_column: numeric column to impute (must not be a training feature)."),
81
+ only_missing: z.boolean().optional().default(true).describe("action=impute_column: if true, leave observed values unchanged."),
82
+ impute_aggregation: z.enum(["mean", "median"]).optional().default("mean").describe("action=impute_column: aggregation over pooled neighbor rows."),
83
+ cv_folds: z.number().int().min(2).max(20).optional().describe("action=impute_column: if set (2-20), run k-fold cross-validation on observed target cells and emit quality.csv with MAE / RMSE / R2 (held-out). Omit to skip."),
84
+ target_column_kind: z.enum(["instantaneous", "cumulative", "categorical"]).optional()
85
+ .describe("action=impute_column: instantaneous (default) = pool mean/median; categorical = pool mode; cumulative = monotonic-counter (pool aggregation is rough; warns). A monotonic-counter warning is emitted automatically regardless."),
86
+ weighting: z.enum(["distance", "uniform"]).optional()
87
+ .describe("action=impute_column: distance (default) weights pooled values by map-neighbourhood proximity (closer BMU nodes count more); uniform is a flat pool."),
88
+ }, async ({ action, job_id, dataset_id, columns, rows, output, colormap, output_format, output_dpi, top_n, target_column, only_missing, impute_aggregation, cv_folds, target_column_kind, weighting }) => {
78
89
  const dpiMap = { standard: 1, retina: 2, print: 4 };
79
90
  const numericDpi = dpiMap[output_dpi ?? "retina"] ?? 2;
80
91
  if (action === "predict") {
@@ -122,7 +133,7 @@ action=report: Returns a report manifest for the given job_id (job must be compl
122
133
  const regime = String(summary.regime ?? "new");
123
134
  const effectiveStyle = String(summary.output_style ?? outputStyle);
124
135
  const isAnnotated = effectiveStyle === "annotated";
125
- const artifactName = isAnnotated ? "enriched.csv" : "predictions.csv";
136
+ const artifactName = isAnnotated ? "annotated.csv" : "predictions.csv";
126
137
  const headerLine = isAnnotated
127
138
  ? `Annotated dataset ready — job: ${predictJobId}`
128
139
  : `Predictions complete — job: ${predictJobId}`;
@@ -133,7 +144,7 @@ action=report: Returns a report manifest for the given job_id (job must be compl
133
144
  ? `Mean QE: ${summary.mean_qe !== undefined ? Number(summary.mean_qe).toFixed(4) : "N/A"} | Max QE: ${summary.max_qe !== undefined ? Number(summary.max_qe).toFixed(4) : "N/A"} | qe_p95: ${summary.qe_p95 !== undefined ? Number(summary.qe_p95).toFixed(4) : "N/A"}`
134
145
  : "";
135
146
  const outputLine = isAnnotated
136
- ? `Output: enriched.csv (original CSV + bmu_x, bmu_y, bmu_node_index, cluster_id appended). Clusters: ${summary.n_clusters ?? Object.keys(summary.cluster_counts ?? {}).length}.`
147
+ ? `Output: annotated.csv (original CSV + bmu_x, bmu_y, bmu_node_index, cluster_id appended). Clusters: ${summary.n_clusters ?? Object.keys(summary.cluster_counts ?? {}).length}.`
137
148
  : (regime === "training"
138
149
  ? `Output: predictions.csv (row_id, bmu_x, bmu_y, bmu_node_index, cluster_id). Clusters: ${Object.keys(summary.cluster_counts ?? {}).length}.`
139
150
  : `Output: predictions.csv (row_id, bmu_x, bmu_y, bmu_node_index, cluster_id, quantization_error, potential_anomaly). Summary includes mean_qe, max_qe, qe_p95. Clusters: ${Object.keys(summary.cluster_counts ?? {}).length}.`);
@@ -152,6 +163,44 @@ action=report: Returns a report manifest for the given job_id (job must be compl
152
163
  return { content: [{ type: "text", text: `inference(predict) job ${predictJobId} failed: ${poll.error ?? "unknown error"}` }] };
153
164
  return { content: [{ type: "text", text: `inference(predict) job ${predictJobId} submitted. Poll with jobs(action=status, job_id="${predictJobId}").` }] };
154
165
  }
166
+ if (action === "impute_column") {
167
+ if (!dataset_id)
168
+ throw new Error("inference(impute_column) requires dataset_id");
169
+ if (!target_column?.trim())
170
+ throw new Error("inference(impute_column) requires target_column");
171
+ const body = {
172
+ dataset_id,
173
+ target_column: target_column.trim(),
174
+ only_missing: only_missing ?? true,
175
+ aggregation: impute_aggregation ?? "mean",
176
+ };
177
+ if (cv_folds !== undefined)
178
+ body.cv_folds = cv_folds;
179
+ if (target_column_kind !== undefined)
180
+ body.target_column_kind = target_column_kind;
181
+ if (weighting !== undefined)
182
+ body.weighting = weighting;
183
+ const data = (await apiCall("POST", `/v1/results/${job_id}/impute_column`, body));
184
+ const imputeJobId = data.id;
185
+ const poll = await pollUntilComplete(imputeJobId, 120_000);
186
+ if (poll.status === "completed") {
187
+ const results = (await apiCall("GET", `/v1/results/${imputeJobId}`));
188
+ const summary = (results.summary ?? {});
189
+ const urls = (results.download_urls ?? {});
190
+ return { content: [{ type: "text", text: [
191
+ `Impute column complete — job: ${imputeJobId}`,
192
+ `Target: ${summary.target_column ?? target_column} | aggregation: ${summary.aggregation ?? impute_aggregation} | only_missing: ${summary.only_missing ?? only_missing}`,
193
+ `Rows: ${summary.n_rows ?? "?"} | imputed rows (source=imputed): ${summary.n_imputed ?? "?"} | insufficient_data: ${summary.n_insufficient ?? "?"}`,
194
+ `Mean patch nodes: ${summary.mean_patch_nodes !== undefined ? Number(summary.mean_patch_nodes).toFixed(2) : "N/A"} (BMU + neighbors; hex interior often ~7).`,
195
+ urls["imputed.csv"] ? `Download imputed.csv: ${urls["imputed.csv"]}` : "Use results(action=get) or results(action=download) with this job_id to retrieve imputed.csv.",
196
+ "",
197
+ "Map-local estimates only — not a substitute for held-out validation.",
198
+ ].join("\n") }] };
199
+ }
200
+ if (poll.status === "failed")
201
+ return { content: [{ type: "text", text: `inference(impute_column) job ${imputeJobId} failed: ${poll.error ?? "unknown error"}` }] };
202
+ return { content: [{ type: "text", text: `inference(impute_column) job ${imputeJobId} submitted. Poll with jobs(action=status, job_id="${imputeJobId}").` }] };
203
+ }
155
204
  if (action === "compare") {
156
205
  if (!dataset_id)
157
206
  throw new Error("inference(compare) requires dataset_id (dataset B)");
@@ -5,7 +5,7 @@ export const JOBS_DESCRIPTION_BASE = `Manage and inspect jobs.
5
5
  | Action | Use when |
6
6
  |--------|----------|
7
7
  | status | Polling after any async job submission — call every 10–15s |
8
- | list | Finding job IDs, checking what is pending/completed, reviewing hyperparameters. Response includes job_type (train_map, report, recolor, project, transition_flow, compare, predict, reduce_spectral) to filter or display. |
8
+ | list | Finding job IDs, checking what is pending/completed, reviewing hyperparameters. Response includes job_type (train_map, report, recolor, project, transition_flow, compare, predict, impute_column, annotated_dataset, reduce_spectral) to filter or display. |
9
9
  | compare | Picking the best training run from a set of completed jobs |
10
10
  | train_map | Submitting a new map training job — returns job_id for polling |
11
11
  | train_siom_map | Submitting a self-interacting map training job — same map flow with SIOM coverage control |
@@ -251,8 +251,9 @@ export function registerJobsTool(server, description) {
251
251
  .optional()
252
252
  .describe("Optional run label (≤120 chars) for train_map / train_siom_map / train_floop_siom — appears in jobs(list) and the jobs(compare) table; sanitized server-side. Useful for sweeps (e.g. label=\"sweep_periodic_true\")."),
253
253
  preset: z.enum(["quick", "standard", "refined", "high_res"]).optional(),
254
- grid_x: z.number().int().optional(),
255
- grid_y: z.number().int().optional(),
254
+ grid_x: z.number().int().optional()
255
+ .describe("Grid width. Omit grid_x AND grid_y (and preset) to auto-size the map (~5·√√n per side); the result reports hit_stats.active_node_fraction and a grid_suggestion when too many nodes are dead."),
256
+ grid_y: z.number().int().optional().describe("Grid height. See grid_x for auto-sizing."),
256
257
  epochs: z.preprocess((v) => {
257
258
  if (v === undefined || v === null)
258
259
  return v;
@@ -29,7 +29,7 @@ action=export: Structured data exports. Use export_type= to choose what to expor
29
29
  - export_type=nodes: per-node hit count + feature stats. Profile clusters and operating modes.
30
30
 
31
31
  action=download: Save figures to disk. Use so user can open, share, or version files locally.
32
- - folder: e.g. "." or "./results". Interpreted relative to the client's current working directory (or workspace). If job has a label, a named subfolder may be created.
32
+ - folder: e.g. "." or "./results". Interpreted relative to the client's current working directory (or workspace). Files are always written into a per-job subfolder (the job label, else the job_id) under this folder, so downloading several jobs into one folder never overwrites the shared filenames every job emits (e.g. summary.json).
33
33
  - figures: "all" (default) or array of filenames.
34
34
  - include_json: also save summary.json.
35
35
 
@@ -64,7 +64,7 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
64
64
  folder: z
65
65
  .string()
66
66
  .optional()
67
- .describe("action=download: directory path to save files (e.g. '.' or './results'). Relative to the client's current working directory (or workspace)."),
67
+ .describe("action=download: directory path to save files (e.g. '.' or './results'). Relative to the client's current working directory (or workspace). Files land in a per-job subfolder (job label or job_id) under this path."),
68
68
  colormap: z
69
69
  .string()
70
70
  .optional()
@@ -202,17 +202,14 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
202
202
  }
203
203
  }
204
204
  else if (jobType === "enrich_dataset") {
205
- // Historical job kind: pre-merge `enrich_dataset` worker output. New jobs
206
- // never produce this — they use predict with output_style="annotated"
207
- // (job_type="predict"). Kept as a read-only display path so old completed
208
- // jobs remain reviewable.
205
+ // Older exports: stored results whose job type is "enrich_dataset" are still rendered here (read-only display path).
209
206
  const files = summary.files ?? [];
207
+ const csvArtifact = files.find((f) => typeof f === "string" && f.endsWith(".csv")) ?? "artifact.csv";
210
208
  content.push({ type: "text", text: [
211
- `Annotated Dataset (legacy enrich_dataset job) — ${resultsHeader}`,
209
+ `Annotated dataset (older export) — ${resultsHeader}`,
212
210
  `Parent map job: ${summary.parent_job_id ?? "N/A"} | Rows: ${summary.n_rows ?? summary.n_samples ?? 0}`,
213
211
  `Output: ${files.filter((f) => f !== "summary.json").join(", ")}`,
214
- `Use results(action=download, job_id="${job_id}") to save enriched.csv.`,
215
- `(For new jobs, use inference(action=predict, output="annotated").)`,
212
+ `Use results(action=download, job_id="${job_id}") to save ${csvArtifact}.`,
216
213
  ].join("\n") });
217
214
  }
218
215
  else if (jobType === "predict") {
@@ -227,7 +224,7 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
227
224
  const metricsLine = (regime !== "training" && !isAnnotated)
228
225
  ? `Mean QE: ${summary.mean_qe !== undefined ? Number(summary.mean_qe).toFixed(4) : "N/A"} | Max QE: ${summary.max_qe !== undefined ? Number(summary.max_qe).toFixed(4) : "N/A"} | qe_p95: ${summary.qe_p95 !== undefined ? Number(summary.qe_p95).toFixed(4) : "N/A"}`
229
226
  : "";
230
- const downloadName = isAnnotated ? "enriched.csv" : "predictions.csv";
227
+ const downloadName = isAnnotated ? "annotated.csv" : "predictions.csv";
231
228
  content.push({ type: "text", text: [
232
229
  `${headerLabel} — ${resultsHeader}`,
233
230
  `Parent map job: ${summary.parent_job_id ?? "N/A"} | Regime: ${regime} | Style: ${outputStyle}`,
@@ -238,6 +235,18 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
238
235
  trainingCaveat,
239
236
  ].filter(Boolean).join("\n") });
240
237
  }
238
+ else if (jobType === "impute_column") {
239
+ const files = summary.files ?? [];
240
+ content.push({ type: "text", text: [
241
+ `Impute column — ${resultsHeader}`,
242
+ `Parent map job: ${summary.parent_job_id ?? "N/A"} | Target: ${summary.target_column ?? "?"} | Rows: ${summary.n_rows ?? "?"}`,
243
+ `Aggregation: ${summary.aggregation ?? "?"} | only_missing: ${summary.only_missing ?? "?"} | imputed: ${summary.n_imputed ?? "?"} | insufficient: ${summary.n_insufficient ?? "?"}`,
244
+ `Mean patch nodes: ${summary.mean_patch_nodes !== undefined ? Number(summary.mean_patch_nodes).toFixed(2) : "N/A"}`,
245
+ `Output: ${files.filter((f) => f !== "summary.json").join(", ")}`,
246
+ `Use results(action=download, job_id="${job_id}") to save imputed.csv.`,
247
+ "Map-local pool estimates — not held-out validated predictions.",
248
+ ].join("\n") });
249
+ }
241
250
  else if (jobType === "reduce_spectral") {
242
251
  const method = String(summary.method ?? "?");
243
252
  const sourceCols = summary.source_columns ?? [];
@@ -554,7 +563,7 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
554
563
  const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
555
564
  const files = summary.files ?? [];
556
565
  const jobType = summary.job_type ?? "train_som";
557
- const needsAllFiles = ["enrich_dataset", "predict", "compare_datasets"].includes(jobType);
566
+ const needsAllFiles = ["enrich_dataset", "predict", "impute_column", "compare_datasets"].includes(jobType);
558
567
  const isImage = (f) => f.endsWith(".png") || f.endsWith(".svg") || f.endsWith(".pdf");
559
568
  let toDownload;
560
569
  if (figures === "all" || figures === "images" || figures === undefined) {
@@ -569,9 +578,11 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
569
578
  toDownload = files.filter(isImage);
570
579
  }
571
580
  let resolvedDir = sandboxPath(folder, await getWorkspaceRootAsync(server));
572
- if (jobLabel && (folder === "." || folder === "./results" || folder === "results")) {
573
- resolvedDir = path.join(resolvedDir, jobLabel);
574
- }
581
+ // Always namespace each job's files into its own subfolder so that
582
+ // downloading multiple jobs (or job types) into the same folder never
583
+ // overwrites the shared filenames every job emits (e.g. summary.json).
584
+ const jobSubfolder = (jobLabel ?? job_id).replace(/[^a-zA-Z0-9_.-]/g, "_");
585
+ resolvedDir = path.join(resolvedDir, jobSubfolder);
575
586
  if (jobType === "render_variant" && summary.colormap) {
576
587
  const colormapDir = String(summary.colormap).replace(/[^a-zA-Z0-9_-]/g, "_");
577
588
  resolvedDir = path.join(resolvedDir, colormapDir);
@@ -586,7 +597,8 @@ NOT FOR: Jobs that haven't completed. Use jobs(action=status) to check first.`,
586
597
  }
587
598
  catch { /* skip missing files */ }
588
599
  }
589
- return { content: [{ type: "text", text: saved.length > 0 ? `Saved ${saved.length} file(s) to ${folder}: ${saved.join(", ")}` : `No files saved. Check job_id and that the job is completed.` }] };
600
+ const savedDir = path.join(folder, jobSubfolder);
601
+ return { content: [{ type: "text", text: saved.length > 0 ? `Saved ${saved.length} file(s) to ${savedDir}: ${saved.join(", ")}` : `No files saved. Check job_id and that the job is completed.` }] };
590
602
  }
591
603
  if (action === "recolor") {
592
604
  if (!colormap)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@barivia/barsom-mcp",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "description": "barSOM MCP proxy — connect any MCP client to the barSOM cloud API for Self-Organizing Map analytics",
5
5
  "keywords": [
6
6
  "mcp",