@barivia/barmesh-mcp 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,10 +13,14 @@ form on a shared self-organizing map (SOM)**:
13
13
  - **`barmesh_mesh_convergence`** — trains one SOM on all meshes (joint-normalized), projects
14
14
  each mesh to a volume-weighted fingerprint, and computes **symmetric KL** and
15
15
  **Wasserstein-1 (EMD)** distances stepwise and against a reference mesh, with publication
16
- figures and an advisory convergence reading. Submit enqueues **`cfd_prepare`** on worker-io
17
- when needed; the proxy auto-polls `prepare_job_id` before the mesh job runs. Default
18
- **`defer_figures=true`** **`cfd_finalize`** on worker-io; **`barmesh_jobs(status)`** auto-polls
19
- **`finalize_job_id`** when figures are deferred.
16
+ figures and an advisory convergence reading. The SOM features are preprocessed by the
17
+ same staged pipeline as barsom training, so any dataset (small or large, CSV/gzip)
18
+ is handled out-of-core by default; optional `transforms`, `normalize`,
19
+ `normalization_methods`, and `row_range` give the same preprocessing controls. Submit
20
+ enqueues **`prepare_training_matrix`** on worker-io when the dataset is staged; the proxy
21
+ auto-polls `prepare_job_id` before the mesh job runs. Default **`defer_figures=true`** →
22
+ **`cfd_finalize`** on worker-io; **`barmesh_jobs(status)`** auto-polls **`finalize_job_id`**
23
+ when figures are deferred.
20
24
  - **`barmesh_richardson`** — classical three-level Richardson extrapolation / Grid
21
25
  Convergence Index (GCI) on scalar quantities of interest.
22
26
 
@@ -48,7 +52,7 @@ API key; otherwise the analysis calls return HTTP 403. Contact Barivia to enable
48
52
  |------|---------|
49
53
  | `barmesh_guide_workflow` | Workflow + tool map (tier-scoped). Call first. |
50
54
  | `barmesh_prepare_mesh_data` | Recipe for the combined per-cell CSV. |
51
- | `barmesh_datasets` | Upload / preview / list the mesh CSV. |
55
+ | `barmesh_datasets` | Upload / preview / list / get / subset / delete the mesh CSV. |
52
56
  | `barmesh_mesh_convergence` | SOM fingerprint distances (async job). |
53
57
  | `barmesh_richardson` | Richardson/GCI on scalar QoIs (async job). |
54
58
  | `barmesh_jobs` | Poll job status / list jobs (auto-polls CFD prepare + finalize when applicable). |
@@ -65,6 +69,12 @@ One combined CSV: one row per cell, a mesh-label column (`mesh_id`), the physica
65
69
  you choose as `feature_columns` (e.g. `p`, `U_mag`, `k`, `log_epsilon`, `T`), and a
66
70
  cell-volume column (`V`). Use `barmesh_prepare_mesh_data` for the full recipe.
67
71
 
72
+ **Upload formats:** `.csv`, `.tsv`, `.csv.gz`, or `.tsv.gz`. For large per-cell tables
73
+ (≥64 MB), prefer `.csv.gz` — uploads stream directly to object storage with presigned PUT.
74
+ Use `barmesh_datasets(action=get, dataset_id=...)` to check staging status after upload;
75
+ `barmesh_datasets(action=subset, sample_n=...)` to downsample huge tables server-side.
76
+ Parquet staging is supported by the API but not yet exposed as an MCP upload format.
77
+
68
78
  ## Environment variables
69
79
 
70
80
  | Variable | Default | Purpose |
@@ -1,6 +1,8 @@
1
1
  import { pollUntilComplete } from "./shared.js";
2
2
  /**
3
- * When the API enqueues cfd_prepare for mesh-convergence, poll it before the mesh job runs.
3
+ * Mesh-convergence on a staged dataset enqueues a preprocessing job
4
+ * (prepare_training_matrix) first; the submit response carries its id as
5
+ * `prepare_job_id`. Poll it to completion before the mesh job runs.
4
6
  */
5
7
  export async function pollCfdPrepareIfPresent(data, label, timeoutMs = 600_000) {
6
8
  const prepareJobId = data.prepare_job_id;
@@ -8,10 +10,10 @@ export async function pollCfdPrepareIfPresent(data, label, timeoutMs = 600_000)
8
10
  return null;
9
11
  const poll = await pollUntilComplete(prepareJobId, timeoutMs);
10
12
  if (poll.status === "failed") {
11
- throw new Error(`${label}: cfd_prepare job ${prepareJobId} failed: ${poll.error ?? "unknown error"}`);
13
+ throw new Error(`${label}: data preparation job ${prepareJobId} failed: ${poll.error ?? "unknown error"}`);
12
14
  }
13
15
  if (poll.status !== "completed") {
14
- throw new Error(`${label}: cfd_prepare job ${prepareJobId} did not complete (status=${poll.status})`);
16
+ throw new Error(`${label}: data preparation job ${prepareJobId} did not complete (status=${poll.status})`);
15
17
  }
16
18
  return prepareJobId;
17
19
  }
package/dist/shared.js CHANGED
@@ -4,10 +4,12 @@
4
4
  * remains a thin HTTPS client to the same Barivia API (no domain logic here).
5
5
  */
6
6
  import fs from "node:fs/promises";
7
- import { createReadStream } from "node:fs";
7
+ import { createReadStream, createWriteStream } from "node:fs";
8
8
  import { createGzip } from "node:zlib";
9
- import { createHash } from "node:crypto";
9
+ import { createHash, randomUUID } from "node:crypto";
10
10
  import { Readable } from "node:stream";
11
+ import { pipeline } from "node:stream/promises";
12
+ import os from "node:os";
11
13
  import path from "node:path";
12
14
  import { fileURLToPath } from "node:url";
13
15
  import { logInfo } from "./logger.js";
@@ -20,7 +22,7 @@ export const FETCH_TIMEOUT_MS = parseInt(process.env.BARIVIA_FETCH_TIMEOUT_MS ??
20
22
  export const MAX_RETRIES = 2;
21
23
  export const RETRYABLE_STATUS = new Set([502, 503, 504]);
22
24
  /** Single source of truth for the proxy version. Keep in sync with package.json on bump. */
23
- export const CLIENT_VERSION = "0.3.1";
25
+ export const CLIENT_VERSION = "0.4.1";
24
26
  export const PUBLIC_SITE_ORIGIN = "https://barivia.se";
25
27
  /** Large per-cell CSV uploads may exceed the default fetch timeout. */
26
28
  export const UPLOAD_DATASET_TIMEOUT_MS = 180_000;
@@ -40,27 +42,56 @@ export async function streamFileSha256(srcPath) {
40
42
  s.on("error", reject);
41
43
  });
42
44
  }
43
- /** Stream a local file through gzip directly to a presigned PUT URL (e.g. R2). */
44
- export async function putPresignedStream(url, srcPath, contentType, timeoutMs = PRESIGNED_PUT_TIMEOUT_MS) {
45
- const gz = createReadStream(srcPath).pipe(createGzip());
46
- const webStream = Readable.toWeb(gz);
45
+ /** Turn a raw S3/R2 presigned-PUT error into a clean, actionable message. */
46
+ function presignedPutError(status, bodyText) {
47
+ const snippet = bodyText.slice(0, 200);
48
+ if (status === 411 || /MissingContentLength/i.test(bodyText)) {
49
+ return new Error("Upload rejected by storage: the Content-Length header was missing. " +
50
+ "This is a client bug — please update @barivia/barmesh-mcp to the latest version.");
51
+ }
52
+ if (status === 413 || /EntityTooLarge|entity is too large/i.test(bodyText)) {
53
+ return new Error("Upload rejected by storage: file exceeds the maximum upload size.");
54
+ }
55
+ if (status === 403) {
56
+ return new Error("Upload rejected by storage: the presigned URL expired or was invalid; retry barmesh_datasets(action=upload).");
57
+ }
58
+ return new Error(`Presigned upload failed: HTTP ${status} ${snippet}`);
59
+ }
60
+ /**
61
+ * Stream a local file directly to a presigned PUT URL (e.g. R2), gzip-compressing
62
+ * on the way unless the source is already gzipped. Never materializes the payload
63
+ * in process memory: when compression is needed we gzip to a temp file first, then
64
+ * PUT that file with an explicit Content-Length.
65
+ */
66
+ export async function putPresignedStream(url, srcPath, contentType, timeoutMs = PRESIGNED_PUT_TIMEOUT_MS, alreadyGzipped = false) {
67
+ let putPath = srcPath;
68
+ let tempPath;
69
+ if (!alreadyGzipped) {
70
+ tempPath = path.join(os.tmpdir(), `barmesh-upload-${randomUUID()}.csv.gz`);
71
+ await pipeline(createReadStream(srcPath), createGzip(), createWriteStream(tempPath));
72
+ putPath = tempPath;
73
+ }
47
74
  const controller = new AbortController();
48
75
  const timer = setTimeout(() => controller.abort(), timeoutMs);
49
76
  try {
77
+ const contentLength = (await fs.stat(putPath)).size;
78
+ const webStream = Readable.toWeb(createReadStream(putPath));
50
79
  const resp = await fetch(url, {
51
80
  method: "PUT",
52
81
  body: webStream,
53
- headers: { "Content-Type": contentType },
82
+ headers: { "Content-Type": contentType, "Content-Length": String(contentLength) },
54
83
  duplex: "half",
55
84
  signal: controller.signal,
56
85
  });
57
86
  if (!resp.ok) {
58
87
  const t = await resp.text().catch(() => "");
59
- throw new Error(`Presigned upload failed: HTTP ${resp.status} ${t.slice(0, 200)}`);
88
+ throw presignedPutError(resp.status, t);
60
89
  }
61
90
  }
62
91
  finally {
63
92
  clearTimeout(timer);
93
+ if (tempPath)
94
+ await fs.rm(tempPath, { force: true }).catch(() => { });
64
95
  }
65
96
  }
66
97
  // ---------------------------------------------------------------------------
package/dist/tools/cfd.js CHANGED
@@ -27,6 +27,10 @@ COMMON MISTAKES: omitting feature_columns (required); choosing a reference_mesh
27
27
  emd_method: z.enum(["exact", "sinkhorn"]).optional().describe("EMD solver: exact LP (default) or sinkhorn (fast approximation for large grids)"),
28
28
  component_planes_physical: z.boolean().optional().describe("Physical-scale component-plane colorbars (default true)"),
29
29
  figures: z.boolean().optional().describe("Generate publication figures (default true)"),
30
+ transforms: z.record(z.enum(["log", "log1p", "log10", "sqrt", "square", "abs", "invert", "none"])).optional().describe("Per-feature transform applied before normalization (e.g. log1p to compress k/epsilon/omega). Same preprocessing engine as barsom training."),
31
+ normalize: z.union([z.enum(["all", "auto", "mad", "sigmoidal", "sepd"]), z.array(z.string())]).optional().describe("Normalization mode for SOM features (default auto = joint z-score of non-cyclic features). Use mad for heavy-tailed channels."),
32
+ normalization_methods: z.record(z.enum(["zscore", "mad", "sigmoidal", "sepd", "none"])).optional().describe("Per-feature normalization override; keys must be in feature_columns."),
33
+ row_range: z.tuple([z.number().int().min(1), z.number().int().min(1)]).optional().describe("1-based inclusive [start, end] row slice applied during preprocessing (and to mesh labels / cell volumes)."),
30
34
  label: z.string().optional().describe("Optional job label"),
31
35
  }, async (args) => {
32
36
  const { dataset_id, label, ...rest } = args;
@@ -4,54 +4,140 @@ import { z } from "zod";
4
4
  import fs from "node:fs/promises";
5
5
  import path from "node:path";
6
6
  import { registerAuditedTool } from "../audit.js";
7
- import { apiCall, resolveFilePathForUpload, textResult, pollUntilComplete, UPLOAD_DATASET_TIMEOUT_MS, LARGE_UPLOAD_BYTES, PRESIGNED_PUT_TIMEOUT_MS, POLL_STAGE_MAX_MS, streamFileSha256, putPresignedStream, } from "../shared.js";
7
+ import { apiCall, getWorkspaceRootAsync, resolveFilePathForUpload, textResult, pollUntilComplete, UPLOAD_DATASET_TIMEOUT_MS, LARGE_UPLOAD_BYTES, PRESIGNED_PUT_TIMEOUT_MS, POLL_STAGE_MAX_MS, streamFileSha256, putPresignedStream, } from "../shared.js";
8
+ import { GZIP_UPLOAD_HINT } from "../upload_hints.js";
9
+ /**
10
+ * Normalize a nullable string field from the API. Returns "" for absent values,
11
+ * empty strings, and the literal SQL/serialization sentinels "missing"/"null".
12
+ */
13
+ function cleanNullable(v) {
14
+ if (v == null)
15
+ return "";
16
+ const s = String(v).trim();
17
+ return s === "" || s.toLowerCase() === "missing" || s.toLowerCase() === "null" ? "" : s;
18
+ }
8
19
  export function registerDatasetsTool(server) {
9
- registerAuditedTool(server, "barmesh_datasets", `Upload, preview, or list the combined per-cell mesh CSV used for convergence analysis.
20
+ registerAuditedTool(server, "barmesh_datasets", `Upload, preview, list, get, subset, or delete the combined per-cell mesh CSV used for convergence analysis.
21
+
22
+ Formats: plain CSV/TSV or gzip (.csv.gz / .tsv.gz). For files above ~64 MB, prefer .csv.gz (often 2–3× smaller); large uploads use presigned direct-to-storage PUT and accept gzip bodies.
10
23
 
11
24
  | Action | Use when |
12
25
  |--------|----------|
13
26
  | upload | You have prepared a combined per-cell CSV (mesh_id + feature columns + cell volume V). Do this first. |
14
27
  | preview | After upload — verify the mesh column, feature columns, and volume column are present and numeric. |
28
+ | get | Fetch one dataset by id — status, staging fields, ingest_error (use after upload or when staging is slow). |
15
29
  | list | Find dataset IDs for analysis. |
30
+ | subset | Shrink a huge per-cell table server-side (row_range, filters, or sample_n). |
31
+ | delete | Remove a dataset permanently. |
32
+
33
+ action=upload: PREFER file_path — server reads from workspace root (token-efficient). Accepts .csv, .tsv, .csv.gz, .tsv.gz. Use csv_data only for small inline pastes (<10KB). If plain CSV exceeds the 5 GB upload cap, gzip it first (.csv.gz).
16
34
 
17
35
  BEST FOR: One combined CSV holding all meshes of a refinement study (one row per cell, a mesh label column, the physical channels, and a cell-volume column).
18
36
  NOT FOR: Raw OpenFOAM case directories — extract a per-cell CSV first (see barmesh_prepare_mesh_data).
19
37
  COMMON MISTAKES: omitting the cell-volume column (defaults to equal weights, which weakens the fingerprint); inconsistent feature columns across meshes.
20
38
  ESCALATION: If preview shows a feature column as non-numeric, fix the extraction and re-upload.`, {
21
- action: z.enum(["upload", "preview", "list"]).describe("upload: add the combined CSV; preview: inspect columns; list: see datasets"),
22
- name: z.string().optional().describe("Dataset name (required for upload)"),
23
- file_path: z.string().optional().describe("Path to the combined CSV (PREFERRED): absolute, file:// URI, or relative to the workspace root"),
39
+ action: z
40
+ .enum(["upload", "preview", "list", "get", "subset", "delete"])
41
+ .describe("upload: add CSV or .csv.gz; preview: inspect columns; list: see all datasets; get: fetch one dataset metadata (status/staging); subset: create filtered subset; delete: remove dataset"),
42
+ name: z.string().optional().describe("Dataset name (required for upload and subset)"),
43
+ file_path: z
44
+ .string()
45
+ .optional()
46
+ .describe("Path to local CSV or .csv.gz (PREFERRED): absolute path, file:// URI, or path relative to the workspace root. NOTE: relative paths resolve against the MCP workspace root — in Cursor/IDE clients that root is often the MCP install dir, not your project, so set BARIVIA_WORKSPACE_ROOT in the MCP config env (or pass an absolute path) if a relative path is 'not accessible'. Use .csv.gz for large mesh tables."),
24
47
  csv_data: z.string().optional().describe("Inline CSV string for small pastes only (<10KB). Prefer file_path."),
25
- dataset_id: z.string().optional().describe("Dataset ID (required for preview)"),
48
+ dataset_id: z.string().optional().describe("Dataset ID (required for preview, get, subset, and delete)"),
26
49
  n_rows: z.number().int().optional().default(5).describe("Sample rows to return (preview only)"),
27
- }, async (args) => {
28
- const { action, name, file_path, csv_data, dataset_id, n_rows } = args;
50
+ row_range: z
51
+ .tuple([z.number().int(), z.number().int()])
52
+ .optional()
53
+ .describe("For subset: [start, end] 1-based inclusive row range (e.g. [1, 2000])"),
54
+ filters: z.preprocess((v) => {
55
+ if (v === undefined || v === null)
56
+ return v;
57
+ if (Array.isArray(v))
58
+ return v;
59
+ if (typeof v === "object" && v !== null && "column" in v)
60
+ return [v];
61
+ return v;
62
+ }, z
63
+ .array(z.object({
64
+ column: z.string(),
65
+ op: z.enum(["eq", "ne", "in", "gt", "lt", "gte", "lte", "between"]),
66
+ value: z.union([z.string(), z.number(), z.array(z.union([z.string(), z.number()]))]),
67
+ }))
68
+ .optional()
69
+ .describe("For subset: filter conditions (AND logic). Single object or array.")),
70
+ filter: z
71
+ .object({
72
+ column: z.string(),
73
+ op: z.enum(["eq", "ne", "in", "gt", "lt", "gte", "lte", "between"]),
74
+ value: z.union([z.string(), z.number(), z.array(z.union([z.string(), z.number()]))]),
75
+ })
76
+ .optional()
77
+ .describe("Deprecated — use filters instead. Single filter condition."),
78
+ sample_n: z
79
+ .number()
80
+ .int()
81
+ .min(1)
82
+ .optional()
83
+ .describe("action=subset: keep a random N-row sample (seeded, row order preserved). Use to shrink a huge table server-side."),
84
+ sample_seed: z
85
+ .number()
86
+ .int()
87
+ .optional()
88
+ .describe("action=subset: RNG seed for sample_n (default 42)."),
89
+ }, async ({ action, name, file_path, csv_data, dataset_id, n_rows, row_range, filters, filter, sample_n, sample_seed, }) => {
29
90
  if (action === "upload") {
30
91
  if (!name)
31
92
  throw new Error("barmesh_datasets(upload) requires name.");
32
93
  let body;
33
94
  if (file_path && file_path.length > 0) {
34
- // Preflight: warm plan/limits and reject over-limit uploads before reading the file.
35
95
  await apiCall("GET", "/v1/system/info");
36
96
  const resolved = await resolveFilePathForUpload(file_path, server);
37
- const ext = path.extname(resolved).toLowerCase();
38
- if (ext !== ".csv" && ext !== ".tsv") {
39
- throw new Error("Only .csv and .tsv files can be uploaded as datasets.");
97
+ const lower = resolved.toLowerCase();
98
+ const isGzipInput = lower.endsWith(".gz");
99
+ const baseExt = path.extname(isGzipInput ? lower.slice(0, -3) : lower);
100
+ if (baseExt !== ".csv" && baseExt !== ".tsv") {
101
+ throw new Error("Only .csv, .tsv, .csv.gz, or .tsv.gz files can be uploaded as datasets.");
40
102
  }
41
103
  const HARD_MAX_BYTES = 5 * 1024 * 1024 * 1024; // 5 GB
42
- const stat = await fs.stat(resolved);
104
+ let stat;
105
+ try {
106
+ stat = await fs.stat(resolved);
107
+ }
108
+ catch {
109
+ throw new Error(`File not accessible at resolved path. Easiest fix: pass an ABSOLUTE path ` +
110
+ `(e.g. "/home/you/project/data.csv" or "C:\\\\Users\\\\you\\\\data.csv") or a file:// URI. ` +
111
+ `Relative paths resolve against the MCP workspace root (current: ${await getWorkspaceRootAsync(server)}); ` +
112
+ `set BARIVIA_WORKSPACE_ROOT in your MCP config env to your project directory to use them.`);
113
+ }
43
114
  if (stat.size > HARD_MAX_BYTES) {
44
- throw new Error(`File too large (${(stat.size / 1024 / 1024 / 1024).toFixed(2)} GB). Maximum upload size is 5 GB.`);
115
+ const gzipHint = isGzipInput ? "" : ` ${GZIP_UPLOAD_HINT}`;
116
+ throw new Error(`File too large (${(stat.size / 1024 / 1024 / 1024).toFixed(2)} GB). Maximum upload size is 5 GB.${gzipHint}`);
45
117
  }
46
118
  if (stat.size >= LARGE_UPLOAD_BYTES) {
47
119
  const idem = await streamFileSha256(resolved);
48
- const init = (await apiCall("POST", "/v1/datasets/upload-url", { name, size_bytes: stat.size }, { "Idempotency-Key": idem }));
120
+ let init;
121
+ try {
122
+ init = (await apiCall("POST", "/v1/datasets/upload-url", { name, size_bytes: stat.size }, { "Idempotency-Key": idem }));
123
+ }
124
+ catch (e) {
125
+ const msg = e instanceof Error ? e.message : String(e);
126
+ if (msg.includes("dataset_too_large") && !isGzipInput) {
127
+ throw new Error(`${msg} ${GZIP_UPLOAD_HINT}`);
128
+ }
129
+ throw e;
130
+ }
49
131
  const datasetId = (init.dataset_id ?? init.id);
50
- if (init.idempotent_replay) {
51
- return textResult({ id: datasetId, status: init.status, idempotent_replay: true,
52
- suggested_next_step: `barmesh_datasets(action=preview, dataset_id=${datasetId})` });
132
+ if (init.idempotent_replay && !init.upload_url) {
133
+ return textResult({
134
+ id: datasetId,
135
+ status: init.status,
136
+ idempotent_replay: true,
137
+ suggested_next_step: `barmesh_datasets(action=preview, dataset_id=${datasetId})`,
138
+ });
53
139
  }
54
- await putPresignedStream(init.upload_url, resolved, init.content_type ?? "application/octet-stream", PRESIGNED_PUT_TIMEOUT_MS);
140
+ await putPresignedStream(init.upload_url, resolved, init.content_type ?? "application/octet-stream", PRESIGNED_PUT_TIMEOUT_MS, isGzipInput);
55
141
  const fin = (await apiCall("POST", `/v1/datasets/${datasetId}/finalize`, {}));
56
142
  const jobId = (fin.id ?? fin.job_id);
57
143
  const poll = await pollUntilComplete(jobId, POLL_STAGE_MAX_MS);
@@ -59,10 +145,28 @@ ESCALATION: If preview shows a feature column as non-numeric, fix the extraction
59
145
  return textResult({ id: datasetId, status: "failed", error: poll.error ?? "staging failed" });
60
146
  }
61
147
  const ready = poll.status === "completed";
62
- return textResult({ id: datasetId, status: ready ? "ready" : "staging", job_id: jobId,
148
+ return textResult({
149
+ id: datasetId,
150
+ status: ready ? "ready" : "staging",
151
+ job_id: jobId,
63
152
  suggested_next_step: ready
64
- ? `barmesh_datasets(action=preview, dataset_id=${datasetId})`
65
- : `Still staging; poll barmesh_jobs(action=status, job_id="${jobId}").` });
153
+ ? `barmesh_datasets(action=preview, dataset_id=${datasetId}) to verify mesh, feature, and volume columns.`
154
+ : `Still staging; poll barmesh_jobs(action=status, job_id="${jobId}") then barmesh_datasets(action=preview, dataset_id=${datasetId}).`,
155
+ });
156
+ }
157
+ if (isGzipInput) {
158
+ const gzBytes = await fs.readFile(resolved);
159
+ const data = (await apiCall("POST", "/v1/datasets", gzBytes, {
160
+ "X-Dataset-Name": name,
161
+ "Content-Type": "text/csv",
162
+ "Content-Encoding": "gzip",
163
+ "Idempotency-Key": createHash("sha256").update(`${name}\n`).update(gzBytes).digest("hex"),
164
+ }, UPLOAD_DATASET_TIMEOUT_MS));
165
+ const gid = data.id ?? data.dataset_id;
166
+ if (gid != null) {
167
+ data.suggested_next_step = `Next: barmesh_datasets(action=preview, dataset_id=${gid}) to verify the mesh, feature, and volume columns.`;
168
+ }
169
+ return textResult(data);
66
170
  }
67
171
  body = await fs.readFile(resolved, "utf-8");
68
172
  }
@@ -76,8 +180,6 @@ ESCALATION: If preview shows a feature column as non-numeric, fix the extraction
76
180
  const uploadHeaders = {
77
181
  "X-Dataset-Name": name,
78
182
  "Content-Type": "text/csv",
79
- // Deterministic key so a timed-out retry of the SAME upload reconciles to
80
- // the original dataset server-side instead of creating a duplicate.
81
183
  "Idempotency-Key": createHash("sha256").update(`${name}\n`).update(body).digest("hex"),
82
184
  };
83
185
  let uploadBody = body;
@@ -87,8 +189,9 @@ ESCALATION: If preview shows a feature column as non-numeric, fix the extraction
87
189
  }
88
190
  const data = (await apiCall("POST", "/v1/datasets", uploadBody, uploadHeaders, UPLOAD_DATASET_TIMEOUT_MS));
89
191
  const id = data.id ?? data.dataset_id;
90
- if (id != null)
192
+ if (id != null) {
91
193
  data.suggested_next_step = `Next: barmesh_datasets(action=preview, dataset_id=${id}) to verify the mesh, feature, and volume columns.`;
194
+ }
92
195
  return textResult(data);
93
196
  }
94
197
  if (action === "preview") {
@@ -97,8 +200,68 @@ ESCALATION: If preview shows a feature column as non-numeric, fix the extraction
97
200
  const data = await apiCall("GET", `/v1/datasets/${dataset_id}/preview?n_rows=${n_rows ?? 5}`);
98
201
  return textResult(data);
99
202
  }
100
- // list
101
- const data = await apiCall("GET", "/v1/datasets");
102
- return textResult(data);
203
+ if (action === "subset") {
204
+ if (!dataset_id)
205
+ throw new Error("barmesh_datasets(subset) requires dataset_id.");
206
+ if (!name)
207
+ throw new Error("barmesh_datasets(subset) requires name.");
208
+ const allFilters = filters ?? (filter ? [filter] : undefined);
209
+ if (row_range === undefined && allFilters === undefined && sample_n === undefined) {
210
+ throw new Error("barmesh_datasets(subset) requires at least one of row_range, filters, or sample_n.");
211
+ }
212
+ const body = { name };
213
+ if (row_range !== undefined)
214
+ body.row_range = row_range;
215
+ if (allFilters !== undefined)
216
+ body.filters = allFilters;
217
+ if (sample_n !== undefined)
218
+ body.sample_n = sample_n;
219
+ if (sample_seed !== undefined)
220
+ body.sample_seed = sample_seed;
221
+ const data = await apiCall("POST", `/v1/datasets/${dataset_id}/subset`, body);
222
+ return textResult(data);
223
+ }
224
+ if (action === "list") {
225
+ const data = (await apiCall("GET", "/v1/datasets"));
226
+ if (Array.isArray(data)) {
227
+ const lines = data.map((ds) => {
228
+ const id = String(ds.id ?? "");
229
+ const dsName = String(ds.name ?? "");
230
+ const rows = ds.rows != null ? Number(ds.rows) : "?";
231
+ const cols = ds.cols != null ? Number(ds.cols) : "?";
232
+ const st = ds.status != null ? String(ds.status) : "ready";
233
+ const statusBit = st !== "ready" ? ` | status=${st}` : "";
234
+ const ingestErr = cleanNullable(ds.ingest_error);
235
+ const err = ingestErr ? ` | ingest_error=${ingestErr}` : "";
236
+ return `${dsName} (${id}) — ${rows}×${cols}${statusBit}${err}`;
237
+ });
238
+ return { content: [{ type: "text", text: lines.length > 0 ? lines.join("\n") : "No datasets." }] };
239
+ }
240
+ return textResult(data);
241
+ }
242
+ if (action === "get") {
243
+ if (!dataset_id)
244
+ throw new Error("barmesh_datasets(get) requires dataset_id.");
245
+ const ds = (await apiCall("GET", `/v1/datasets/${dataset_id}`));
246
+ const lines = [
247
+ `Dataset: ${ds.name ?? "?"} (${ds.id ?? dataset_id})`,
248
+ `Status: ${ds.status ?? "ready"}`,
249
+ `Rows × cols: ${ds.rows ?? "?"} × ${ds.cols ?? "?"}`,
250
+ ds.size_bytes != null ? `Size: ${Number(ds.size_bytes).toLocaleString()} bytes` : "",
251
+ ds.staged_prefix != null ? `Staged prefix: ${String(ds.staged_prefix)}` : "",
252
+ ds.staged_version != null ? `Staged version: ${String(ds.staged_version)}` : "",
253
+ ds.stage_job_id != null ? `Stage job: ${String(ds.stage_job_id)} (poll barmesh_jobs(action=status))` : "",
254
+ cleanNullable(ds.ingest_error) ? `Ingest error: ${cleanNullable(ds.ingest_error)}` : "",
255
+ ds.created_at != null ? `Created: ${String(ds.created_at)}` : "",
256
+ ].filter(Boolean);
257
+ return { content: [{ type: "text", text: lines.join("\n") }] };
258
+ }
259
+ if (action === "delete") {
260
+ if (!dataset_id)
261
+ throw new Error("barmesh_datasets(delete) requires dataset_id.");
262
+ const data = await apiCall("DELETE", `/v1/datasets/${dataset_id}`);
263
+ return textResult(data);
264
+ }
265
+ throw new Error("Invalid action");
103
266
  });
104
267
  }
@@ -0,0 +1 @@
1
+ export const GZIP_UPLOAD_HINT = "Try gzip: save as .csv.gz (often 2–3× smaller). The upload limit applies to the compressed file size on presigned/large uploads.";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@barivia/barmesh-mcp",
3
- "version": "0.3.1",
3
+ "version": "0.4.1",
4
4
  "description": "barmesh MCP proxy — SOM-based CFD mesh-convergence and Richardson/GCI analysis on the Barivia cloud API",
5
5
  "keywords": [
6
6
  "mcp",