@barivia/barmesh-mcp 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -55,7 +55,8 @@ API key; otherwise the analysis calls return HTTP 403. Contact Barivia to enable
55
55
  | `barmesh_datasets` | Upload / preview / list / get / subset / delete the mesh CSV. |
56
56
  | `barmesh_mesh_convergence` | SOM fingerprint distances (async job). |
57
57
  | `barmesh_richardson` | Richardson/GCI on scalar QoIs (async job). |
58
- | `barmesh_jobs` | Poll job status / list jobs (auto-polls CFD prepare + finalize when applicable). Reports phase, epoch/total, elapsed, ETA, and QE live during the SOM training. |
58
+ | `barmesh_jobs` | Poll job status / block until terminal (`action=monitor`) / list jobs. Auto-polls CFD prepare + finalize when applicable. Reports phase, epoch/total, elapsed, ETA, and QE live during the SOM training. |
59
+ | `barmesh_training_monitor` | Alias for `barmesh_jobs(action=monitor)` — server-side poll with throttled snapshots until terminal (preferred after submit). |
59
60
  | `barmesh_results` | Distances, convergence reading, and figures. `action=get` inlines headline PNGs; `action=download` saves artifacts to disk; `action=render` produces publication PDFs on demand. |
60
61
  | `barmesh_results_explorer` | Interactive MCP App with a **figure dropdown above the plot** (or standalone localhost page) to browse every figure after a job completes. |
61
62
  | `barmesh_send_feedback` | Send a short note or bug report to the Barivia team. |
@@ -76,11 +77,14 @@ API key; otherwise the analysis calls return HTTP 403. Contact Barivia to enable
76
77
  figures). PNG previews inline; PDF/SVG offered for download when rendered.
77
78
  - **Uploads:** large CSVs use presigned PUT with explicit `Content-Length`; `.csv.gz` /
78
79
  `.tsv.gz` accepted. Pin `@barivia/barmesh-mcp@0.5.2` (clear `~/.npm/_npx` if stale).
79
- - **Live progress:** `barmesh_jobs(action=status)` exposes phase, epoch/total, elapsed,
80
- ETA, QE during SOM training, plus cfd_finalize sub-status when figures are deferred.
80
+ - **Live progress:** `barmesh_training_monitor(job_id)` or `barmesh_jobs(action=monitor)` block
81
+ server-side with compact snapshots (phase, epoch, QE/TE, ETA, ordering_errors tail) until
82
+ terminal or `block_until_sec` (default 900). Waits for `cfd_finalize` by default. One-shot:
83
+ `barmesh_jobs(action=status)`.
81
84
 
82
85
  ### Migration notes
83
86
 
87
+ - **`barmesh_training_monitor` (0.5.3):** server-side blocking monitor with throttled snapshots — preferred after job submit instead of manual `barmesh_jobs(status)` loops. Equivalent to `barmesh_jobs(action=monitor)`.
84
88
  - **`send_feedback` → `barmesh_send_feedback` (0.3.0):** the feedback tool was renamed so it no longer collides with the `@barivia/barsom-mcp` tool of the same name when both servers are enabled in one client. Update any direct call sites; the behavior is unchanged.
85
89
 
86
90
  ## Data format (mesh_convergence)
package/dist/index.js CHANGED
@@ -1,2 +1,2 @@
1
1
  #!/usr/bin/env node
2
- import{McpServer as e}from"@modelcontextprotocol/sdk/server/mcp.js";import{StdioServerTransport as r}from"@modelcontextprotocol/sdk/server/stdio.js";import{getUiCapability as s,registerAppResource as o,RESOURCE_MIME_TYPE as n}from"@modelcontextprotocol/ext-apps/server";import{startVizServer as t}from"./viz-server.js";import{API_KEY as i,CLIENT_VERSION as a,apiCall as c,apiRawCall as m,loadViewHtml as l,setVizPort as d,setClientSupportsMcpApps as p}from"./shared.js";import{registerGuideTool as h}from"./tools/guide.js";import{registerDatasetsTool as u}from"./tools/datasets.js";import{registerCfdTools as f}from"./tools/cfd.js";import{registerJobsTool as b}from"./tools/jobs.js";import{registerResultsTool as v}from"./tools/results.js";import{registerResultsExplorerTool as y,RESULTS_EXPLORER_URI as _}from"./tools/barmesh_results_explorer.js";import{registerFeedbackTool as g}from"./tools/feedback.js";i||(console.error("Error: BARIVIA_API_KEY not set. Set it in your MCP client config."),process.exit(1));(async function(){const i=new e({name:"barmesh",version:a},{instructions:"# Barivia barmesh — CFD mesh-convergence analytics\n\nSOM-based mesh-convergence verification: compare CFD meshes of a refinement study by the\nvolume-weighted distribution their cells form on a shared self-organizing map, plus\nclassical Richardson/GCI on scalar quantities.\n\n## Two tracks\n- barmesh_mesh_convergence: high-dimensional field comparison. Symmetric KL and\n Wasserstein-1 (EMD) distances between each mesh's SOM fingerprint and a reference,\n and between consecutive meshes. Decreasing, plateauing distances toward the finest\n mesh indicate sufficiency. Complements (does not replace) numerical uncertainty analysis.\n- barmesh_richardson: classical grid-convergence index on scalar QoIs.\n\n## Workflow (read-only first)\n1. barmesh_guide_workflow — orient and confirm your plan includes CFD tools.\n2. 
barmesh_prepare_mesh_data — recipe for the combined per-cell CSV (mesh_id + features + cell volume V).\n3. barmesh_datasets(action=upload) then preview.\n4. barmesh_mesh_convergence (and/or barmesh_richardson) — returns a job id.\n5. barmesh_jobs(action=status) — poll every 10-20s, for minutes if needed.\n6. barmesh_results(action=get) — distances, convergence reading, and figures; then\n barmesh_results_explorer(job_id) to browse every figure interactively.\n\nThese tools are gated by the 'cfd' entitlement; analysis calls return 403 if your plan\ndoes not include it."});o(i,_,_,{mimeType:n},async()=>{const e=await l("barmesh-results-explorer");return{contents:[{uri:_,mimeType:n,text:e??"<html><body>Results Explorer view not built yet. Run: npm run build:views</body></html>"}]}}),h(i),y(i),u(i),f(i),b(i),v(i),g(i);try{const e=await t(c,m,l);d(e)}catch(e){process.env.BARIVIA_VIZ_PORT&&console.error("barmesh viz server failed to start:",e)}const w=i.server;w.oninitialized=()=>{const e=w.getClientCapabilities(),r=s(e);p(!!r?.mimeTypes?.includes(n))};const j=new r;await i.connect(j),console.error(`barmesh-mcp ${a} ready (API: ${process.env.BARIVIA_API_URL??"https://api.barivia.se"})`)})().catch(e=>{console.error("Fatal error starting barmesh-mcp:",e),process.exit(1)});
2
+ import{McpServer as e}from"@modelcontextprotocol/sdk/server/mcp.js";import{StdioServerTransport as r}from"@modelcontextprotocol/sdk/server/stdio.js";import{getUiCapability as s,registerAppResource as o,RESOURCE_MIME_TYPE as t}from"@modelcontextprotocol/ext-apps/server";import{startVizServer as n}from"./viz-server.js";import{API_KEY as i,CLIENT_VERSION as a,apiCall as c,apiRawCall as m,loadViewHtml as l,setVizPort as d,setClientSupportsMcpApps as h}from"./shared.js";import{registerGuideTool as p}from"./tools/guide.js";import{registerDatasetsTool as b}from"./tools/datasets.js";import{registerCfdTools as f}from"./tools/cfd.js";import{registerJobsTool as u}from"./tools/jobs.js";import{registerResultsTool as _}from"./tools/results.js";import{registerResultsExplorerTool as v,RESULTS_EXPLORER_URI as g}from"./tools/barmesh_results_explorer.js";import{registerTrainingMonitorTool as y}from"./tools/training_monitor.js";import{registerFeedbackTool as j}from"./tools/feedback.js";i||(console.error("Error: BARIVIA_API_KEY not set. Set it in your MCP client config."),process.exit(1));(async function(){const i=new e({name:"barmesh",version:a},{instructions:"# Barivia barmesh — CFD mesh-convergence analytics\n\nSOM-based mesh-convergence verification: compare CFD meshes of a refinement study by the\nvolume-weighted distribution their cells form on a shared self-organizing map, plus\nclassical Richardson/GCI on scalar quantities.\n\n## Two tracks\n- barmesh_mesh_convergence: high-dimensional field comparison. Symmetric KL and\n Wasserstein-1 (EMD) distances between each mesh's SOM fingerprint and a reference,\n and between consecutive meshes. Decreasing, plateauing distances toward the finest\n mesh indicate sufficiency. Complements (does not replace) numerical uncertainty analysis.\n- barmesh_richardson: classical grid-convergence index on scalar QoIs.\n\n## Workflow (read-only first)\n1. barmesh_guide_workflow — orient and confirm your plan includes CFD tools.\n2. 
barmesh_prepare_mesh_data — recipe for the combined per-cell CSV (mesh_id + features + cell volume V).\n3. barmesh_datasets(action=upload) then preview.\n4. barmesh_mesh_convergence (and/or barmesh_richardson) — returns a job id.\n5. barmesh_training_monitor(job_id) or barmesh_jobs(action=monitor) — blocks with throttled snapshots until done (preferred); or barmesh_jobs(status) for one-shot polls every 10-20s.\n6. barmesh_results(action=get) — distances, convergence reading, and figures; then\n barmesh_results_explorer(job_id) to browse every figure interactively.\n\nThese tools are gated by the 'cfd' entitlement; analysis calls return 403 if your plan\ndoes not include it."});o(i,g,g,{mimeType:t},async()=>{const e=await l("barmesh-results-explorer");return{contents:[{uri:g,mimeType:t,text:e??"<html><body>Results Explorer view not built yet. Run: npm run build:views</body></html>"}]}}),p(i),v(i),b(i),f(i),u(i),y(i),_(i),j(i);try{const e=await n(c,m,l);d(e)}catch(e){process.env.BARIVIA_VIZ_PORT&&console.error("barmesh viz server failed to start:",e)}const w=i.server;w.oninitialized=()=>{const e=w.getClientCapabilities(),r=s(e);h(!!r?.mimeTypes?.includes(t))};const x=new r;await i.connect(x),console.error(`barmesh-mcp ${a} ready (API: ${process.env.BARIVIA_API_URL??"https://api.barivia.se"})`)})().catch(e=>{console.error("Fatal error starting barmesh-mcp:",e),process.exit(1)});
@@ -0,0 +1,208 @@
1
+ /**
2
+ * Server-side job monitor for barmesh async workflows.
3
+ * Polls GET /v1/jobs/:id until terminal (or block_until timeout) and emits
4
+ * compact throttled snapshots so agents avoid manual status poll loops.
5
+ */
6
+ import { apiCall } from "./shared.js";
7
+ import { formatJobStatusText } from "./job_status_format.js";
8
+ import { pollCfdFinalizeIfPresent, refreshJobAfterFinalize } from "./cfd_finalize.js";
9
/** Default upper bound, in seconds, on how long one monitor call blocks. */
export const DEFAULT_BLOCK_UNTIL_SEC = 900;
/** Default delay, in seconds, between two job-status polls. */
export const DEFAULT_POLL_INTERVAL_SEC = 5;
/** Smallest poll interval monitorJob accepts; smaller requests are raised to this value. */
export const MIN_POLL_INTERVAL_SEC = 5;
/**
 * Number of polls after the last recorded snapshot at which a heartbeat
 * snapshot is forced even though nothing changed.
 *
 * Must be greater than 1: with a value of 1 the heartbeat is due on every
 * poll, so shouldRecordSnapshot() never suppresses anything and the monitor
 * output is not throttled at all. 6 polls at the default 5 s interval means
 * an unchanged job still produces one snapshot every 30 s.
 */
export const HEARTBEAT_POLLS = 6;
13
/**
 * Pause the calling async function without blocking the event loop.
 *
 * @param {number} ms - Delay handed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
16
/**
 * Read `data[key]` as a finite number.
 *
 * Only real numbers and non-blank numeric strings are accepted. Plain
 * `Number(v)` coercion is avoided on purpose: it maps "", whitespace, `[]`
 * and `false` to 0 and `true` to 1, which would surface absent fields as
 * genuine metric values in a snapshot.
 *
 * @param {object} data - Job payload to read from.
 * @param {string} key - Property name to look up.
 * @returns {number|undefined} The finite numeric value, or undefined when the
 *   field is missing, blank, non-numeric, NaN or infinite.
 */
function num(data, key) {
    const v = data[key];
    if (typeof v === "number")
        return Number.isFinite(v) ? v : undefined;
    if (typeof v === "string" && v.trim() !== "") {
        const parsed = Number(v);
        return Number.isFinite(parsed) ? parsed : undefined;
    }
    return undefined;
}
22
/**
 * Read `data[key]` as a non-empty string.
 *
 * @param {object} data - Job payload to read from.
 * @param {string} key - Property name to look up.
 * @returns {string|undefined} `String(value)`, or undefined when the value is
 *   null, undefined, or stringifies to the empty string.
 */
function str(data, key) {
    const raw = data[key];
    const text = raw == null ? "" : String(raw);
    return text === "" ? undefined : text;
}
28
/**
 * Return the last `n` usable values of `data.ordering_errors`.
 *
 * Entries are validated before the tail is taken, so placeholder entries
 * (null, "", non-numeric text) neither shorten the tail nor get coerced into
 * a fabricated 0 — `Number(null)` and `Number("")` are both 0.
 *
 * @param {object} data - Job payload; `ordering_errors` is expected to be an array.
 * @param {number} [n=4] - Maximum number of trailing values to return.
 * @returns {number[]|undefined} Up to `n` finite numbers in original order, or
 *   undefined when the field is not an array, holds no usable value, or `n`
 *   is not positive.
 */
function tailOrderingErrors(data, n = 4) {
    const raw = data.ordering_errors;
    // `!(n > 0)` also rejects NaN; without it slice(-0) would return the whole array.
    if (!Array.isArray(raw) || !(n > 0))
        return undefined;
    const values = [];
    for (const entry of raw) {
        const numericLike = typeof entry === "number" || (typeof entry === "string" && entry.trim() !== "");
        const value = numericLike ? Number(entry) : NaN;
        if (Number.isFinite(value))
            values.push(value);
    }
    return values.length > 0 ? values.slice(-n) : undefined;
}
34
/**
 * Condense a raw job payload into a compact progress snapshot.
 *
 * `elapsed_sec`, `status` and `progress_pct` are always present; every other
 * field is added only when the payload carries a usable value, so snapshots
 * stay small.
 *
 * @param {object} data - Job payload as returned by GET /v1/jobs/:id.
 * @param {number} elapsedSec - Seconds since monitoring started.
 * @param {string} [note] - Optional free-text annotation attached to the snapshot.
 * @returns {object} Snapshot with elapsed_sec, status, progress_pct and, when
 *   available, phase, epoch, total_epochs, qe, te, eta_sec,
 *   ordering_errors_tail and note.
 */
export function snapshotFromJob(data, elapsedSec, note) {
    const round4 = (x) => Math.round(x * 10_000) / 10_000;
    // Read progress through num(): a missing or non-numeric value counts as 0
    // instead of turning progress_pct into NaN (shown as "NaN%" and never
    // registering as a progress change).
    const progressPct = (num(data, "progress") ?? 0) * 100;
    const snap = {
        elapsed_sec: elapsedSec,
        status: String(data.status ?? "unknown"),
        // one decimal place
        progress_pct: Math.round(progressPct * 10) / 10,
    };
    const phase = str(data, "progress_phase");
    if (phase)
        snap.phase = phase;
    const epoch = num(data, "epoch");
    if (epoch != null)
        snap.epoch = epoch;
    const total = num(data, "total_epochs");
    if (total != null)
        snap.total_epochs = total;
    const qe = num(data, "quantization_error");
    if (qe != null)
        snap.qe = round4(qe);
    const te = num(data, "topographic_error");
    if (te != null)
        snap.te = round4(te);
    const eta = num(data, "training_eta_sec");
    // A zero or negative ETA carries no information, so it is left out.
    if (eta != null && eta > 0)
        snap.eta_sec = Math.round(eta);
    const tail = tailOrderingErrors(data);
    if (tail && tail.length > 0)
        snap.ordering_errors_tail = tail;
    if (note)
        snap.note = note;
    return snap;
}
67
/**
 * Decide whether `next` differs enough from `prev` to be worth recording.
 *
 * A snapshot is recorded when there is no previous one, when it carries a
 * note, when status, phase, epoch, QE or TE changed, or when progress moved
 * by at least one percentage point.
 *
 * @param {object|null|undefined} prev - Last recorded snapshot, if any.
 * @param {object} next - Candidate snapshot.
 * @returns {boolean} True when `next` should be kept.
 */
export function shouldRecordSnapshot(prev, next) {
    if (!prev)
        return true;
    const progressMoved = Math.abs(prev.progress_pct - next.progress_pct) >= 1;
    return (
        prev.status !== next.status ||
        Boolean(next.note) ||
        prev.phase !== next.phase ||
        prev.epoch !== next.epoch ||
        progressMoved ||
        prev.qe !== next.qe ||
        prev.te !== next.te
    );
}
85
/**
 * Render one snapshot as a single " | "-separated line.
 *
 * The leading segment (elapsed time, status, progress) is always present;
 * the remaining segments appear only for fields the snapshot actually has.
 * The epoch segment needs both `epoch` and `total_epochs`.
 *
 * @param {object} s - Snapshot as produced by snapshotFromJob.
 * @returns {string} Human-readable one-line summary.
 */
export function formatSnapshotLine(s) {
    const hasEpochs = s.epoch != null && s.total_epochs != null;
    const tail = s.ordering_errors_tail;
    const segments = [
        `[+${s.elapsed_sec}s] ${s.status} ${s.progress_pct.toFixed(1)}%`,
        s.phase ? `phase ${s.phase}` : null,
        hasEpochs ? `epoch ${s.epoch}/${s.total_epochs}` : null,
        s.qe != null ? `QE ${s.qe.toFixed(4)}` : null,
        s.te != null ? `TE ${s.te.toFixed(4)}` : null,
        s.eta_sec != null ? `ETA ~${s.eta_sec}s` : null,
        tail?.length ? `ordering_errors tail [${tail.map((x) => x.toFixed(4)).join(", ")}]` : null,
        s.note ? s.note : null,
    ];
    return segments.filter((segment) => segment !== null).join(" | ");
}
104
/**
 * Build the multi-line text report for a finished monitor run.
 *
 * Layout: a header line, one line per snapshot, a blank line, the outcome
 * (terminal status, or a timeout notice plus re-run hint), and finally the
 * suggested next step.
 *
 * @param {object} result - Monitor result (job_id, snapshots, timed_out,
 *   status_text, suggested_next_step).
 * @param {object} opts - Effective limits shown in the header
 *   (block_until_sec, poll_interval_sec).
 * @returns {string} Newline-joined report.
 */
export function formatMonitorText(result, opts) {
    const header = `Job ${result.job_id} monitor (block_until=${opts.block_until_sec}s, poll=${opts.poll_interval_sec}s):`;
    const snapshotLines = Array.from(result.snapshots, (snap) => formatSnapshotLine(snap));
    const outcome = result.timed_out
        ? [
            `Timed out before terminal state. Last status: ${result.status_text}`,
            `Re-run barmesh_training_monitor(job_id="${result.job_id}") or barmesh_jobs(action=monitor, job_id="${result.job_id}").`,
        ]
        : [`Terminal: ${result.status_text}`];
    return [header, ...snapshotLines, "", ...outcome, result.suggested_next_step].join("\n");
}
121
/**
 * Pick the follow-up hint that matches the job's current status.
 *
 * @param {string} job_id - Job identifier interpolated into the hint.
 * @param {object} data - Job payload; reads status, finalize_job_id and failure_stage.
 * @returns {string} One-sentence instruction for the caller.
 */
function suggestedNextStep(job_id, data) {
    switch (String(data.status ?? "")) {
        case "completed":
            // A finalize job id means figures are rendered by a separate job.
            return str(data, "finalize_job_id")
                ? `Compute done; cfd_finalize may still be running. Poll barmesh_jobs(status) or re-run monitor with wait_finalize=true, then barmesh_results(action=get, job_id="${job_id}").`
                : `Next: barmesh_results(action=get, job_id="${job_id}") or barmesh_results_explorer(job_id="${job_id}").`;
        case "failed": {
            const stage = str(data, "failure_stage");
            const where = stage ? ` at ${stage}` : "";
            return `Job failed${where}. Read the error above before retrying.`;
        }
        case "cancelled":
            return `Job cancelled. Confirm with barmesh_jobs(action=status, job_id="${job_id}").`;
        default:
            return `Still running — re-run barmesh_training_monitor(job_id="${job_id}") to continue waiting.`;
    }
}
138
/**
 * Probe for a training log and, if the request succeeds, return a hint
 * pointing at it.
 *
 * Only completed or failed jobs are probed. The probe is best-effort: any
 * error from the request is deliberately swallowed and simply means "no hint".
 *
 * @param {string} job_id - Job whose training log is probed.
 * @param {object} data - Job payload; only `status` is read.
 * @returns {Promise<string|undefined>} Hint text, or undefined when the job
 *   is in another state or the request fails.
 */
async function fetchTrainingLogHint(job_id, data) {
    const state = String(data.status ?? "");
    const finished = state === "completed" || state === "failed";
    if (!finished)
        return undefined;
    let reachable = true;
    try {
        await apiCall("GET", `/v1/results/${job_id}/training-log`);
    }
    catch {
        reachable = false;
    }
    return reachable
        ? `Training log available at GET /v1/results/${job_id}/training-log (learning_curve.png in results after finalize).`
        : undefined;
}
150
/**
 * Poll a job until it reaches a terminal state or the time budget runs out,
 * collecting throttled progress snapshots along the way.
 *
 * @param {string} job_id - Job to poll via GET /v1/jobs/:id.
 * @param {object} [options]
 * @param {number} [options.block_until_sec] - Time budget in seconds; values
 *   below 30 are raised to 30, missing/non-numeric values use DEFAULT_BLOCK_UNTIL_SEC.
 * @param {number} [options.poll_interval_sec] - Seconds between polls; values
 *   below MIN_POLL_INTERVAL_SEC are raised to it, missing/non-numeric values
 *   use DEFAULT_POLL_INTERVAL_SEC.
 * @param {boolean} [options.wait_finalize] - Unless explicitly false, also
 *   wait for the cfd_finalize job of a completed job that reports one.
 * @returns {Promise<object>} `{ job_id, terminal, timed_out, snapshots,
 *   status_text, data, suggested_next_step }`.
 * @throws Whatever apiCall / formatJobStatusText throw; errors from the
 *   finalize wait are caught and recorded as a snapshot note instead.
 */
export async function monitorJob(job_id, options = {}) {
    // A NaN limit would make the deadline NaN, the loop condition false, and
    // the job would never be polled at all — fall back to the default instead.
    const numberOr = (value, fallback) => {
        const parsed = Number(value ?? fallback);
        return Number.isFinite(parsed) ? parsed : fallback;
    };
    const block_until_sec = Math.max(30, numberOr(options.block_until_sec, DEFAULT_BLOCK_UNTIL_SEC));
    const poll_interval_sec = Math.max(MIN_POLL_INTERVAL_SEC, numberOr(options.poll_interval_sec, DEFAULT_POLL_INTERVAL_SEC));
    const wait_finalize = options.wait_finalize !== false;
    const blockMs = block_until_sec * 1000;
    const pollMs = poll_interval_sec * 1000;
    const start = Date.now();
    const elapsedMs = () => Date.now() - start;
    const elapsedSec = () => Math.round(elapsedMs() / 1000);
    const isTerminal = (s) => s === "completed" || s === "failed" || s === "cancelled";
    const snapshots = [];
    let lastSnap = null;
    let data = {};
    let heartbeat = 0;
    while (elapsedMs() < blockMs) {
        data = await apiCall("GET", `/v1/jobs/${job_id}`);
        const snap = snapshotFromJob(data, elapsedSec());
        heartbeat += 1;
        // Keep the snapshot when something changed, or as a heartbeat once
        // HEARTBEAT_POLLS polls have passed since the last recorded one.
        if (shouldRecordSnapshot(lastSnap, snap) || heartbeat >= HEARTBEAT_POLLS) {
            snapshots.push(snap);
            lastSnap = snap;
            heartbeat = 0;
        }
        if (isTerminal(String(data.status ?? "")))
            break;
        // Never sleep past the deadline: a full interval here would overrun
        // block_until_sec by up to one poll interval without another poll.
        const remainingMs = blockMs - elapsedMs();
        if (remainingMs <= 0)
            break;
        await sleep(Math.min(pollMs, remainingMs));
    }
    const status = String(data.status ?? "");
    const terminal = isTerminal(status);
    const timed_out = !terminal;
    if (terminal && status === "completed" && wait_finalize && data.finalize_job_id) {
        const finalizeId = String(data.finalize_job_id);
        snapshots.push(snapshotFromJob(data, elapsedSec(), `cfd_finalize ${finalizeId} started — waiting for figure render`));
        try {
            // The third argument is what is left of the time budget in ms
            // (presumably used as the finalize wait timeout — confirm in cfd_finalize.js).
            const { note } = await pollCfdFinalizeIfPresent(job_id, data, Math.max(0, blockMs - elapsedMs()));
            data = await refreshJobAfterFinalize(job_id);
            snapshots.push(snapshotFromJob(data, elapsedSec(), note ?? `cfd_finalize ${finalizeId} completed`));
        }
        catch (err) {
            // Anything can be thrown; do not assume an Error with a message.
            const reason = err instanceof Error ? err.message : String(err);
            snapshots.push(snapshotFromJob(data, elapsedSec(), `cfd_finalize failed: ${reason}`));
        }
    }
    const statusText = await formatJobStatusText(job_id, data);
    const logHint = terminal ? await fetchTrainingLogHint(job_id, data) : undefined;
    let suggested = suggestedNextStep(job_id, data);
    if (logHint)
        suggested += ` ${logHint}`;
    return {
        job_id,
        terminal,
        timed_out,
        snapshots,
        status_text: statusText,
        data,
        suggested_next_step: suggested,
    };
}
package/dist/shared.js CHANGED
@@ -22,7 +22,7 @@ export const FETCH_TIMEOUT_MS = parseInt(process.env.BARIVIA_FETCH_TIMEOUT_MS ??
22
22
  export const MAX_RETRIES = 2;
23
23
  export const RETRYABLE_STATUS = new Set([502, 503, 504]);
24
24
  /** Single source of truth for the proxy version. Keep in sync with package.json on bump. */
25
- export const CLIENT_VERSION = "0.5.2";
25
+ export const CLIENT_VERSION = "0.5.3";
26
26
  export const PUBLIC_SITE_ORIGIN = "https://barivia.se";
27
27
  /** Large per-cell CSV uploads may exceed the default fetch timeout. */
28
28
  export const UPLOAD_DATASET_TIMEOUT_MS = 180_000;
package/dist/tools/cfd.js CHANGED
@@ -47,7 +47,7 @@ COMMON MISTAKES: omitting feature_columns (required); choosing a reference_mesh
47
47
  const id = data.id;
48
48
  if (id != null) {
49
49
  const prep = data.prepare_job_id != null ? " (dataset prepare complete)" : "";
50
- data.suggested_next_step = `Poll barmesh_jobs(action=status, job_id=${id})${prep}; on completion call barmesh_results(action=get, job_id=${id}).`;
50
+ data.suggested_next_step = `Run barmesh_training_monitor(job_id="${id}") or barmesh_jobs(action=monitor, job_id="${id}")${prep}; on completion call barmesh_results(action=get, job_id="${id}").`;
51
51
  }
52
52
  return textResult(data);
53
53
  });
@@ -80,7 +80,7 @@ COMMON MISTAKES: not providing h_column or n_cells_column; mixing QoIs with diff
80
80
  const data = (await apiCall("POST", "/v1/cfd/richardson", body));
81
81
  const id = data.id;
82
82
  if (id != null)
83
- data.suggested_next_step = `Poll barmesh_jobs(action=status, job_id=${id}); on completion call barmesh_results(action=get, job_id=${id}).`;
83
+ data.suggested_next_step = `Run barmesh_training_monitor(job_id="${id}") or barmesh_jobs(action=monitor, job_id="${id}"); on completion call barmesh_results(action=get, job_id="${id}").`;
84
84
  return textResult(data);
85
85
  });
86
86
  }
@@ -2,7 +2,7 @@ import { registerAuditedTool } from "../audit.js";
2
2
  import { apiCall, textResult } from "../shared.js";
3
3
  const OFFLINE_GUIDE = `barmesh: CFD mesh-convergence on the Barivia API.
4
4
  Two tracks: barmesh_mesh_convergence (SOM fingerprint distances) and barmesh_richardson (classical GCI).
5
- Workflow: barmesh_prepare_mesh_data -> barmesh_datasets(upload) -> barmesh_mesh_convergence / barmesh_richardson -> barmesh_jobs(status) -> barmesh_results(get).
5
+ Workflow: barmesh_prepare_mesh_data -> barmesh_datasets(upload) -> barmesh_mesh_convergence / barmesh_richardson -> barmesh_training_monitor (or barmesh_jobs monitor/status) -> barmesh_results(get).
6
6
  (API unreachable; this is the offline summary. Set BARIVIA_API_KEY / BARIVIA_API_URL.)`;
7
7
  const OFFLINE_PREP = `barmesh mesh-data prep (offline summary; API unreachable):
8
8
  Build ONE combined per-cell CSV across all meshes of the refinement study:
@@ -3,16 +3,41 @@ import { registerAuditedTool } from "../audit.js";
3
3
  import { apiCall, textResult } from "../shared.js";
4
4
  import { pollCfdFinalizeIfPresent, refreshJobAfterFinalize } from "../cfd_finalize.js";
5
5
  import { formatJobStatusText } from "../job_status_format.js";
6
+ import { DEFAULT_BLOCK_UNTIL_SEC, DEFAULT_POLL_INTERVAL_SEC } from "../job_monitor.js";
7
+ import { runMonitor } from "./training_monitor.js";
6
8
  export function registerJobsTool(server) {
7
- registerAuditedTool(server, "barmesh_jobs", `Check job status or list jobs.
9
+ registerAuditedTool(server, "barmesh_jobs", `Check job status, block until terminal, or list jobs.
8
10
 
9
- BEST FOR: Polling a submitted barmesh_mesh_convergence or barmesh_richardson job until status is completed/failed.
10
- ASYNC PROTOCOL: Poll action=status every 10-20s. Keep polling for several minutes datacenter-scale grids plus EMD are slow; do not give up after one poll. When status=completed, call barmesh_results(action=get, job_id=...).
11
+ BEST FOR: action=monitor after submit (one call, throttled snapshots preferred for agents). action=status for a single one-shot check.
12
+ ASYNC PROTOCOL: monitor blocks server-side until completed/failed or block_until timeout (default ${DEFAULT_BLOCK_UNTIL_SEC}s, poll every ${DEFAULT_POLL_INTERVAL_SEC}s). status is one-shot; poll every 10-20s manually if not using monitor. When status=completed, call barmesh_results(action=get, job_id=...).
11
13
  ESCALATION: status=failed returns an error message and (when available) a failure_stage; read it before retrying.`, {
12
- action: z.enum(["status", "list"]).describe("status: check one job; list: recent jobs"),
13
- job_id: z.string().optional().describe("Job ID (required for status)"),
14
+ action: z
15
+ .enum(["status", "monitor", "list"])
16
+ .describe("status: one-shot check; monitor: block until terminal with snapshots; list: recent jobs"),
17
+ job_id: z.string().optional().describe("Job ID (required for status and monitor)"),
18
+ block_until_sec: z
19
+ .number()
20
+ .int()
21
+ .min(30)
22
+ .optional()
23
+ .describe(`action=monitor only: max wait seconds (default ${DEFAULT_BLOCK_UNTIL_SEC})`),
24
+ poll_interval_sec: z
25
+ .number()
26
+ .int()
27
+ .min(5)
28
+ .optional()
29
+ .describe(`action=monitor only: poll interval (default ${DEFAULT_POLL_INTERVAL_SEC})`),
30
+ wait_finalize: z
31
+ .boolean()
32
+ .optional()
33
+ .describe("action=monitor only: wait for cfd_finalize (default true)"),
14
34
  }, async (args) => {
15
- const { action, job_id } = args;
35
+ const { action, job_id, block_until_sec, poll_interval_sec, wait_finalize } = args;
36
+ if (action === "monitor") {
37
+ if (!job_id)
38
+ throw new Error("barmesh_jobs(monitor) requires job_id.");
39
+ return runMonitor({ job_id, block_until_sec, poll_interval_sec, wait_finalize });
40
+ }
16
41
  if (action === "status") {
17
42
  if (!job_id)
18
43
  throw new Error("barmesh_jobs(status) requires job_id.");
@@ -0,0 +1,56 @@
1
+ import { z } from "zod";
2
+ import { registerAuditedTool } from "../audit.js";
3
+ import { textResult } from "../shared.js";
4
+ import { DEFAULT_BLOCK_UNTIL_SEC, DEFAULT_POLL_INTERVAL_SEC, formatMonitorText, monitorJob, } from "../job_monitor.js";
5
/**
 * Tool description registered for barmesh_training_monitor. This text is
 * sent to MCP clients verbatim, so treat it as runtime output, not a comment.
 */
const MONITOR_DESCRIPTION = `Block until a barmesh_mesh_convergence or barmesh_richardson job reaches a terminal state, emitting compact progress snapshots along the way.

BEST FOR: After job submit — one call replaces manual barmesh_jobs(status) poll loops (10–20s × several minutes).
ASYNC PROTOCOL: Server-side poll every ~5s (configurable) until completed/failed/cancelled or block_until timeout. Snapshots include phase, epoch/total, QE/TE, ETA, and ordering_errors tail when live.
FINALIZE: When defer_figures is used, waits for cfd_finalize and adds a snapshot when figure render starts/completes.
NOT REQUIRED: barmesh_jobs(action=status) still works for one-shot checks; this tool is the agent-friendly blocking variant (barsom training_monitor parity for headless workflows).
ESCALATION: On timeout, re-run with a higher block_until_sec; on failed, read failure_stage in the final snapshot.`;
/**
 * Zod input schema for the monitor tool: a required job id plus three
 * optional tuning knobs.
 */
const monitorSchema = {
    // Required: the job to watch.
    job_id: z.string().describe("Job ID from barmesh_mesh_convergence or barmesh_richardson"),
    // Optional integer time budget in seconds, at least 30.
    block_until_sec: z
        .number()
        .int()
        .min(30)
        .optional()
        .describe(`Max seconds to wait (default ${DEFAULT_BLOCK_UNTIL_SEC}; mesh jobs often need 6–10 min)`),
    // Optional integer poll interval in seconds, at least 5.
    poll_interval_sec: z
        .number()
        .int()
        .min(5)
        .optional()
        .describe(`Seconds between status polls (default ${DEFAULT_POLL_INTERVAL_SEC}; do not go below 5)`),
    // Optional flag; the description states it defaults to true when omitted.
    wait_finalize: z
        .boolean()
        .optional()
        .describe("When true (default), wait for cfd_finalize after compute completes before returning"),
};
31
/**
 * Tool handler: run the blocking job monitor and wrap its outcome as a text
 * tool result.
 *
 * The returned payload is the final job data, a `monitor` summary object,
 * and `status_text` set to the formatted multi-line report.
 *
 * @param {object} args - Validated tool arguments (job_id, optional
 *   block_until_sec, poll_interval_sec, wait_finalize).
 * @returns {Promise<object>} Result of textResult(...).
 */
async function runMonitor(args) {
    // Resolve defaults here so the report header shows the values requested.
    const limits = {
        block_until_sec: args.block_until_sec ?? DEFAULT_BLOCK_UNTIL_SEC,
        poll_interval_sec: args.poll_interval_sec ?? DEFAULT_POLL_INTERVAL_SEC,
    };
    const result = await monitorJob(args.job_id, {
        ...limits,
        wait_finalize: args.wait_finalize,
    });
    const report = formatMonitorText(result, limits);
    const { job_id, terminal, timed_out, snapshots, status_text, suggested_next_step } = result;
    const monitor = { job_id, terminal, timed_out, snapshots, status_text, suggested_next_step };
    // status_text comes last so the formatted report wins over any
    // status_text field already present in the job data.
    return textResult({ ...result.data, monitor, status_text: report });
}
53
/**
 * Register the barmesh_training_monitor tool on the given MCP server.
 *
 * @param {object} server - Server instance passed through to registerAuditedTool.
 */
export function registerTrainingMonitorTool(server) {
    const toolName = "barmesh_training_monitor";
    registerAuditedTool(server, toolName, MONITOR_DESCRIPTION, monitorSchema, runMonitor);
}
56
+ export { runMonitor, MONITOR_DESCRIPTION, monitorSchema };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@barivia/barmesh-mcp",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "barmesh MCP proxy — SOM-based CFD mesh-convergence and Richardson/GCI analysis on the Barivia cloud API",
5
5
  "keywords": [
6
6
  "mcp",