@barivia/barsom-mcp 0.3.2 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/index.js +1 -2445
- package/dist/index.js.map +1 -1
- package/package.json +4 -2
package/dist/index.js
CHANGED
|
@@ -1,2446 +1,2 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* @barivia/barsom-mcp
|
|
4
|
-
*
|
|
5
|
-
* Thin MCP proxy: implements stdio MCP locally and forwards every tool call
|
|
6
|
-
* to the Barivia cloud REST API. Users configure BARIVIA_API_KEY and
|
|
7
|
-
* BARIVIA_API_URL as environment variables.
|
|
8
|
-
*
|
|
9
|
-
* Usage (in MCP client config, e.g. Cursor / Claude Desktop):
|
|
10
|
-
|
|
11
|
-
* {
|
|
12
|
-
* "mcpServers": {
|
|
13
|
-
* "analytics-engine": {
|
|
14
|
-
* "command": "npx",
|
|
15
|
-
* "args": ["-y", "@barivia/barsom-mcp"],
|
|
16
|
-
* "env": {
|
|
17
|
-
* "BARIVIA_API_KEY": "bv_live_xxxx",
|
|
18
|
-
* "BARIVIA_API_URL": "https://api.barivia.se"
|
|
19
|
-
* }
|
|
20
|
-
* }
|
|
21
|
-
* }
|
|
22
|
-
* }
|
|
23
|
-
*/
|
|
24
|
-
import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { registerAppResource, registerAppTool, RESOURCE_MIME_TYPE } from "@modelcontextprotocol/ext-apps/server";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
|
|
30
|
-
// ---------------------------------------------------------------------------
|
|
31
|
-
// Config
|
|
32
|
-
// ---------------------------------------------------------------------------
|
|
33
|
-
const API_URL = process.env.BARIVIA_API_URL ??
|
|
34
|
-
process.env.BARSOM_API_URL ??
|
|
35
|
-
"https://api.barivia.se";
|
|
36
|
-
const API_KEY = process.env.BARIVIA_API_KEY ?? process.env.BARSOM_API_KEY ?? "";
|
|
37
|
-
if (!API_KEY) {
|
|
38
|
-
console.error("Error: BARIVIA_API_KEY not set. Set it in your MCP client config.");
|
|
39
|
-
process.exit(1);
|
|
40
|
-
}
|
|
41
|
-
// ---------------------------------------------------------------------------
|
|
42
|
-
// Helpers
|
|
43
|
-
// ---------------------------------------------------------------------------
|
|
44
|
-
const FETCH_TIMEOUT_MS = parseInt(process.env.BARIVIA_FETCH_TIMEOUT_MS ?? "30000", 10);
|
|
45
|
-
const MAX_RETRIES = 2;
|
|
46
|
-
const RETRYABLE_STATUS = new Set([502, 503, 504]);
|
|
47
|
-
function isTransientError(err, status) {
|
|
48
|
-
if (status !== undefined && RETRYABLE_STATUS.has(status))
|
|
49
|
-
return true;
|
|
50
|
-
if (err instanceof DOMException && err.name === "AbortError")
|
|
51
|
-
return true;
|
|
52
|
-
if (err instanceof TypeError)
|
|
53
|
-
return true; // network-level fetch failure
|
|
54
|
-
return false;
|
|
55
|
-
}
|
|
56
|
-
async function fetchWithTimeout(url, init, timeoutMs = FETCH_TIMEOUT_MS) {
|
|
57
|
-
const controller = new AbortController();
|
|
58
|
-
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
59
|
-
try {
|
|
60
|
-
return await fetch(url, { ...init, signal: controller.signal });
|
|
61
|
-
}
|
|
62
|
-
finally {
|
|
63
|
-
clearTimeout(timer);
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
async function apiCall(method, path, body, extraHeaders) {
|
|
67
|
-
const url = `${API_URL}${path}`;
|
|
68
|
-
const contentType = extraHeaders?.["Content-Type"] ?? "application/json";
|
|
69
|
-
const requestId = Math.random().toString(36).slice(2, 10);
|
|
70
|
-
const headers = {
|
|
71
|
-
Authorization: `Bearer ${API_KEY}`,
|
|
72
|
-
"Content-Type": contentType,
|
|
73
|
-
"X-Request-ID": requestId,
|
|
74
|
-
...extraHeaders,
|
|
75
|
-
};
|
|
76
|
-
let serializedBody;
|
|
77
|
-
if (body !== undefined) {
|
|
78
|
-
serializedBody =
|
|
79
|
-
contentType === "application/json" ? JSON.stringify(body) : String(body);
|
|
80
|
-
}
|
|
81
|
-
let lastError;
|
|
82
|
-
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
83
|
-
try {
|
|
84
|
-
const resp = await fetchWithTimeout(url, {
|
|
85
|
-
method,
|
|
86
|
-
headers,
|
|
87
|
-
body: serializedBody,
|
|
88
|
-
});
|
|
89
|
-
const text = await resp.text();
|
|
90
|
-
if (!resp.ok) {
|
|
91
|
-
if (attempt < MAX_RETRIES && isTransientError(null, resp.status)) {
|
|
92
|
-
await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt));
|
|
93
|
-
continue;
|
|
94
|
-
}
|
|
95
|
-
const errBody = (() => { try {
|
|
96
|
-
return JSON.parse(text);
|
|
97
|
-
}
|
|
98
|
-
catch {
|
|
99
|
-
return null;
|
|
100
|
-
} })();
|
|
101
|
-
const detail = errBody?.error ?? text;
|
|
102
|
-
const hint = resp.status === 400 ? " Check parameter types and required fields."
|
|
103
|
-
: resp.status === 404 ? " The resource may not exist or may have been deleted."
|
|
104
|
-
: resp.status === 409 ? " The job may not be in the expected state."
|
|
105
|
-
: resp.status === 429 ? " Rate limit exceeded — wait a moment and retry."
|
|
106
|
-
: "";
|
|
107
|
-
throw new Error(`${detail}${hint}`);
|
|
108
|
-
}
|
|
109
|
-
return JSON.parse(text);
|
|
110
|
-
}
|
|
111
|
-
catch (err) {
|
|
112
|
-
lastError = err;
|
|
113
|
-
if (attempt < MAX_RETRIES && isTransientError(err)) {
|
|
114
|
-
await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt));
|
|
115
|
-
continue;
|
|
116
|
-
}
|
|
117
|
-
throw err;
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
throw lastError;
|
|
121
|
-
}
|
|
122
|
-
/** Fetch raw bytes from the API (for image downloads). */
|
|
123
|
-
async function apiRawCall(path) {
|
|
124
|
-
const url = `${API_URL}${path}`;
|
|
125
|
-
let lastError;
|
|
126
|
-
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
127
|
-
try {
|
|
128
|
-
const resp = await fetchWithTimeout(url, {
|
|
129
|
-
method: "GET",
|
|
130
|
-
headers: { Authorization: `Bearer ${API_KEY}` },
|
|
131
|
-
});
|
|
132
|
-
if (!resp.ok) {
|
|
133
|
-
if (attempt < MAX_RETRIES && isTransientError(null, resp.status)) {
|
|
134
|
-
await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt));
|
|
135
|
-
continue;
|
|
136
|
-
}
|
|
137
|
-
throw new Error(`API GET ${path} returned ${resp.status}`);
|
|
138
|
-
}
|
|
139
|
-
const arrayBuf = await resp.arrayBuffer();
|
|
140
|
-
return {
|
|
141
|
-
data: Buffer.from(arrayBuf),
|
|
142
|
-
contentType: resp.headers.get("content-type") ?? "application/octet-stream",
|
|
143
|
-
};
|
|
144
|
-
}
|
|
145
|
-
catch (err) {
|
|
146
|
-
lastError = err;
|
|
147
|
-
if (attempt < MAX_RETRIES && isTransientError(err)) {
|
|
148
|
-
await new Promise((r) => setTimeout(r, 1000 * 2 ** attempt));
|
|
149
|
-
continue;
|
|
150
|
-
}
|
|
151
|
-
throw err;
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
throw lastError;
|
|
155
|
-
}
|
|
156
|
-
function textResult(data) {
|
|
157
|
-
return {
|
|
158
|
-
content: [{ type: "text", text: JSON.stringify(data, null, 2) }],
|
|
159
|
-
};
|
|
160
|
-
}
|
|
161
|
-
async function pollUntilComplete(jobId, maxWaitMs = 30_000, intervalMs = 1000) {
|
|
162
|
-
const start = Date.now();
|
|
163
|
-
while (Date.now() - start < maxWaitMs) {
|
|
164
|
-
const data = (await apiCall("GET", `/v1/jobs/${jobId}`));
|
|
165
|
-
const status = data.status;
|
|
166
|
-
if (status === "completed" || status === "failed" || status === "cancelled") {
|
|
167
|
-
return {
|
|
168
|
-
status,
|
|
169
|
-
result_ref: data.result_ref,
|
|
170
|
-
error: data.error,
|
|
171
|
-
};
|
|
172
|
-
}
|
|
173
|
-
await new Promise((r) => setTimeout(r, intervalMs));
|
|
174
|
-
}
|
|
175
|
-
return { status: "timeout" };
|
|
176
|
-
}
|
|
177
|
-
// ---------------------------------------------------------------------------
|
|
178
|
-
// MCP Server
|
|
179
|
-
// ---------------------------------------------------------------------------
|
|
180
|
-
const server = new McpServer({
|
|
181
|
-
name: "analytics-engine",
|
|
182
|
-
version: "0.4.0",
|
|
183
|
-
});
|
|
184
|
-
// ---------------------------------------------------------------------------
|
|
185
|
-
// MCP Apps: Register UI resources for interactive dashboards
|
|
186
|
-
// ---------------------------------------------------------------------------
|
|
187
|
-
const BASE_DIR = import.meta.dirname ?? path.dirname(new URL(import.meta.url).pathname);
|
|
188
|
-
async function loadViewHtml(viewName) {
|
|
189
|
-
const candidates = [
|
|
190
|
-
path.join(BASE_DIR, "views", "src", "views", viewName, "index.html"),
|
|
191
|
-
path.join(BASE_DIR, "views", viewName, "index.html"),
|
|
192
|
-
path.join(BASE_DIR, "..", "dist", "views", "src", "views", viewName, "index.html"),
|
|
193
|
-
];
|
|
194
|
-
for (const p of candidates) {
|
|
195
|
-
try {
|
|
196
|
-
return await fs.readFile(p, "utf-8");
|
|
197
|
-
}
|
|
198
|
-
catch {
|
|
199
|
-
continue;
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
return null;
|
|
203
|
-
}
|
|
204
|
-
const SOM_EXPLORER_URI = "ui://barsom/som-explorer";
|
|
205
|
-
const DATA_PREVIEW_URI = "ui://barsom/data-preview";
|
|
206
|
-
const TRAINING_MONITOR_URI = "ui://barsom/training-monitor";
|
|
207
|
-
registerAppResource(server, SOM_EXPLORER_URI, SOM_EXPLORER_URI, { mimeType: RESOURCE_MIME_TYPE }, async () => {
|
|
208
|
-
const html = await loadViewHtml("som-explorer");
|
|
209
|
-
return {
|
|
210
|
-
contents: [
|
|
211
|
-
{
|
|
212
|
-
uri: SOM_EXPLORER_URI,
|
|
213
|
-
mimeType: RESOURCE_MIME_TYPE,
|
|
214
|
-
text: html ?? "<html><body>SOM Explorer view not built yet. Run: npm run build:views</body></html>",
|
|
215
|
-
},
|
|
216
|
-
],
|
|
217
|
-
};
|
|
218
|
-
});
|
|
219
|
-
registerAppResource(server, DATA_PREVIEW_URI, DATA_PREVIEW_URI, { mimeType: RESOURCE_MIME_TYPE }, async () => {
|
|
220
|
-
const html = await loadViewHtml("data-preview");
|
|
221
|
-
return {
|
|
222
|
-
contents: [{
|
|
223
|
-
uri: DATA_PREVIEW_URI,
|
|
224
|
-
mimeType: RESOURCE_MIME_TYPE,
|
|
225
|
-
text: html ?? "<html><body>Data Preview view not built yet.</body></html>",
|
|
226
|
-
}],
|
|
227
|
-
};
|
|
228
|
-
});
|
|
229
|
-
registerAppResource(server, TRAINING_MONITOR_URI, TRAINING_MONITOR_URI, { mimeType: RESOURCE_MIME_TYPE }, async () => {
|
|
230
|
-
const html = await loadViewHtml("training-monitor");
|
|
231
|
-
return {
|
|
232
|
-
contents: [{
|
|
233
|
-
uri: TRAINING_MONITOR_URI,
|
|
234
|
-
mimeType: RESOURCE_MIME_TYPE,
|
|
235
|
-
text: html ?? "<html><body>Training Monitor view not built yet.</body></html>",
|
|
236
|
-
}],
|
|
237
|
-
};
|
|
238
|
-
});
|
|
239
|
-
// ---- explore_som (MCP App) ----
|
|
240
|
-
registerAppTool(server, "explore_som", {
|
|
241
|
-
title: "Explore SOM",
|
|
242
|
-
description: "Interactive SOM explorer dashboard. Opens an inline visualization " +
|
|
243
|
-
"where you can toggle features, click nodes, and export figures. " +
|
|
244
|
-
"Use this after get_results for a richer, interactive exploration experience. " +
|
|
245
|
-
"Falls back to text+image on hosts that don't support MCP Apps.",
|
|
246
|
-
inputSchema: {
|
|
247
|
-
job_id: z.string().describe("Job ID of a completed SOM training job"),
|
|
248
|
-
},
|
|
249
|
-
_meta: { ui: { resourceUri: SOM_EXPLORER_URI } },
|
|
250
|
-
}, async ({ job_id }) => {
|
|
251
|
-
const data = (await apiCall("GET", `/v1/results/${job_id}`));
|
|
252
|
-
const summary = (data.summary ?? {});
|
|
253
|
-
const content = [];
|
|
254
|
-
content.push({
|
|
255
|
-
type: "text",
|
|
256
|
-
text: JSON.stringify({
|
|
257
|
-
job_id,
|
|
258
|
-
summary,
|
|
259
|
-
download_urls: data.download_urls,
|
|
260
|
-
}),
|
|
261
|
-
});
|
|
262
|
-
const imgExt = summary.output_format ?? "pdf";
|
|
263
|
-
await tryAttachImage(content, job_id, `combined.${imgExt}`);
|
|
264
|
-
return { content };
|
|
265
|
-
});
|
|
266
|
-
// ---- datasets ----
|
|
267
|
-
server.tool("datasets", `Manage datasets: upload, preview, subset, or delete.
|
|
268
|
-
|
|
269
|
-
action=upload: Upload a CSV for SOM analysis. Prefer file_path over csv_data so the MCP reads the file directly. Returns dataset ID and metadata. Then use datasets(action=preview) before train_som.
|
|
270
|
-
action=preview: Show columns, stats, sample rows, cyclic/datetime detections. ALWAYS preview before train_som on an unfamiliar dataset.
|
|
271
|
-
action=subset: Create a new dataset from a subset of an existing one. Requires name and at least one of row_range or filters.
|
|
272
|
-
- row_range: [start, end] 1-based inclusive (e.g. [1, 2000] for first 2000 rows)
|
|
273
|
-
- filters: array of conditions, ALL must match (AND logic). Each: { column, op, value }.
|
|
274
|
-
Operators:
|
|
275
|
-
eq — exact match (string or number): { column: "region", op: "eq", value: "Europe" }
|
|
276
|
-
ne — not equal: { column: "status", op: "ne", value: "error" }
|
|
277
|
-
in — value in set: { column: "grade", op: "in", value: ["A", "B"] }
|
|
278
|
-
gt/lt — above/below threshold: { column: "temp", op: "gt", value: 20 }
|
|
279
|
-
gte/lte — at or above/below: { column: "price", op: "gte", value: 100 }
|
|
280
|
-
between — closed interval [lo, hi]: { column: "age", op: "between", value: [18, 65] }
|
|
281
|
-
- Combine row_range + filters to slice both rows and values.
|
|
282
|
-
- Single filter object is also accepted (auto-wrapped).
|
|
283
|
-
action=delete: Remove a dataset and free the slot.
|
|
284
|
-
|
|
285
|
-
BEST FOR: Tabular numeric data. CSV with header required. Use list(type=datasets) to see existing datasets.`, {
|
|
286
|
-
action: z
|
|
287
|
-
.enum(["upload", "preview", "subset", "delete"])
|
|
288
|
-
.describe("upload: add a CSV; preview: inspect before training; subset: create subset dataset; delete: remove dataset"),
|
|
289
|
-
name: z.string().optional().describe("Dataset name (required for action=upload and subset)"),
|
|
290
|
-
file_path: z.string().optional().describe("Path to local CSV (for upload; prefer over csv_data)"),
|
|
291
|
-
csv_data: z.string().optional().describe("Inline CSV string (for upload; use for small data)"),
|
|
292
|
-
dataset_id: z.string().optional().describe("Dataset ID (required for preview, subset, and delete)"),
|
|
293
|
-
n_rows: z.number().int().optional().default(5).describe("Sample rows to return (preview only)"),
|
|
294
|
-
row_range: z
|
|
295
|
-
.tuple([z.number().int(), z.number().int()])
|
|
296
|
-
.optional()
|
|
297
|
-
.describe("For subset: [start, end] 1-based inclusive row range (e.g. [1, 2000])"),
|
|
298
|
-
filters: z.preprocess((v) => {
|
|
299
|
-
if (v === undefined || v === null)
|
|
300
|
-
return v;
|
|
301
|
-
if (Array.isArray(v))
|
|
302
|
-
return v;
|
|
303
|
-
if (typeof v === "object" && v !== null && "column" in v)
|
|
304
|
-
return [v];
|
|
305
|
-
return v;
|
|
306
|
-
}, z
|
|
307
|
-
.array(z.object({
|
|
308
|
-
column: z.string(),
|
|
309
|
-
op: z.enum(["eq", "ne", "in", "gt", "lt", "gte", "lte", "between"]),
|
|
310
|
-
value: z.union([z.string(), z.number(), z.array(z.union([z.string(), z.number()]))]),
|
|
311
|
-
}))
|
|
312
|
-
.optional()
|
|
313
|
-
.describe("For subset: filter conditions (AND logic). Single object or array. " +
|
|
314
|
-
"ops: eq, ne, in, gt, lt, gte, lte, between. " +
|
|
315
|
-
"Examples: { column: 'temp', op: 'between', value: [15, 30] }, { column: 'region', op: 'eq', value: 'Europe' }")),
|
|
316
|
-
filter: z
|
|
317
|
-
.object({
|
|
318
|
-
column: z.string(),
|
|
319
|
-
op: z.enum(["eq", "ne", "in", "gt", "lt", "gte", "lte", "between"]),
|
|
320
|
-
value: z.union([z.string(), z.number(), z.array(z.union([z.string(), z.number()]))]),
|
|
321
|
-
})
|
|
322
|
-
.optional()
|
|
323
|
-
.describe("Deprecated — use filters instead. Single filter condition."),
|
|
324
|
-
}, async ({ action, name, file_path, csv_data, dataset_id, n_rows, row_range, filters, filter }) => {
|
|
325
|
-
if (action === "upload") {
|
|
326
|
-
if (!name)
|
|
327
|
-
throw new Error("datasets(upload) requires name");
|
|
328
|
-
let body;
|
|
329
|
-
if (file_path) {
|
|
330
|
-
const resolved = path.resolve(file_path);
|
|
331
|
-
try {
|
|
332
|
-
body = await fs.readFile(resolved, "utf-8");
|
|
333
|
-
}
|
|
334
|
-
catch (err) {
|
|
335
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
336
|
-
throw new Error(`Cannot read file "${resolved}": ${msg}`);
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
else if (csv_data && csv_data.length > 0) {
|
|
340
|
-
body = csv_data;
|
|
341
|
-
}
|
|
342
|
-
else {
|
|
343
|
-
throw new Error("datasets(upload) requires file_path or csv_data");
|
|
344
|
-
}
|
|
345
|
-
const data = await apiCall("POST", "/v1/datasets", body, {
|
|
346
|
-
"X-Dataset-Name": name,
|
|
347
|
-
"Content-Type": "text/csv",
|
|
348
|
-
});
|
|
349
|
-
return textResult(data);
|
|
350
|
-
}
|
|
351
|
-
if (action === "preview") {
|
|
352
|
-
if (!dataset_id)
|
|
353
|
-
throw new Error("datasets(preview) requires dataset_id");
|
|
354
|
-
const data = (await apiCall("GET", `/v1/datasets/${dataset_id}/preview?n_rows=${n_rows ?? 5}`));
|
|
355
|
-
const cols = data.columns ?? [];
|
|
356
|
-
const stats = data.column_stats ?? [];
|
|
357
|
-
const hints = data.cyclic_hints ?? [];
|
|
358
|
-
const samples = data.sample_rows ?? [];
|
|
359
|
-
const dtCols = data.datetime_columns ?? [];
|
|
360
|
-
const temporalSugg = data.temporal_suggestions ?? [];
|
|
361
|
-
const fmt = (v) => v === null || v === undefined ? "—" : Number(v).toFixed(3);
|
|
362
|
-
const lines = [
|
|
363
|
-
`Dataset: ${data.name} (${data.dataset_id})`,
|
|
364
|
-
`${data.total_rows} rows × ${data.total_cols} columns`,
|
|
365
|
-
``,
|
|
366
|
-
`Column Statistics:`,
|
|
367
|
-
`| Column | Min | Max | Mean | Std | Nulls | Numeric |`,
|
|
368
|
-
`|--------|-----|-----|------|-----|-------|---------|`,
|
|
369
|
-
];
|
|
370
|
-
for (const s of stats) {
|
|
371
|
-
lines.push(`| ${s.column} | ${fmt(s.min)} | ${fmt(s.max)} | ${fmt(s.mean)} | ${fmt(s.std)} | ${s.null_count ?? 0} | ${s.is_numeric !== false ? "yes" : "no"} |`);
|
|
372
|
-
}
|
|
373
|
-
if (hints.length > 0) {
|
|
374
|
-
lines.push(``, `Detected Cyclic Feature Hints:`);
|
|
375
|
-
for (const h of hints) {
|
|
376
|
-
lines.push(` • ${h.column} — period=${h.period} (${h.reason})`);
|
|
377
|
-
}
|
|
378
|
-
}
|
|
379
|
-
if (dtCols.length > 0) {
|
|
380
|
-
lines.push(``, `Detected Datetime Columns:`);
|
|
381
|
-
for (const dc of dtCols) {
|
|
382
|
-
const formats = dc.detected_formats ?? [];
|
|
383
|
-
const fmtStrs = formats
|
|
384
|
-
.map((f) => `${f.format} — ${f.description} (${(f.match_rate * 100).toFixed(0)}% match)`)
|
|
385
|
-
.join("; ");
|
|
386
|
-
lines.push(` • ${dc.column}: sample="${dc.sample}" → ${fmtStrs}`);
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
|
-
if (temporalSugg.length > 0) {
|
|
390
|
-
lines.push(``, `Temporal Feature Suggestions (require user approval):`);
|
|
391
|
-
for (const ts of temporalSugg) {
|
|
392
|
-
lines.push(` • Columns: ${ts.columns.join(" + ")} → format: "${ts.format}"`);
|
|
393
|
-
lines.push(` Available components: ${ts.available_components.join(", ")}`);
|
|
394
|
-
}
|
|
395
|
-
}
|
|
396
|
-
if (samples.length > 0) {
|
|
397
|
-
lines.push(``, `Sample Rows (first ${samples.length}):`);
|
|
398
|
-
lines.push(`| ${cols.join(" | ")} |`);
|
|
399
|
-
lines.push(`| ${cols.map(() => "---").join(" | ")} |`);
|
|
400
|
-
for (const row of samples) {
|
|
401
|
-
lines.push(`| ${cols.map((c) => String(row[c] ?? "")).join(" | ")} |`);
|
|
402
|
-
}
|
|
403
|
-
}
|
|
404
|
-
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
405
|
-
}
|
|
406
|
-
if (action === "subset") {
|
|
407
|
-
if (!dataset_id)
|
|
408
|
-
throw new Error("datasets(subset) requires dataset_id");
|
|
409
|
-
if (!name)
|
|
410
|
-
throw new Error("datasets(subset) requires name");
|
|
411
|
-
const allFilters = filters ?? (filter ? [filter] : undefined);
|
|
412
|
-
if (row_range === undefined && allFilters === undefined) {
|
|
413
|
-
throw new Error("datasets(subset) requires at least one of row_range or filters");
|
|
414
|
-
}
|
|
415
|
-
const body = { name };
|
|
416
|
-
if (row_range !== undefined)
|
|
417
|
-
body.row_range = row_range;
|
|
418
|
-
if (allFilters !== undefined)
|
|
419
|
-
body.filters = allFilters;
|
|
420
|
-
const data = await apiCall("POST", `/v1/datasets/${dataset_id}/subset`, JSON.stringify(body), {
|
|
421
|
-
"Content-Type": "application/json",
|
|
422
|
-
});
|
|
423
|
-
return textResult(data);
|
|
424
|
-
}
|
|
425
|
-
if (action === "delete") {
|
|
426
|
-
if (!dataset_id)
|
|
427
|
-
throw new Error("datasets(delete) requires dataset_id");
|
|
428
|
-
const data = await apiCall("DELETE", `/v1/datasets/${dataset_id}`);
|
|
429
|
-
return textResult(data);
|
|
430
|
-
}
|
|
431
|
-
throw new Error("Invalid action");
|
|
432
|
-
});
|
|
433
|
-
// ---- train_som ----
|
|
434
|
-
server.tool("train_som", `Train a Self-Organizing Map on the dataset. Returns a job_id for polling.
|
|
435
|
-
|
|
436
|
-
BEST FOR: Exploratory analysis of multivariate numeric data — clustering, regime
|
|
437
|
-
detection, process monitoring, anomaly visualization, dimensionality reduction.
|
|
438
|
-
NOT FOR: Time-series forecasting, classification, or text/image data.
|
|
439
|
-
|
|
440
|
-
TIMING (8700 samples, CPU):
|
|
441
|
-
- 10x10 grid, 10 epochs: ~30s
|
|
442
|
-
- 20x20 grid, 30 epochs: ~3–5 min
|
|
443
|
-
- 40x40 grid, 60 epochs: ~15–30 min
|
|
444
|
-
GPU cuts these by 3–5x. Check license_info for available resources.
|
|
445
|
-
|
|
446
|
-
BEFORE calling, ask the user:
|
|
447
|
-
1. Which columns to include? (use 'columns' to restrict — start with 5–10 most relevant)
|
|
448
|
-
2. Any cyclic features? (hour=24, weekday=7, angle=360 → cyclic_features)
|
|
449
|
-
3. Any skewed columns? (suggest transforms: log, sqrt, rank)
|
|
450
|
-
4. Feature weights? (weight > 1 emphasizes, 0 disables)
|
|
451
|
-
5. Quick exploration or refined map?
|
|
452
|
-
|
|
453
|
-
PRESET TABLE:
|
|
454
|
-
| preset | grid | epochs | batch_size |
|
|
455
|
-
| quick | 15x15 | [15, 5] | 48 |
|
|
456
|
-
| standard | 25x25 | [30, 15] | 48 |
|
|
457
|
-
| refined | 40x40 | [50, 25] | 32 |
|
|
458
|
-
| high_res | 60x60 | [60, 40] | 32 |
|
|
459
|
-
|
|
460
|
-
TRAINING PHASES:
|
|
461
|
-
- Ordering: large neighborhoods → global structure. sigma_f controls end-radius (default 1.0).
|
|
462
|
-
- Convergence: small neighborhoods → fine-tuning. Skip with epochs=[N, 0].
|
|
463
|
-
- sigma_f 0.5–0.7 → sharper clusters. sigma_f 1.0–2.0 → smoother transitions.
|
|
464
|
-
|
|
465
|
-
TRANSFORMS: Per-column preprocessing before normalization.
|
|
466
|
-
transforms: {revenue: "log", volume: "log1p", pressure: "sqrt"}
|
|
467
|
-
Suggest when datasets(action=preview) shows large value ranges or right-skewed distributions.
|
|
468
|
-
|
|
469
|
-
TEMPORAL FEATURES: NEVER auto-apply. Always ask which components to extract.
|
|
470
|
-
temporal_features: [{columns: ['Date'], format: 'dd.mm.yyyy', extract: ['day_of_year'], cyclic: true}]
|
|
471
|
-
|
|
472
|
-
MODEL TYPES: SOM (default), SOM-SOFT (uncertainty), RSOM (time-series), RSOM-SOFT (both).
|
|
473
|
-
|
|
474
|
-
COMMON MISTAKES:
|
|
475
|
-
- Too many features without column selection dilutes the map. Start with 5–10.
|
|
476
|
-
- Forgetting cyclic encoding for periodic variables (hours, angles) causes topology artifacts.
|
|
477
|
-
- Grid too small for the data → high QE. Grid too large → sparse nodes. Use sqrt(5*sqrt(N)).
|
|
478
|
-
- Not log-transforming skewed columns → a few outliers dominate the normalization.
|
|
479
|
-
- Using default batch_size for quality-sensitive work: set batch_size=32–64 for sharper maps.
|
|
480
|
-
- Skipping convergence phase: ordering alone gives rough structure; convergence refines it.
|
|
481
|
-
- Not checking get_job_export(export="training_log"): if QE is still dropping, add more epochs.
|
|
482
|
-
|
|
483
|
-
QUALITY TARGETS: QE < 1.5 good, TE < 0.05 good, explained variance > 0.8 good.
|
|
484
|
-
If QE > 2 → more epochs or larger grid. If TE > 0.15 → larger grid or periodic=true.
|
|
485
|
-
|
|
486
|
-
OUTPUT: default format pdf, default colormap coolwarm. Override with output_format (png/pdf/svg), output_dpi (standard/retina/print), colormap (e.g. viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral).
|
|
487
|
-
|
|
488
|
-
After training, use get_results → analyze(clusters) → component_planes → feature_correlation.
|
|
489
|
-
See docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.`, {
|
|
490
|
-
dataset_id: z.string().describe("Dataset ID from datasets(action=upload) or list(type=datasets)"),
|
|
491
|
-
preset: z
|
|
492
|
-
.enum(["quick", "standard", "refined", "high_res"])
|
|
493
|
-
.optional()
|
|
494
|
-
.describe("Training preset — sets sensible defaults for grid, epochs, and batch_size. " +
|
|
495
|
-
"Explicit params override preset values. " +
|
|
496
|
-
"quick: 15×15, [15,5], batch=48. " +
|
|
497
|
-
"standard: 25×25, [30,15], batch=48, best with GPU. " +
|
|
498
|
-
"refined: 40×40, [50,25], batch=32, best with GPU. " +
|
|
499
|
-
"high_res: 60×60, [60,40], batch=32, best with GPU."),
|
|
500
|
-
grid_x: z
|
|
501
|
-
.number()
|
|
502
|
-
.int()
|
|
503
|
-
.optional()
|
|
504
|
-
.describe("Grid width (omit for auto from data size)"),
|
|
505
|
-
grid_y: z
|
|
506
|
-
.number()
|
|
507
|
-
.int()
|
|
508
|
-
.optional()
|
|
509
|
-
.describe("Grid height (omit for auto from data size)"),
|
|
510
|
-
epochs: z.preprocess((v) => {
|
|
511
|
-
if (v === undefined || v === null)
|
|
512
|
-
return v;
|
|
513
|
-
if (typeof v === "string") {
|
|
514
|
-
const n = parseInt(v, 10);
|
|
515
|
-
if (!Number.isNaN(n))
|
|
516
|
-
return n;
|
|
517
|
-
const m = v.match(/^\[\s*(\d+)\s*,\s*(\d+)\s*\]$/);
|
|
518
|
-
if (m)
|
|
519
|
-
return [parseInt(m[1], 10), parseInt(m[2], 10)];
|
|
520
|
-
}
|
|
521
|
-
return v;
|
|
522
|
-
}, z
|
|
523
|
-
.union([z.number().int(), z.array(z.number().int()).length(2)])
|
|
524
|
-
.optional()
|
|
525
|
-
.describe("epochs: integer or [ordering, convergence] array, not a string. Example: 40 or [40, 20]. Set convergence=0 to skip phase 2 (e.g. [15, 0]).")),
|
|
526
|
-
model: z
|
|
527
|
-
.enum(["SOM", "RSOM", "SOM-SOFT", "RSOM-SOFT"])
|
|
528
|
-
.optional()
|
|
529
|
-
.default("SOM")
|
|
530
|
-
.describe("SOM model type. SOM=standard, SOM-SOFT=GTM-style soft responsibilities, RSOM=recurrent (time-series), RSOM-SOFT=recurrent+soft."),
|
|
531
|
-
periodic: z
|
|
532
|
-
.boolean()
|
|
533
|
-
.optional()
|
|
534
|
-
.default(true)
|
|
535
|
-
.describe("Use periodic (toroidal) boundaries"),
|
|
536
|
-
columns: z
|
|
537
|
-
.array(z.string())
|
|
538
|
-
.optional()
|
|
539
|
-
.describe("Subset of CSV column names to train on. Omit to use all columns. Useful to exclude irrelevant features."),
|
|
540
|
-
cyclic_features: z
|
|
541
|
-
.array(z.object({
|
|
542
|
-
feature: z.string().describe("Column name (e.g., 'weekday')"),
|
|
543
|
-
period: z
|
|
544
|
-
.number()
|
|
545
|
-
.describe("Period (e.g., 7 for weekday, 24 for hour, 360 for angle)"),
|
|
546
|
-
}))
|
|
547
|
-
.optional()
|
|
548
|
-
.describe("Features to encode as cyclic (cos, sin) pairs"),
|
|
549
|
-
temporal_features: z
|
|
550
|
-
.array(z.object({
|
|
551
|
-
columns: z
|
|
552
|
-
.array(z.string())
|
|
553
|
-
.describe("Column name(s) containing datetime strings, combined in order (e.g. ['Date', 'Time'])"),
|
|
554
|
-
format: z
|
|
555
|
-
.string()
|
|
556
|
-
.describe("Julia Dates format string from the whitelist (e.g. 'dd.mm.yyyy HH:MM'). Must match the combined column values."),
|
|
557
|
-
extract: z
|
|
558
|
-
.array(z.enum([
|
|
559
|
-
"hour_of_day",
|
|
560
|
-
"day_of_year",
|
|
561
|
-
"month",
|
|
562
|
-
"day_of_week",
|
|
563
|
-
"minute_of_hour",
|
|
564
|
-
]))
|
|
565
|
-
.describe("Which temporal components to extract"),
|
|
566
|
-
cyclic: z
|
|
567
|
-
.boolean()
|
|
568
|
-
.default(true)
|
|
569
|
-
.describe("Encode extracted components as cyclic sin/cos pairs (default true)"),
|
|
570
|
-
separator: z
|
|
571
|
-
.string()
|
|
572
|
-
.optional()
|
|
573
|
-
.describe("Separator when combining multiple columns (default ' '). Use 'T' for ISO 8601."),
|
|
574
|
-
}))
|
|
575
|
-
.optional()
|
|
576
|
-
.describe("Temporal feature extraction from datetime columns. Parses dates/times and extracts components. NEVER add this without user approval."),
|
|
577
|
-
feature_weights: z
|
|
578
|
-
.record(z.number())
|
|
579
|
-
.optional()
|
|
580
|
-
.describe("Per-feature importance weights as {column_name: weight}. Applied after normalization (column *= sqrt(weight)). weight=0 disables, >1 emphasizes, <1 de-emphasizes. Cyclic shorthand: {'day_of_year': 2.0} auto-expands to both _cos and _sin."),
|
|
581
|
-
transforms: z
|
|
582
|
-
.record(z.enum([
|
|
583
|
-
"log",
|
|
584
|
-
"log1p",
|
|
585
|
-
"log10",
|
|
586
|
-
"sqrt",
|
|
587
|
-
"square",
|
|
588
|
-
"abs",
|
|
589
|
-
"invert",
|
|
590
|
-
"rank",
|
|
591
|
-
"none",
|
|
592
|
-
]))
|
|
593
|
-
.optional()
|
|
594
|
-
.describe("Per-column preprocessing applied BEFORE normalization. Example: {revenue: 'log', pressure: 'sqrt'}. " +
|
|
595
|
-
"'log' = natural log (fails on <=0), 'log1p' = log(1+x) (safe for zeros), " +
|
|
596
|
-
"'sqrt' = square root, 'rank' = replace with rank order, 'invert' = 1/x. " +
|
|
597
|
-
"Suggest log/log1p for right-skewed distributions (prices, volumes, counts)."),
|
|
598
|
-
normalize: z
|
|
599
|
-
.union([z.enum(["all", "auto"]), z.array(z.string())])
|
|
600
|
-
.optional()
|
|
601
|
-
.default("auto")
|
|
602
|
-
.describe("Normalization mode. 'auto' skips already-cyclic features."),
|
|
603
|
-
sigma_f: z.preprocess((v) => {
|
|
604
|
-
if (v === undefined || v === null)
|
|
605
|
-
return v;
|
|
606
|
-
if (typeof v === "string") {
|
|
607
|
-
const n = parseFloat(v);
|
|
608
|
-
if (!Number.isNaN(n))
|
|
609
|
-
return n;
|
|
610
|
-
}
|
|
611
|
-
return v;
|
|
612
|
-
}, z
|
|
613
|
-
.number()
|
|
614
|
-
.optional()
|
|
615
|
-
.describe("Final neighborhood radius at end of ordering phase (default 1.0). Lower values (0.5–0.7) produce sharper cluster boundaries.")),
|
|
616
|
-
learning_rate: z.preprocess((v) => {
|
|
617
|
-
if (v === undefined || v === null)
|
|
618
|
-
return v;
|
|
619
|
-
if (typeof v === "string") {
|
|
620
|
-
const n = parseFloat(v);
|
|
621
|
-
if (!Number.isNaN(n))
|
|
622
|
-
return n;
|
|
623
|
-
}
|
|
624
|
-
return v;
|
|
625
|
-
}, z
|
|
626
|
-
.union([
|
|
627
|
-
z.number(),
|
|
628
|
-
z.object({
|
|
629
|
-
ordering: z.tuple([z.number(), z.number()]),
|
|
630
|
-
convergence: z.tuple([z.number(), z.number()]),
|
|
631
|
-
}),
|
|
632
|
-
])
|
|
633
|
-
.optional()
|
|
634
|
-
.describe("Learning rate control. Number = sets ordering final rate (e.g. 0.05). Object = full control: {ordering: [eta_0, eta_f], convergence: [eta_0, eta_f]}. Default: ordering 0.1→0.01, convergence 0.01→0.001.")),
|
|
635
|
-
batch_size: z
|
|
636
|
-
.number()
|
|
637
|
-
.int()
|
|
638
|
-
.optional()
|
|
639
|
-
.describe("Training batch size (default: auto ≈ n_samples/10, max 256). Smaller batches (e.g. 32–64) often sharpen features and can improve map quality (QE, explained variance) at the cost of more steps per epoch. Larger batches = faster epochs but coarser updates; try 64–256 for large datasets (>10k samples)."),
|
|
640
|
-
backend: z
|
|
641
|
-
.enum(["auto", "cpu", "cuda", "cuda_graphs"])
|
|
642
|
-
.optional()
|
|
643
|
-
.default("auto")
|
|
644
|
-
.describe("Compute backend. 'auto' uses CUDA if GPU is available (recommended). 'cpu' forces CPU. 'cuda_graphs' uses CUDA graph capture for maximum GPU throughput."),
|
|
645
|
-
output_format: z
|
|
646
|
-
.enum(["png", "pdf", "svg"])
|
|
647
|
-
.optional()
|
|
648
|
-
.default("pdf")
|
|
649
|
-
.describe("Image output format. PDF (default) for publication-quality vector graphics, PNG for quick viewing, SVG for web embedding."),
|
|
650
|
-
output_dpi: z
|
|
651
|
-
.enum(["standard", "retina", "print"])
|
|
652
|
-
.optional()
|
|
653
|
-
.default("retina")
|
|
654
|
-
.describe("Resolution for PNG output: standard (1x), retina (2x, default), print (4x). Ignored for PDF/SVG."),
|
|
655
|
-
colormap: z
|
|
656
|
-
.string()
|
|
657
|
-
.optional()
|
|
658
|
-
.describe("Override default colormap (coolwarm) for component planes and hit histogram. Examples: viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix always uses grays, cyclic features use twilight."),
|
|
659
|
-
row_range: z
|
|
660
|
-
.tuple([z.number().int().min(1), z.number().int().min(1)])
|
|
661
|
-
.optional()
|
|
662
|
-
.describe("Train on a subset of rows only: [start, end] 1-based inclusive. Alternative to creating a subset dataset with datasets(action=subset)."),
|
|
663
|
-
}, async ({ dataset_id, preset, grid_x, grid_y, epochs, model, periodic, columns, transforms, cyclic_features, temporal_features, feature_weights, normalize, sigma_f, learning_rate, batch_size, backend, output_format, output_dpi, colormap, row_range, }) => {
|
|
664
|
-
const PRESETS = {
|
|
665
|
-
quick: { grid: [15, 15], epochs: [15, 5], batch_size: 48 },
|
|
666
|
-
standard: { grid: [25, 25], epochs: [30, 15], batch_size: 48, backend: "cuda" },
|
|
667
|
-
refined: { grid: [40, 40], epochs: [50, 25], batch_size: 32, backend: "cuda" },
|
|
668
|
-
high_res: { grid: [60, 60], epochs: [60, 40], batch_size: 32, backend: "cuda" },
|
|
669
|
-
};
|
|
670
|
-
const p = preset ? PRESETS[preset] : undefined;
|
|
671
|
-
const params = {
|
|
672
|
-
model,
|
|
673
|
-
periodic,
|
|
674
|
-
normalize,
|
|
675
|
-
};
|
|
676
|
-
if (grid_x !== undefined && grid_y !== undefined) {
|
|
677
|
-
params.grid = [grid_x, grid_y];
|
|
678
|
-
}
|
|
679
|
-
else if (p) {
|
|
680
|
-
params.grid = p.grid;
|
|
681
|
-
}
|
|
682
|
-
if (epochs !== undefined) {
|
|
683
|
-
params.epochs = epochs;
|
|
684
|
-
}
|
|
685
|
-
else if (p) {
|
|
686
|
-
params.epochs = p.epochs;
|
|
687
|
-
}
|
|
688
|
-
if (cyclic_features && cyclic_features.length > 0) {
|
|
689
|
-
params.cyclic_features = cyclic_features;
|
|
690
|
-
}
|
|
691
|
-
if (columns && columns.length > 0) {
|
|
692
|
-
params.columns = columns;
|
|
693
|
-
}
|
|
694
|
-
if (transforms && Object.keys(transforms).length > 0) {
|
|
695
|
-
params.transforms = transforms;
|
|
696
|
-
}
|
|
697
|
-
if (temporal_features && temporal_features.length > 0) {
|
|
698
|
-
params.temporal_features = temporal_features;
|
|
699
|
-
}
|
|
700
|
-
if (feature_weights && Object.keys(feature_weights).length > 0) {
|
|
701
|
-
params.feature_weights = feature_weights;
|
|
702
|
-
}
|
|
703
|
-
if (sigma_f !== undefined) {
|
|
704
|
-
params.sigma_f = sigma_f;
|
|
705
|
-
}
|
|
706
|
-
if (learning_rate !== undefined) {
|
|
707
|
-
params.learning_rate = learning_rate;
|
|
708
|
-
}
|
|
709
|
-
if (batch_size !== undefined) {
|
|
710
|
-
params.batch_size = batch_size;
|
|
711
|
-
}
|
|
712
|
-
else if (p) {
|
|
713
|
-
params.batch_size = p.batch_size;
|
|
714
|
-
}
|
|
715
|
-
if (backend !== undefined && backend !== "auto") {
|
|
716
|
-
params.backend = backend;
|
|
717
|
-
}
|
|
718
|
-
else if (p?.backend) {
|
|
719
|
-
params.backend = p.backend;
|
|
720
|
-
}
|
|
721
|
-
params.output_format = output_format ?? "pdf";
|
|
722
|
-
const dpiMap = { standard: 1, retina: 2, print: 4 };
|
|
723
|
-
if (output_dpi && output_dpi !== "retina") {
|
|
724
|
-
params.output_dpi = dpiMap[output_dpi] ?? 2;
|
|
725
|
-
}
|
|
726
|
-
if (colormap) {
|
|
727
|
-
params.colormap = colormap;
|
|
728
|
-
}
|
|
729
|
-
if (row_range && row_range.length >= 2 && row_range[0] <= row_range[1]) {
|
|
730
|
-
params.row_range = row_range;
|
|
731
|
-
}
|
|
732
|
-
const data = (await apiCall("POST", "/v1/jobs", {
|
|
733
|
-
dataset_id,
|
|
734
|
-
params,
|
|
735
|
-
}));
|
|
736
|
-
try {
|
|
737
|
-
const sys = (await apiCall("GET", "/v1/system/info"));
|
|
738
|
-
const p = Number(sys.status?.pending_jobs ?? sys.pending_jobs ?? 0);
|
|
739
|
-
const totalEta = Number(sys.training_time_estimates_seconds?.total ??
|
|
740
|
-
(sys.gpu_available ? 45 : 120));
|
|
741
|
-
// Simple calculation: wait time is jobs ahead of us * time per job
|
|
742
|
-
const waitMinutes = Math.round((p * totalEta) / 60);
|
|
743
|
-
if (waitMinutes > 1) {
|
|
744
|
-
data.estimated_wait_minutes = waitMinutes;
|
|
745
|
-
data.message = `Job submitted. You are #${p + 1} in queue. Estimated wait before start: ~${waitMinutes} min.`;
|
|
746
|
-
}
|
|
747
|
-
else {
|
|
748
|
-
data.message = "Job submitted. Should start momentarily.";
|
|
749
|
-
}
|
|
750
|
-
}
|
|
751
|
-
catch (e) {
|
|
752
|
-
// Ignore system info errors
|
|
753
|
-
}
|
|
754
|
-
return textResult(data);
|
|
755
|
-
});
|
|
756
|
-
// ---- get_job_status ----
|
|
757
|
-
server.tool("get_job_status", `Check status and progress of a training or analysis job.

TIMING: Poll every 3–5s for small jobs, every 10–15s for large grids.
Typical completion times (CPU, 8700 samples):
10x10, 10 epochs: ~30s | 20x20, 30 epochs: ~3–5 min | 40x40, 60 epochs: ~15–30 min

When status is 'completed', call get_results to retrieve the map and metrics.
When status is 'failed', show the error to the user and suggest parameter adjustments.`, {
    job_id: z.string().describe("Job ID from train_som"),
}, async ({ job_id }) => {
    // Look the job up via GET /v1/jobs/{job_id} and condense it into one status line.
    const job = (await apiCall("GET", `/v1/jobs/${job_id}`));
    const state = job.status;
    // progress is scaled by 100 for display — presumably a 0..1 fraction; confirm against the API.
    const percent = ((job.progress ?? 0) * 100).toFixed(1);
    // A missing or empty label means the job is identified by its id alone.
    const labelText = job.label != null && job.label !== "" ? String(job.label) : null;
    const heading = labelText ? `Job ${labelText} (id: ${job_id})` : `Job ${job_id}`;
    // Segments are joined with " | ": the base status, then an optional follow-up hint.
    const segments = [`${heading}: ${state} (${percent}%)`];
    switch (state) {
        case "completed":
            segments.push(`Results ready. Use get_results(job_id="${job_id}") to retrieve.`);
            break;
        case "failed":
            segments.push(`Error: ${job.error ?? "unknown"}`);
            break;
        default:
            break;
    }
    return { content: [{ type: "text", text: segments.join(" | ") }] };
});
|
|
781
|
-
/**
 * Resolve the get_results `figures` parameter to the list of image filenames to fetch.
 *
 * @param {string} jobType - Job type from the result summary. "transition_flow",
 *   "project_variable" and "derive_variable" each yield a single fixed image; any
 *   other value is handled as a SOM training result.
 * @param {object} summary - Result summary. Reads `output_format` (file extension,
 *   default "pdf"), `lag`, `variable_name` and `features`.
 * @param {"default"|"combined_only"|"all"|string[]|undefined} figures - Figure selection.
 *   Array entries are logical names (combined, umatrix, hit_histogram, correlation,
 *   component_N; matched trimmed and case-insensitively) or literal filenames
 *   (anything containing a "."). Unknown entries are dropped.
 * @param {boolean} includeIndividual - Only consulted when `figures` is omitted or
 *   "default": true selects every figure instead of just the combined view.
 * @returns {string[]} Image filenames, in request order.
 */
function getResultsImagesToFetch(jobType, summary, figures, includeIndividual) {
    const ext = summary.output_format ?? "pdf";
    // Anything outside [a-zA-Z0-9_] becomes "_" in generated filenames.
    const sanitize = (name) => String(name).replace(/[^a-zA-Z0-9_]/g, "_");
    if (jobType === "transition_flow") {
        return [`transition_flow_lag${summary.lag ?? 1}.${ext}`];
    }
    if (jobType === "project_variable" || jobType === "derive_variable") {
        return [`projected_${sanitize(summary.variable_name ?? "variable")}.${ext}`];
    }
    // train_som
    const features = summary.features ?? [];
    const combinedName = `combined.${ext}`;
    const umatrixName = `umatrix.${ext}`;
    const hitHistName = `hit_histogram.${ext}`;
    const correlationName = `correlation.${ext}`;
    const componentNames = features.map((f, i) => `component_${i + 1}_${sanitize(f)}.${ext}`);
    const allList = [combinedName, umatrixName, hitHistName, correlationName, ...componentNames];
    if (figures === undefined || figures === "default") {
        return includeIndividual ? allList : [combinedName];
    }
    if (figures === "combined_only") {
        return [combinedName];
    }
    if (figures === "all") {
        return allList;
    }
    if (Array.isArray(figures)) {
        // A Map (not a plain object) so that keys such as "constructor" or "__proto__"
        // cannot resolve to inherited Object.prototype members.
        const nameToFile = new Map([
            ["combined", combinedName],
            ["umatrix", umatrixName],
            ["hit_histogram", hitHistName],
            ["correlation", correlationName],
        ]);
        componentNames.forEach((file, i) => {
            nameToFile.set(`component_${i + 1}`, file);
        });
        return figures
            .map((key) => {
                if (typeof key !== "string") {
                    return null;
                }
                const logical = nameToFile.get(key.trim().toLowerCase());
                if (logical !== undefined) {
                    return logical;
                }
                // Entries that look like filenames are passed through unchanged.
                return key.includes(".") ? key : null;
            })
            .filter((f) => f != null);
    }
    return [combinedName];
}
|
|
836
|
-
// ---- get_results ----
|
|
837
|
-
server.tool("get_results", `Retrieve results of a completed SOM training, projection, or derived variable job.

BEST FOR: Getting the first look at a trained SOM — combined visualization + quality metrics.
TIMING: Near-instant (reads pre-computed results from S3).

Returns: text summary with metrics and inline images (combined view and all plots shown directly in chat).

DOWNLOAD LINKS: Links to API-domain or presigned URLs may not work when clicked (MCP holds the API key, not the browser). Images are inlined. For weights use get_job_export(export="weights"); for node stats use get_job_export(export="nodes"). If the user wants to save a file, offer to fetch via the appropriate tool.

OPTIONS:
- figures: request specific plots only. Omit for default (combined only; or all if include_individual=true).
- "combined_only": only the combined view.
- "all": combined + umatrix + hit_histogram + all component planes.
- Array of logical names: e.g. figures: ["umatrix"] for just the U-matrix, or figures: ["combined","hit_histogram","correlation"] or ["combined","umatrix","component_1","component_2"]. Logical names: combined, umatrix, hit_histogram, correlation, component_1, component_2, ... (component_N = Nth feature). Pass an array to fetch/save only those figures.
- include_individual=true: when figures is omitted, shows each component plane, U-matrix, and hit histogram
as separate inline images. Ignored when figures is set.

AFTER showing results, guide the user:
1. "The U-matrix shows [N] distinct regions. Does this match expected groupings?"
2. "QE=X, TE=Y — [assessment]. Would you like to retrain with different params?"
3. "Which features show interesting patterns in the component planes?"
4. If QE > 2: suggest more epochs or larger grid
5. If TE > 0.15: suggest larger grid
6. If explained variance < 0.7: suggest transforms, feature selection, or more training

WORKFLOW: get_results → analyze(clusters) → component_planes → feature_correlation.
Request specific figures with get_results(job_id, figures=[...]) (e.g. figures: ["umatrix"] or figures: ["combined","hit_histogram"]) or run analyze(job_id, analysis_type) for a single view.
Use get_job_export(export="training_log") for the learning curve (QE vs epoch — healthy=steady decline then plateau).
Use analyze(job_id, "quality_report") for extended metrics (trustworthiness, neighborhood preservation).

METRIC INTERPRETATION:
- QE < 1.5: good fit. QE > 2: consider more epochs, larger grid, or batch_size=32.
- TE < 0.05: good topology. TE > 0.15: grid too small.
- Explained variance > 0.8: good. < 0.7: try transforms, fewer features, or more training.`, {
    job_id: z.string().describe("Job ID of a completed job"),
    figures: z
        .union([
        z.enum(["default", "combined_only", "all"]),
        z.array(z.string()),
    ])
        .optional()
        .describe("Which figures to return. Omit or 'default' for combined only (or all if include_individual=true). 'combined_only': just combined view. 'all': combined + umatrix + hit_histogram + correlation + all component planes. Or array of logical names to fetch only those: combined, umatrix, hit_histogram, correlation, component_1, component_2, ..."),
    include_individual: z
        .boolean()
        .optional()
        .default(false)
        .describe("If true and figures is omitted, inline each individual plot (component planes, u-matrix, hit histogram). Ignored when figures is set."),
}, async ({ job_id, figures, include_individual }) => {
    // Handler outline:
    //   1. fetch the result record for the job,
    //   2. push a job-type-specific text summary,
    //   3. attach the figures selected by `figures` / `include_individual`,
    //   4. walk summary.files for leftover images and JSON hints,
    //   5. append a listing of the available artifacts.
    const data = (await apiCall("GET", `/v1/results/${job_id}`));
    const summary = (data.summary ?? {});
    const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
    const resultsHeader = jobLabel
        ? `Results for ${jobLabel} (job_id: ${job_id})`
        : `Results for job_id: ${job_id}`;
    const content = [];
    const jobType = summary.job_type ?? "train_som";
    // Fixed-point formatting for optional statistics; only `undefined` maps to "N/A".
    const fixedOrNA = (v, digits) => v !== undefined ? Number(v).toFixed(digits) : "N/A";
    // ── Text summary, dispatched by job type ─────────────────────────────────
    if (jobType === "transition_flow") {
        const lag = summary.lag ?? 1;
        const stats = summary.flow_stats ?? {};
        content.push({
            type: "text",
            text: [
                `Transition Flow ${resultsHeader}`,
                `Parent SOM: ${summary.parent_job_id ?? "N/A"} | Lag: ${lag} | Samples: ${summary.n_samples ?? 0}`,
                ``,
                `Flow Statistics:`,
                ` Mean flow magnitude: ${fixedOrNA(stats.mean_magnitude, 4)}`,
                ` Max flow magnitude: ${fixedOrNA(stats.max_magnitude, 4)}`,
                ` Nodes with flow: ${stats.n_nodes_with_flow ?? "N/A"}`,
                ``,
                `Arrows show net directional drift between consecutive BMU assignments.`,
                `Long/bright arrows = frequent state transitions. Short arrows = stable states.`,
                `Background = U-matrix (cluster boundaries). Arrows in dark regions = intra-cluster.`,
                ``,
                `Use transition_flow(lag=N) with larger N to reveal longer-term temporal structure.`,
            ].join("\n"),
        });
    }
    else if (jobType === "project_variable") {
        const varName = summary.variable_name ?? "variable";
        const agg = summary.aggregation ?? "mean";
        const stats = summary.variable_stats ?? {};
        content.push({
            type: "text",
            text: [
                `Projected Variable: ${varName} (${agg}) — ${resultsHeader}`,
                `Parent SOM: ${summary.parent_job_id ?? "N/A"} | Samples: ${summary.n_samples ?? 0}`,
                ``,
                `Variable Statistics (per-node ${agg}):`,
                ` Min: ${fixedOrNA(stats.min, 3)}`,
                ` Max: ${fixedOrNA(stats.max, 3)}`,
                ` Mean: ${fixedOrNA(stats.mean, 3)}`,
                ` Nodes with data: ${stats.n_nodes_with_data ?? "N/A"} / ${(Number(stats.n_nodes_with_data ?? 0) + Number(stats.n_nodes_empty ?? 0))}`,
                ``,
                `Non-random spatial patterns indicate the variable correlates with the SOM's`,
                `learned feature space, even if it wasn't used in training.`,
            ].join("\n"),
        });
    }
    else {
        // ── Default: train_som results ──────────────────────────────────────────
        // NOTE(review): "derive_variable" jobs also land here although
        // getResultsImagesToFetch treats them like projections — confirm intended.
        const grid = summary.grid ?? [0, 0];
        const features = summary.features ?? [];
        const epochs = summary.epochs;
        const epochStr = Array.isArray(epochs)
            ? epochs[1] === 0
                ? `${epochs[0]} ordering only`
                : `${epochs[0]} ordering + ${epochs[1]} convergence`
            : String(epochs ?? "N/A");
        const fmt = (v) => v !== null && v !== undefined ? Number(v).toFixed(4) : "N/A";
        const duration = summary.training_duration_seconds;
        const ordErrors = summary.ordering_errors;
        // Optional lines are `null` when absent and filtered out below; the empty
        // strings are intentional blank separators and must survive the filter.
        const textSummary = [
            `SOM Training ${resultsHeader}`,
            `Grid: ${grid[0]}×${grid[1]} | Features: ${summary.n_features ?? 0} | Samples: ${summary.n_samples ?? 0}`,
            `Model: ${summary.model ?? "SOM"} | Epochs: ${epochStr}`,
            `Periodic: ${summary.periodic ?? true} | Normalize: ${summary.normalize ?? "auto"}`,
            summary.sigma_f !== undefined ? `Sigma_f: ${summary.sigma_f}` : null,
            duration !== undefined ? `Training duration: ${duration}s` : null,
            ``,
            `Quality Metrics:`,
            ` Quantization Error: ${fmt(summary.quantization_error)} (lower is better)`,
            ` Topographic Error: ${fmt(summary.topographic_error)} (lower is better, <0.1 is good)`,
            ` Explained Variance: ${fmt(summary.explained_variance)} (higher is better, >0.7 is good)`,
            ` Silhouette Score: ${fmt(summary.silhouette)} (higher is better, [-1, 1])`,
            ` Davies-Bouldin: ${fmt(summary.davies_bouldin)} (lower is better)`,
            ` Calinski-Harabasz: ${fmt(summary.calinski_harabasz)} (higher is better)`,
            ordErrors && ordErrors.length > 0
                ? ` Final ordering QE: ${fmt(ordErrors.at(-1))} (use get_job_export(export="training_log") for full curve)`
                : null,
            ``,
            `Features: ${features.join(", ")}`,
            summary.selected_columns
                ? `Selected columns: ${summary.selected_columns.join(", ")}`
                : null,
            summary.transforms
                ? `Transforms: ${Object.entries(summary.transforms).map(([k, v]) => `${k}=${v}`).join(", ")}`
                : null,
            ``,
            `Use analyze() for deeper insights and quality_report; get_job_export(export="training_log") for learning curves.`,
        ]
            .filter((line) => line !== null)
            .join("\n");
        content.push({ type: "text", text: textSummary });
    }
    // ── Requested figures (same procedure for every job type) ────────────────
    const inlinedImages = new Set();
    for (const name of getResultsImagesToFetch(jobType, summary, figures, include_individual)) {
        await tryAttachImage(content, job_id, name);
        inlinedImages.add(name);
    }
    // Inline remaining image files; for JSON provide tool hints (no clickable URLs — auth required)
    // NOTE(review): this attaches every image listed in summary.files that was not
    // attached above, independent of `figures` — confirm this is intended, since the
    // tool description says `figures` restricts the returned plots.
    const files = summary.files ?? [];
    const imageSuffixes = [".png", ".svg", ".pdf"];
    const isImage = (f) => imageSuffixes.some((suffix) => f.endsWith(suffix));
    // Hints for well-known JSON artifacts; `null` means "no hint line".
    const jsonHints = new Map([
        ["weights.json", `Use get_job_export(export="weights") for full weight matrix including node_coords.`],
        ["node_stats.json", `Use get_job_export(export="nodes") for per-node statistics.`],
        ["summary.json", null],
    ]);
    for (const fname of files) {
        if (isImage(fname) && !inlinedImages.has(fname)) {
            await tryAttachImage(content, job_id, fname);
        }
        else if (fname.endsWith(".json")) {
            const hint = jsonHints.has(fname)
                ? jsonHints.get(fname)
                : `Use get_job_export for structured data (weights or nodes).`;
            if (hint) {
                content.push({ type: "text", text: `${fname}: ${hint}` });
            }
        }
    }
    // List available artifacts so the LLM can offer to fetch specific views
    if (files.length > 0) {
        const features = summary.features ?? [];
        const logicalNames = jobType === "train_som" || jobType === "render_variant"
            ? `Logical names: combined, umatrix, hit_histogram, correlation, ${features.map((_, i) => `component_${i + 1}`).join(", ")}. `
            : "";
        content.push({
            type: "text",
            text: `Available to fetch individually: ${files.join(", ")}. ${logicalNames}Use get_results(job_id, figures=[...]) to request specific plots, get_results(job_id, include_individual=true) or figures="all" to inline all plots, or analyze(job_id, analysis_type) for a specific view (u_matrix, component_planes, bmu_hits, clusters, quality_report, etc.).`,
        });
    }
    return { content };
});
|
|
1035
|
-
// ---- recolor_som ----
|
|
1036
|
-
server.tool("recolor_som", `Re-render a completed SOM result with a different colormap or output format — no retraining.

Use when the user wants to see the same combined (or other) plot with another color scheme (e.g. plasma, inferno, coolwarm). You can also use this to re-export figures in a different format (e.g. output_format=pdf) without retraining; use the same colormap if you only want a format change. Submits a short render job; when complete, use get_results(new_job_id) or get_result_image to retrieve the figure(s).

Colormaps (default: coolwarm): e.g. viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix and cyclic panels keep fixed colormaps (grays, twilight).`, {
    job_id: z.string().describe("Job ID of a completed SOM training job (parent)"),
    colormap: z.string().describe("Colormap name (default: coolwarm). E.g. viridis, plasma, inferno, magma, coolwarm)"),
    figures: z
        .array(z.string())
        .optional()
        .default(["combined"])
        .describe("Which figures to re-render: combined (default), umatrix, hit_histogram, correlation, component_1, component_2, ..."),
    output_format: z.enum(["png", "pdf", "svg"]).optional().default("pdf"),
    output_dpi: z.number().int().min(1).max(4).optional().default(2),
}, async ({ job_id, colormap, figures, output_format, output_dpi }) => {
    // Submit a render job against the parent result; the body is sent pre-serialized
    // together with an explicit JSON content type.
    const payload = JSON.stringify({ colormap, figures, output_format, output_dpi });
    const requestHeaders = { "Content-Type": "application/json" };
    const response = (await apiCall("POST", `/v1/results/${job_id}/render`, payload, requestHeaders));
    // The response's `id` is reported to the caller as the id of the new render job.
    const renderJobId = response.id;
    const confirmation = `Re-render job submitted with colormap "${colormap}".`;
    const nextSteps = `New job_id: ${renderJobId}. Poll get_job_status(job_id="${renderJobId}") until status is 'completed', then use get_results(job_id="${renderJobId}") or get_result_image to retrieve the recolored plot(s). No retraining was performed.`;
    return {
        content: [
            { type: "text", text: `${confirmation}\n${nextSteps}` },
        ],
    };
});
|
|
1067
|
-
// ---- download_results ----
|
|
1068
|
-
server.tool("download_results", `Save result figures (and optionally JSON) to a folder on disk. Use so the user can open, share, or version files locally without writing their own download script.

folder: path to the directory (e.g. "." for current/workspace, "./results", or absolute path). When folder is a generic path like "." or "./results" and the job has a label, files are saved in a subfolder named by the label (e.g. ./results/Winedata_a1b2c3_badger_thong_oil). You can also pass a path that already includes the label.
figures: "all" (default) = all image files from the job; "images" = same; or pass an array of filenames to save only those (e.g. ["combined.pdf", "umatrix.pdf", "correlation.pdf"]). Default export format is PDF.
include_json: if true, also save summary.json (and other JSON artifacts) into the same folder.`, {
    job_id: z.string().describe("Job ID of a completed job"),
    folder: z.string().describe("Directory path to save files (e.g. '.' or './results'). When the job has a label, a subfolder with that label may be used. Relative paths are relative to process cwd (usually project root)."),
    figures: z
        .union([z.enum(["all", "images"]), z.array(z.string())])
        .optional()
        .default("all")
        .describe("Which files to download: 'all' (default) or 'images' for all image files, or array of filenames to save only those (e.g. ['combined.pdf', 'umatrix.pdf', 'correlation.pdf'])."),
    include_json: z.boolean().optional().default(false).describe("If true, also download summary.json and other JSON files"),
}, async ({ job_id, folder, figures, include_json }) => {
    // Fetch the result record; summary.files lists the artifacts the job produced.
    const data = (await apiCall("GET", `/v1/results/${job_id}`));
    const summary = (data.summary ?? {});
    const jobLabel = data.label != null && data.label !== "" ? String(data.label) : null;
    const files = summary.files ?? [];
    const isImage = (f) => f.endsWith(".png") || f.endsWith(".svg") || f.endsWith(".pdf");
    // Decide which files to request.
    let toDownload;
    if (figures === "all" || figures === "images") {
        toDownload = include_json ? files : files.filter(isImage);
    }
    else {
        toDownload = figures;
        if (include_json && !toDownload.includes("summary.json")) {
            toDownload = [...toDownload, "summary.json"];
        }
    }
    // Resolve the target directory; generic folders get a per-job subfolder named by the label.
    let resolvedDir = path.resolve(folder);
    const useLabelSubfolder = jobLabel &&
        (folder === "." || folder === "./results" || folder === "results");
    if (useLabelSubfolder) {
        // Flatten path separators so the label always names exactly one directory
        // directly below the chosen folder and can never climb out of it.
        const safeLabel = jobLabel.replace(/[\\/]/g, "_");
        if (safeLabel !== "." && safeLabel !== "..") {
            resolvedDir = path.join(resolvedDir, safeLabel);
        }
    }
    await fs.mkdir(resolvedDir, { recursive: true });
    const saved = [];
    const notSaved = [];
    // File names come from the caller or the remote API: accept plain names only, so a
    // value such as "../x" cannot be written outside resolvedDir.
    const isPlainFileName = (name) => typeof name === "string" &&
        name !== "" &&
        name !== "." &&
        name !== ".." &&
        !/[\\/]/.test(name);
    for (const filename of toDownload) {
        if (!isPlainFileName(filename)) {
            notSaved.push(String(filename));
            continue;
        }
        try {
            const { data: buf } = await apiRawCall(`/v1/results/${job_id}/image/${filename}`);
            const outPath = path.join(resolvedDir, filename);
            await fs.writeFile(outPath, buf);
            saved.push(filename);
        }
        catch {
            // Best effort: a missing or failed file must not abort the remaining
            // downloads, but it is reported back instead of being dropped silently.
            notSaved.push(filename);
        }
    }
    const notSavedNote = notSaved.length > 0
        ? ` Not saved (invalid name, missing, or download failed): ${notSaved.join(", ")}`
        : "";
    const text = saved.length > 0
        ? `Saved ${saved.length} file(s) to ${resolvedDir}: ${saved.join(", ")}${notSavedNote ? `.${notSavedNote}` : ""}`
        : `No files saved (job may have no matching files or download failed). Check job_id and that the job is completed.${notSavedNote}`;
    return { content: [{ type: "text", text }] };
});
|
|
1122
|
-
// ---- analyze ----
|
|
1123
|
-
server.tool("analyze", `Run a specific analysis on SOM results. Use after get_results to drill into aspects.
|
|
1124
|
-
Request specific plots: get_results(job_id, figures=[...]) for chosen figures (e.g. figures: ["umatrix"]) or analyze(job_id, analysis_type) for a single analysis view.
|
|
1125
|
-
|
|
1126
|
-
Available analysis types and when to use them:
|
|
1127
|
-
|
|
1128
|
-
u_matrix — Cluster boundary map. Ask: "Do boundary regions match expected
|
|
1129
|
-
groupings? How many clusters do you see?"
|
|
1130
|
-
component_planes — Per-feature distributions. Ask: "Which features show similar
|
|
1131
|
-
spatial patterns? Those are correlated in your data."
|
|
1132
|
-
bmu_hits — Data density per node. Ask: "Are there dense hot-spots?
|
|
1133
|
-
Sparse regions may be interpolated or rare states."
|
|
1134
|
-
clusters — Quality assessment with recommendations. Always run this after
|
|
1135
|
-
get_results to decide whether to retrain.
|
|
1136
|
-
feature_importance — Rank features by SOM contribution (component plane variance).
|
|
1137
|
-
Ask: "Does this ranking match your domain knowledge?"
|
|
1138
|
-
feature_correlation — Compare all component planes side-by-side for correlation.
|
|
1139
|
-
Ask: "Which pairs of features move together? Are any redundant?"
|
|
1140
|
-
transition_flow — Temporal BMU transition patterns (best for time-series data).
|
|
1141
|
-
Ask: "Do you see cyclic or directional patterns in the transitions?"
|
|
1142
|
-
local_density — Identify cluster cores vs boundaries. Ask: "Where are the
|
|
1143
|
-
high-density regions? Do they correspond to known operating modes?"
|
|
1144
|
-
feature_gradient — Spatial rate of change per feature. Ask: "Where does this
|
|
1145
|
-
feature change most rapidly? Does it align with cluster boundaries?"
|
|
1146
|
-
quality_report — Comprehensive quality report: QE, TE, silhouette, trustworthiness,
|
|
1147
|
-
neighborhood preservation, topographic product, and recommendations.
|
|
1148
|
-
|
|
1149
|
-
WORKFLOW RECOMMENDATION:
|
|
1150
|
-
1. Start with clusters → check quality metrics and recommendations
|
|
1151
|
-
2. Then u_matrix → identify cluster boundaries (dark=cores, bright=boundaries)
|
|
1152
|
-
3. Then component_planes or feature_importance → understand feature roles
|
|
1153
|
-
(similar spatial patterns in component planes = correlated features)
|
|
1154
|
-
4. Then feature_correlation → find redundant or linked features
|
|
1155
|
-
5. For time-series: add transition_flow and local_density
|
|
1156
|
-
|
|
1157
|
-
INTERPRETATION TIPS:
|
|
1158
|
-
- U-matrix: count dark "basins" to estimate cluster count; bright bands are boundaries.
|
|
1159
|
-
- Component planes: compare side-by-side; similar patterns mean correlated features.
|
|
1160
|
-
- BMU hits: empty nodes suggest oversized grid or data gaps.
|
|
1161
|
-
- If quality is low, retrain with: larger grid, more epochs, smaller batch_size, or transforms.`, {
|
|
1162
|
-
job_id: z.string().describe("Job ID of a completed job"),
|
|
1163
|
-
analysis_type: z
|
|
1164
|
-
.enum([
|
|
1165
|
-
"u_matrix",
|
|
1166
|
-
"component_planes",
|
|
1167
|
-
"bmu_hits",
|
|
1168
|
-
"clusters",
|
|
1169
|
-
"feature_importance",
|
|
1170
|
-
"feature_correlation",
|
|
1171
|
-
"transition_flow",
|
|
1172
|
-
"local_density",
|
|
1173
|
-
"feature_gradient",
|
|
1174
|
-
"quality_report",
|
|
1175
|
-
])
|
|
1176
|
-
.describe("Type of analysis to run"),
|
|
1177
|
-
params: z
|
|
1178
|
-
.record(z.unknown())
|
|
1179
|
-
.optional()
|
|
1180
|
-
.describe("Analysis-specific parameters. For component_planes/feature_gradient: {features: [col,...]} to restrict to specific columns."),
|
|
1181
|
-
}, async ({ job_id, analysis_type, params: extraParams }) => {
|
|
1182
|
-
const data = (await apiCall("GET", `/v1/results/${job_id}`));
|
|
1183
|
-
const summary = (data.summary ?? {});
|
|
1184
|
-
const features = summary.features ?? [];
|
|
1185
|
-
const grid = summary.grid ?? [0, 0];
|
|
1186
|
-
const ext = summary.output_format ?? "pdf";
|
|
1187
|
-
const content = [];
|
|
1188
|
-
if (analysis_type === "u_matrix") {
|
|
1189
|
-
content.push({
|
|
1190
|
-
type: "text",
|
|
1191
|
-
text: [
|
|
1192
|
-
`U-Matrix Analysis (job: ${job_id})`,
|
|
1193
|
-
`Grid: ${grid[0]}×${grid[1]}`,
|
|
1194
|
-
``,
|
|
1195
|
-
`The U-matrix shows average distances between neighboring nodes.`,
|
|
1196
|
-
` High values (bright/white) = cluster boundaries`,
|
|
1197
|
-
` Low values (dark) = cluster cores`,
|
|
1198
|
-
``,
|
|
1199
|
-
`What to look for:`,
|
|
1200
|
-
` - Dark islands separated by bright ridges = distinct clusters`,
|
|
1201
|
-
` - Gradual transitions = continuous variation, no hard boundaries`,
|
|
1202
|
-
` - Uniform brightness = poorly organized map (try more epochs)`,
|
|
1203
|
-
].join("\n"),
|
|
1204
|
-
});
|
|
1205
|
-
await tryAttachImage(content, job_id, `umatrix.${ext}`);
|
|
1206
|
-
}
|
|
1207
|
-
else if (analysis_type === "component_planes") {
|
|
1208
|
-
const requested = extraParams?.features ?? features;
|
|
1209
|
-
content.push({
|
|
1210
|
-
type: "text",
|
|
1211
|
-
text: [
|
|
1212
|
-
`Component Planes (job: ${job_id})`,
|
|
1213
|
-
`Features: ${requested.join(", ")}`,
|
|
1214
|
-
``,
|
|
1215
|
-
`Each panel shows one feature's distribution across the SOM.`,
|
|
1216
|
-
` Similar color patterns = correlated features`,
|
|
1217
|
-
` Inverse patterns = negatively correlated features`,
|
|
1218
|
-
` Unique patterns = independent structure drivers`,
|
|
1219
|
-
].join("\n"),
|
|
1220
|
-
});
|
|
1221
|
-
for (let i = 0; i < features.length; i++) {
|
|
1222
|
-
if (!requested.includes(features[i]))
|
|
1223
|
-
continue;
|
|
1224
|
-
const safeName = features[i].replace(/[^a-zA-Z0-9_]/g, "_");
|
|
1225
|
-
await tryAttachImage(content, job_id, `component_${i + 1}_${safeName}.${ext}`);
|
|
1226
|
-
}
|
|
1227
|
-
}
|
|
1228
|
-
else if (analysis_type === "bmu_hits") {
|
|
1229
|
-
content.push({
|
|
1230
|
-
type: "text",
|
|
1231
|
-
text: [
|
|
1232
|
-
`BMU Hit Histogram (job: ${job_id})`,
|
|
1233
|
-
`Grid: ${grid[0]}×${grid[1]} | Samples: ${summary.n_samples ?? 0}`,
|
|
1234
|
-
``,
|
|
1235
|
-
`Shows data density per SOM node.`,
|
|
1236
|
-
` Large values (yellow/bright) = dense data regions (common operating states)`,
|
|
1237
|
-
` Zero/low (dark purple) = sparse or interpolated areas`,
|
|
1238
|
-
``,
|
|
1239
|
-
`Cross-reference with U-matrix: dense nodes inside dark U-matrix regions`,
|
|
1240
|
-
`indicate well-populated cluster cores.`,
|
|
1241
|
-
].join("\n"),
|
|
1242
|
-
});
|
|
1243
|
-
await tryAttachImage(content, job_id, `hit_histogram.${ext}`);
|
|
1244
|
-
}
|
|
1245
|
-
else if (analysis_type === "clusters") {
|
|
1246
|
-
const qe = summary.quantization_error;
|
|
1247
|
-
const te = summary.topographic_error;
|
|
1248
|
-
const ev = summary.explained_variance;
|
|
1249
|
-
const sil = summary.silhouette;
|
|
1250
|
-
const qeLabel = qe === undefined ? "N/A" :
|
|
1251
|
-
qe < 0.5 ? "excellent" :
|
|
1252
|
-
qe < 1.0 ? "good" :
|
|
1253
|
-
qe < 2.0 ? "fair" : "poor";
|
|
1254
|
-
const teLabel = te === undefined ? "N/A" :
|
|
1255
|
-
te < 0.05 ? "excellent" :
|
|
1256
|
-
te < 0.10 ? "good" :
|
|
1257
|
-
te < 0.20 ? "fair" : "poor";
|
|
1258
|
-
const fmt = (v) => v !== undefined ? v.toFixed(4) : "N/A";
|
|
1259
|
-
const recommendations = [];
|
|
1260
|
-
if (te !== undefined && te > 0.15) {
|
|
1261
|
-
recommendations.push(`Topographic error ${(te * 100).toFixed(1)}% is high — try a larger grid or more epochs.`);
|
|
1262
|
-
}
|
|
1263
|
-
if (qe !== undefined && qe > 2.0) {
|
|
1264
|
-
recommendations.push(`Quantization error ${qe.toFixed(3)} is high — try more epochs, a larger grid, or check for outliers.`);
|
|
1265
|
-
}
|
|
1266
|
-
if (ev !== undefined && ev < 0.7) {
|
|
1267
|
-
recommendations.push(`Explained variance ${(ev * 100).toFixed(1)}% is low — try more epochs, a larger grid, or feature weighting.`);
|
|
1268
|
-
}
|
|
1269
|
-
if (sil !== undefined && sil < 0.1) {
|
|
1270
|
-
recommendations.push(`Low silhouette score — clusters overlap. Try sigma_f=0.5 or more training.`);
|
|
1271
|
-
}
|
|
1272
|
-
if (recommendations.length === 0) {
|
|
1273
|
-
recommendations.push(`Metrics look healthy. Proceed with component plane and feature analysis.`);
|
|
1274
|
-
}
|
|
1275
|
-
const gridStr = grid[0] > 0 ? `${grid[0]}×${grid[1]}` : "N/A";
|
|
1276
|
-
content.push({
|
|
1277
|
-
type: "text",
|
|
1278
|
-
text: [
|
|
1279
|
-
`Cluster Quality Assessment (job: ${job_id})`,
|
|
1280
|
-
`Grid: ${gridStr} | Features: ${features.length} | Samples: ${summary.n_samples ?? "N/A"}`,
|
|
1281
|
-
``,
|
|
1282
|
-
`Quantization Error: ${fmt(qe)} (${qeLabel})`,
|
|
1283
|
-
`Topographic Error: ${fmt(te)} (${teLabel})`,
|
|
1284
|
-
`Explained Variance: ${fmt(ev)}`,
|
|
1285
|
-
`Silhouette Score: ${fmt(sil)}`,
|
|
1286
|
-
`Davies-Bouldin: ${fmt(summary.davies_bouldin)}`,
|
|
1287
|
-
`Calinski-Harabasz: ${fmt(summary.calinski_harabasz)}`,
|
|
1288
|
-
``,
|
|
1289
|
-
`Recommendations:`,
|
|
1290
|
-
...recommendations.map((r) => ` - ${r}`),
|
|
1291
|
-
].join("\n"),
|
|
1292
|
-
});
|
|
1293
|
-
}
|
|
1294
|
-
else if (analysis_type === "feature_importance") {
|
|
1295
|
-
content.push({
|
|
1296
|
-
type: "text",
|
|
1297
|
-
text: [
|
|
1298
|
-
`Feature Importance Analysis (job: ${job_id})`,
|
|
1299
|
-
`Grid: ${grid[0]}×${grid[1]} | Features: ${features.length}`,
|
|
1300
|
-
``,
|
|
1301
|
-
`Feature importance is determined by the variance of each component plane.`,
|
|
1302
|
-
`Higher variance = feature contributes more to the SOM structure.`,
|
|
1303
|
-
``,
|
|
1304
|
-
`Features analyzed: ${features.join(", ")}`,
|
|
1305
|
-
``,
|
|
1306
|
-
`Compare the component planes visually: features with the most varied`,
|
|
1307
|
-
`color gradients are the primary drivers of the cluster structure.`,
|
|
1308
|
-
`Features with near-uniform color contribute little to differentiation.`,
|
|
1309
|
-
].join("\n"),
|
|
1310
|
-
});
|
|
1311
|
-
await tryAttachImage(content, job_id, `combined.${ext}`);
|
|
1312
|
-
}
|
|
1313
|
-
else if (analysis_type === "feature_correlation") {
|
|
1314
|
-
content.push({
|
|
1315
|
-
type: "text",
|
|
1316
|
-
text: [
|
|
1317
|
-
`Feature Correlation Analysis (job: ${job_id})`,
|
|
1318
|
-
`Features: ${features.join(", ")}`,
|
|
1319
|
-
``,
|
|
1320
|
-
`Compare component planes side-by-side to identify correlated features.`,
|
|
1321
|
-
` Similar spatial patterns = positively correlated`,
|
|
1322
|
-
` Inverse/mirrored patterns = negatively correlated`,
|
|
1323
|
-
` Unrelated patterns = independent features`,
|
|
1324
|
-
``,
|
|
1325
|
-
`Correlated features may be redundant — consider disabling one via feature_weights: {col: 0}.`,
|
|
1326
|
-
].join("\n"),
|
|
1327
|
-
});
|
|
1328
|
-
for (let i = 0; i < features.length; i++) {
|
|
1329
|
-
const safeName = features[i].replace(/[^a-zA-Z0-9_]/g, "_");
|
|
1330
|
-
await tryAttachImage(content, job_id, `component_${i + 1}_${safeName}.${ext}`);
|
|
1331
|
-
}
|
|
1332
|
-
}
|
|
1333
|
-
else if (analysis_type === "transition_flow") {
|
|
1334
|
-
content.push({
|
|
1335
|
-
type: "text",
|
|
1336
|
-
text: [
|
|
1337
|
-
`Transition Flow Analysis (job: ${job_id})`,
|
|
1338
|
-
`Grid: ${grid[0]}×${grid[1]} | Samples: ${summary.n_samples ?? 0}`,
|
|
1339
|
-
``,
|
|
1340
|
-
`Transition flow shows how data points move between SOM nodes in sequence.`,
|
|
1341
|
-
`This reveals temporal patterns and state machine behavior.`,
|
|
1342
|
-
``,
|
|
1343
|
-
`What to look for:`,
|
|
1344
|
-
` - Dense arrow clusters = frequent state transitions (common paths)`,
|
|
1345
|
-
` - Circular/cyclic flows = periodic behavior (daily/seasonal cycles)`,
|
|
1346
|
-
` - Long-range transitions = regime changes or anomalies`,
|
|
1347
|
-
``,
|
|
1348
|
-
`Note: Full transition flow arrows require server-side support (planned).`,
|
|
1349
|
-
`Currently showing U-matrix for cluster boundary context.`,
|
|
1350
|
-
].join("\n"),
|
|
1351
|
-
});
|
|
1352
|
-
await tryAttachImage(content, job_id, `umatrix.${ext}`);
|
|
1353
|
-
}
|
|
1354
|
-
else if (analysis_type === "local_density") {
|
|
1355
|
-
content.push({
|
|
1356
|
-
type: "text",
|
|
1357
|
-
text: [
|
|
1358
|
-
`Local Density & Cluster Analysis (job: ${job_id})`,
|
|
1359
|
-
`Grid: ${grid[0]}×${grid[1]} | Samples: ${summary.n_samples ?? 0}`,
|
|
1360
|
-
``,
|
|
1361
|
-
`Local density = inverse of U-matrix values.`,
|
|
1362
|
-
` High density (low U-matrix) = cluster cores (similar neighbors)`,
|
|
1363
|
-
` Low density (high U-matrix) = cluster boundaries (dissimilar neighbors)`,
|
|
1364
|
-
``,
|
|
1365
|
-
`Cross-reference hit histogram with U-matrix:`,
|
|
1366
|
-
` Dense hits + low U-matrix = populated cluster core (dominant operating mode)`,
|
|
1367
|
-
` Dense hits + high U-matrix = transition zone with many samples (worth investigating)`,
|
|
1368
|
-
` Sparse hits anywhere = rare state or interpolated region`,
|
|
1369
|
-
].join("\n"),
|
|
1370
|
-
});
|
|
1371
|
-
await tryAttachImage(content, job_id, `umatrix.${ext}`);
|
|
1372
|
-
await tryAttachImage(content, job_id, `hit_histogram.${ext}`);
|
|
1373
|
-
}
|
|
1374
|
-
else if (analysis_type === "feature_gradient") {
|
|
1375
|
-
const targetFeature = extraParams?.feature;
|
|
1376
|
-
content.push({
|
|
1377
|
-
type: "text",
|
|
1378
|
-
text: [
|
|
1379
|
-
`Feature Gradient Analysis (job: ${job_id})`,
|
|
1380
|
-
`Target: ${targetFeature ?? "all features"}`,
|
|
1381
|
-
`Grid: ${grid[0]}×${grid[1]}`,
|
|
1382
|
-
``,
|
|
1383
|
-
`Feature gradients show where each feature changes most rapidly on the SOM.`,
|
|
1384
|
-
` High gradient = feature transitions rapidly (boundary region for this feature)`,
|
|
1385
|
-
` Low gradient = feature is stable across this region`,
|
|
1386
|
-
``,
|
|
1387
|
-
`Compare with U-matrix: if feature gradients align with U-matrix boundaries,`,
|
|
1388
|
-
`this feature is a key driver of the cluster separation.`,
|
|
1389
|
-
].join("\n"),
|
|
1390
|
-
});
|
|
1391
|
-
if (targetFeature) {
|
|
1392
|
-
const idx = features.indexOf(targetFeature);
|
|
1393
|
-
if (idx >= 0) {
|
|
1394
|
-
const safeName = targetFeature.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
1395
|
-
await tryAttachImage(content, job_id, `component_${idx + 1}_${safeName}.${ext}`);
|
|
1396
|
-
}
|
|
1397
|
-
}
|
|
1398
|
-
else {
|
|
1399
|
-
await tryAttachImage(content, job_id, `combined.${ext}`);
|
|
1400
|
-
}
|
|
1401
|
-
await tryAttachImage(content, job_id, `umatrix.${ext}`);
|
|
1402
|
-
}
|
|
1403
|
-
else if (analysis_type === "quality_report") {
|
|
1404
|
-
const qrData = (await apiCall("GET", `/v1/results/${job_id}/quality-report`));
|
|
1405
|
-
const std = qrData.standard_metrics ?? {};
|
|
1406
|
-
const clust = qrData.cluster_metrics ?? {};
|
|
1407
|
-
const topo = qrData.topology_metrics ?? {};
|
|
1408
|
-
const train = qrData.training ?? {};
|
|
1409
|
-
const qrGrid = qrData.grid ?? [0, 0];
|
|
1410
|
-
const fmt = (v) => v !== null && v !== undefined ? v.toFixed(4) : "—";
|
|
1411
|
-
const fmtPct = (v) => v !== null && v !== undefined ? `${(v * 100).toFixed(1)}%` : "—";
|
|
1412
|
-
const recommendations = [];
|
|
1413
|
-
const qe = std.quantization_error;
|
|
1414
|
-
const te = std.topographic_error;
|
|
1415
|
-
const ev = std.explained_variance;
|
|
1416
|
-
const sil = clust.silhouette;
|
|
1417
|
-
const trust = topo.trustworthiness;
|
|
1418
|
-
if (qe !== null && qe !== undefined && qe > 2.0)
|
|
1419
|
-
recommendations.push("QE is high → try more epochs or a larger grid");
|
|
1420
|
-
if (te !== null && te !== undefined && te > 0.15)
|
|
1421
|
-
recommendations.push("TE is high → topology is not well-preserved, try larger grid");
|
|
1422
|
-
if (ev !== null && ev !== undefined && ev < 0.7)
|
|
1423
|
-
recommendations.push("Explained variance < 70% → consider more training or feature selection");
|
|
1424
|
-
if (sil !== null && sil !== undefined && sil < 0.1)
|
|
1425
|
-
recommendations.push("Low silhouette → clusters overlap, try sigma_f=0.5 or more epochs");
|
|
1426
|
-
if (trust !== null && trust !== undefined && trust < 0.85)
|
|
1427
|
-
recommendations.push("Trustworthiness < 85% → local neighborhood structure is distorted");
|
|
1428
|
-
if (recommendations.length === 0)
|
|
1429
|
-
recommendations.push("All metrics look healthy — good map quality!");
|
|
1430
|
-
const epochs = train.epochs;
|
|
1431
|
-
const epochStr = epochs
|
|
1432
|
-
? epochs[1] === 0 ? `${epochs[0]} ordering only` : `${epochs[0]}+${epochs[1]}`
|
|
1433
|
-
: "—";
|
|
1434
|
-
const qrLines = [
|
|
1435
|
-
`Quality Report — Job ${job_id}`,
|
|
1436
|
-
`Grid: ${qrGrid[0]}×${qrGrid[1]} | Model: ${qrData.model ?? "SOM"} | Samples: ${qrData.n_samples ?? "?"}`,
|
|
1437
|
-
`Epochs: ${epochStr} | Duration: ${train.duration_seconds ? `${train.duration_seconds}s` : "—"}`,
|
|
1438
|
-
``,
|
|
1439
|
-
`Standard Metrics:`,
|
|
1440
|
-
` Quantization Error: ${fmt(std.quantization_error)} (lower is better)`,
|
|
1441
|
-
` Topographic Error: ${fmt(std.topographic_error)} (lower is better)`,
|
|
1442
|
-
` Distortion: ${fmt(std.distortion)}`,
|
|
1443
|
-
` Kaski-Lagus Error: ${fmt(std.kaski_lagus_error)} (lower is better)`,
|
|
1444
|
-
` Explained Variance: ${fmtPct(std.explained_variance)}`,
|
|
1445
|
-
``,
|
|
1446
|
-
`Cluster Quality Metrics:`,
|
|
1447
|
-
` Silhouette Score: ${fmt(clust.silhouette)} (higher is better, -1 to +1)`,
|
|
1448
|
-
` Davies-Bouldin: ${fmt(clust.davies_bouldin)} (lower is better)`,
|
|
1449
|
-
` Calinski-Harabasz: ${fmt(clust.calinski_harabasz)} (higher is better)`,
|
|
1450
|
-
``,
|
|
1451
|
-
`Topology Metrics:`,
|
|
1452
|
-
` Neighborhood Preservation: ${fmtPct(topo.neighborhood_preservation)} (higher is better)`,
|
|
1453
|
-
` Trustworthiness: ${fmtPct(topo.trustworthiness)} (higher is better)`,
|
|
1454
|
-
` Topographic Product: ${fmt(topo.topographic_product)} (near 0 is ideal)`,
|
|
1455
|
-
``,
|
|
1456
|
-
`Recommendations:`,
|
|
1457
|
-
...recommendations.map((r) => ` • ${r}`),
|
|
1458
|
-
];
|
|
1459
|
-
content.push({ type: "text", text: qrLines.join("\n") });
|
|
1460
|
-
}
|
|
1461
|
-
return { content };
|
|
1462
|
-
});
|
|
1463
|
-
// ---- compare_runs ----
|
|
1464
|
-
// compare_runs: fetches the comparison payload for several jobs and renders it
// as a Markdown table (one row per job). Rows that carry an `error` field are
// rendered with the error text in the QE column and dashes elsewhere.
//
// Input:  { job_ids: string[] } (at least two IDs, enforced by the schema).
// Output: MCP tool result with a single text item containing the table.
server.tool("compare_runs", `Compare metrics across multiple completed SOM training jobs.
Returns a table of QE, TE, silhouette, and other metrics for each job.

Use to evaluate hyperparameter choices: grid size, epochs, sigma_f, model type, feature selection.

After comparing, ask the user:
"Which job produced the best metrics for your goal?"
- For visualization clarity: prioritize low topographic error (<0.1)
- For tight clusters: prioritize low QE and high silhouette
- For dimensionality reduction: prioritize high explained variance (>0.8)`, {
    job_ids: z
        .array(z.string())
        .min(2)
        .describe("Array of job IDs to compare (minimum 2)"),
}, async ({ job_ids }) => {
    // Encode every ID individually so reserved characters (&, #, =, spaces)
    // cannot change the shape of the query string. The comma separator stays
    // literal, so plain IDs produce exactly the same URL as before.
    const ids = job_ids.map((id) => encodeURIComponent(id)).join(",");
    const data = (await apiCall("GET", `/v1/jobs/compare?ids=${ids}`));
    const comparisons = (data.comparisons ?? []);
    // Formatting helpers are hoisted out of the loop; they do not depend on the row.
    const fmt = (v) => v !== null && v !== undefined ? Number(v).toFixed(4) : "—";
    const shortId = (id) => `${String(id ?? "").slice(0, 8)}...`;
    // A raw "|" or newline inside a cell would break the Markdown table layout.
    const cell = (v) => String(v).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
    const lines = [
        "| Job ID | Grid | Epochs | Model | QE | TE | Expl.Var | Silhouette |",
        "|--------|------|--------|-------|----|----|----------|------------|",
    ];
    for (const c of comparisons) {
        if (c.error) {
            lines.push(`| ${shortId(c.job_id)} | — | — | — | ${cell(c.error)} | — | — | — |`);
            continue;
        }
        const g = c.grid;
        const gridStr = g ? `${g[0]}×${g[1]}` : "—";
        const ep = c.epochs;
        // Both phases are always printed, including a zero convergence phase ("N+0").
        const epStr = ep ? `${ep[0]}+${ep[1]}` : "—";
        const model = c.model ?? "—";
        lines.push(`| ${shortId(c.job_id)} | ${gridStr} | ${epStr} | ${model} | ${fmt(c.quantization_error)} | ${fmt(c.topographic_error)} | ${fmt(c.explained_variance)} | ${fmt(c.silhouette)} |`);
    }
    return {
        content: [{ type: "text", text: lines.join("\n") }],
    };
});
|
|
1507
|
-
// ---- manage_job ----
|
|
1508
|
-
// manage_job: cancels (POST .../cancel) or deletes (DELETE) a single job and
// returns the API response through textResult.
//
// Input:  { job_id: string, action: "cancel" | "delete" }.
// Output: whatever textResult builds from the API response.
server.tool("manage_job", `Cancel or delete a job.

action=cancel: Cancel a pending or running job. Not instant — worker checks between phases (expect up to 30s). Use when run is too slow, wrong params, or to free the worker. Partial results discarded.
action=delete: Permanently delete a job and all S3 result files. Use to free storage, remove test runs, or clean up after cancel. WARNING: Job ID will no longer work with get_results or other tools.`, {
    job_id: z.string().describe("Job ID to cancel or delete"),
    action: z
        .enum(["cancel", "delete"])
        .describe("cancel: stop the job; delete: remove job and all result files"),
}, async ({ job_id, action }) => {
    // The ID becomes a path segment of a destructive request, so it is encoded:
    // a value containing "/", "?" or "#" must not be able to re-target the call.
    const jobPath = `/v1/jobs/${encodeURIComponent(job_id)}`;
    const data = action === "cancel"
        ? await apiCall("POST", `${jobPath}/cancel`)
        : await apiCall("DELETE", jobPath);
    return textResult(data);
});
|
|
1524
|
-
// ---- list ----
|
|
1525
|
-
// list: lists datasets or jobs.
//
// Input:  { type: "datasets" | "jobs", dataset_id?: string }.
// Output: for datasets, the API response via textResult. For jobs, one text
//         line per job ("label (id: …) — status: …") when the API returns an
//         array; any other response shape is passed to textResult unchanged.
server.tool("list", `List datasets or jobs.

type=datasets: List all datasets uploaded by the organization. Use to check what data is available before train_som or to find dataset IDs.
type=jobs: List SOM training jobs (optionally filtered by dataset_id). Use to find job IDs for compare_runs, check completed vs pending, or review hyperparameters.`, {
    type: z
        .enum(["datasets", "jobs"])
        .describe("What to list: datasets or jobs"),
    dataset_id: z
        .string()
        .optional()
        .describe("Filter jobs by dataset ID (only used when type=jobs)"),
}, async ({ type, dataset_id }) => {
    if (type === "datasets") {
        const datasets = await apiCall("GET", "/v1/datasets");
        return textResult(datasets);
    }
    // From here on `type` can only be "jobs" (the schema is a two-value enum).
    // The endpoint variable is deliberately not called `path`, which would
    // shadow the module-level `node:path` import. The filter value is encoded
    // so it cannot inject extra query parameters.
    const jobsEndpoint = dataset_id
        ? `/v1/jobs?dataset_id=${encodeURIComponent(dataset_id)}`
        : "/v1/jobs";
    const data = (await apiCall("GET", jobsEndpoint));
    if (!Array.isArray(data)) {
        return textResult(data);
    }
    const lines = data.map((job) => {
        const id = String(job.id ?? "");
        const status = String(job.status ?? "");
        // An empty-string label is treated the same as no label.
        const label = job.label != null && job.label !== "" ? String(job.label) : null;
        return label
            ? `${label} (id: ${id}) — status: ${status}`
            : `id: ${id} — status: ${status}`;
    });
    const text = lines.length > 0 ? lines.join("\n") : "No jobs found.";
    return { content: [{ type: "text", text }] };
});
|
|
1559
|
-
// ---- get_job_export ----
|
|
1560
|
-
// get_job_export: exports one of three views of a job's results.
//
//   training_log - text summary of the ordering/convergence QE curves (with
//                  unicode sparklines) plus the learning-curve image when one
//                  of the png/pdf/svg variants can be fetched.
//   weights      - text summary (grid, features, normalization stats) followed
//                  by the full response as pretty-printed JSON.
//   nodes        - hit-count summary and a top-10 table followed by the full
//                  response as pretty-printed JSON.
//
// Input:  { job_id: string, export: "training_log" | "weights" | "nodes" }.
// Output: MCP tool result ({ content: [...] }).
server.tool("get_job_export", `Export structured data from a completed SOM training job.

export=training_log: Learning curve and diagnostics (per-epoch QE, sparklines, inline plot). Use to diagnose convergence, plateau, or divergence.
export=weights: Raw weight matrix with node_coords, normalized/denormalized values, normalization stats. Use for external analysis or custom visualizations. Can be large (e.g. 600KB+ for 30×30×12).
export=nodes: Per-node statistics (hit count, feature mean/std). Use to profile clusters and characterize operating modes.`, {
    job_id: z.string().describe("Job ID of a completed training job"),
    export: z
        .enum(["training_log", "weights", "nodes"])
        .describe("What to export: training_log, weights, or nodes"),
}, async ({ job_id, export: exportType }) => {
    // Encoded once; every endpoint below hangs off this prefix.
    const resultsPath = `/v1/results/${encodeURIComponent(job_id)}`;
    if (exportType === "training_log") {
        const data = (await apiCall("GET", `${resultsPath}/training-log`));
        const ordErrors = data.ordering_errors ?? [];
        const convErrors = data.convergence_errors ?? [];
        const duration = data.training_duration_seconds;
        const epochs = data.epochs;
        // Maps each value onto one of eight block glyphs, scaled between the
        // series minimum and maximum. A flat series uses range 1 so the
        // division is safe and every point renders as the lowest block.
        const sparkline = (arr) => {
            if (arr.length === 0)
                return "(no data)";
            const blocks = "▁▂▃▄▅▆▇█";
            const min = Math.min(...arr);
            const max = Math.max(...arr);
            const range = max - min || 1;
            return arr
                .map((v) => blocks[Math.min(7, Math.floor(((v - min) / range) * 7))])
                .join("");
        };
        const qeAt = (arr, idx) => arr.at(idx)?.toFixed(4) ?? "—";
        const lines = [
            `Training Log — Job ${job_id}`,
            `Grid: ${JSON.stringify(data.grid)} | Model: ${data.model ?? "SOM"}`,
            `Epochs: ${epochs ? `[${epochs[0]} ordering, ${epochs[1]} convergence]` : "N/A"}`,
            `Duration: ${duration !== null && duration !== undefined ? `${duration}s` : "N/A"}`,
            `Features: ${data.n_features ?? "?"} | Samples: ${data.n_samples ?? "?"}`,
            ``,
            `Ordering Phase (${ordErrors.length} epochs):`,
            ` Start QE: ${qeAt(ordErrors, 0)} → End QE: ${qeAt(ordErrors, -1)}`,
            ` Curve: ${sparkline(ordErrors)}`,
        ];
        if (convErrors.length > 0) {
            lines.push(``, `Convergence Phase (${convErrors.length} epochs):`, ` Start QE: ${qeAt(convErrors, 0)} → End QE: ${qeAt(convErrors, -1)}`, ` Curve: ${sparkline(convErrors)}`);
        }
        else if ((epochs?.[1] ?? 0) === 0) {
            lines.push(``, `Convergence phase: skipped (epochs[1]=0)`);
        }
        const finalQe = data.quantization_error;
        const finalEv = data.explained_variance;
        if (finalQe !== null && finalQe !== undefined) {
            // A missing explained variance is reported as "N/A" rather than a
            // fabricated 0.0000, which would read as a real (terrible) score.
            const evText = finalEv !== null && finalEv !== undefined ? finalEv.toFixed(4) : "N/A";
            lines.push(``, `Final QE: ${finalQe.toFixed(4)} | Explained Variance: ${evText}`);
        }
        const content = [
            { type: "text", text: lines.join("\n") },
        ];
        let attached = false;
        for (const lcExt of ["png", "pdf", "svg"]) {
            try {
                const { data: lcBuf } = await apiRawCall(`${resultsPath}/image/learning_curve.${lcExt}`);
                content.push({
                    type: "image",
                    data: lcBuf.toString("base64"),
                    mimeType: mimeForFilename(`learning_curve.${lcExt}`),
                    annotations: { audience: ["user"], priority: 0.8 },
                });
                attached = true;
                break;
            }
            catch {
                // Deliberate best-effort: any failure for this extension just
                // moves on to the next candidate format.
                continue;
            }
        }
        if (!attached) {
            content.push({ type: "text", text: "(learning curve plot not available)" });
        }
        return { content };
    }
    if (exportType === "weights") {
        const data = (await apiCall("GET", `${resultsPath}/weights`));
        const features = data.features ?? [];
        const nNodes = data.n_nodes ?? 0;
        const grid = data.grid ?? [0, 0];
        const lines = [
            `SOM Weights — Job ${job_id}`,
            `Grid: ${grid[0]}×${grid[1]} | Nodes: ${nNodes} | Features: ${features.length}`,
            `node_coords: [x,y] per node for topology`,
            `Features: ${features.join(", ")}`,
            ``,
            `Normalization Stats:`,
        ];
        // Missing or non-numeric stats render as "—" instead of the literal "undefined".
        const fmtStat = (v) => typeof v === "number" ? v.toFixed(4) : "—";
        const normStats = data.normalization_stats ?? {};
        for (const [feat, s] of Object.entries(normStats)) {
            lines.push(` ${feat}: mean=${fmtStat(s?.mean)}, std=${fmtStat(s?.std)}`);
        }
        lines.push(``, `Full weight matrix available in the response JSON (includes node_coords).`, `Use the denormalized_weights array for original-scale values.`);
        return {
            content: [
                { type: "text", text: lines.join("\n") },
                { type: "text", text: JSON.stringify(data, null, 2) },
            ],
        };
    }
    // exportType === "nodes"
    const raw = (await apiCall("GET", `${resultsPath}/nodes`));
    // The summary needs an array; any other payload yields an empty summary
    // while the raw response is still echoed in full below.
    const nodes = Array.isArray(raw) ? raw : [];
    // A node without hit_count is treated as having zero hits everywhere
    // (sorting, empty count, totals, table), so the figures stay consistent.
    const hitsOf = (n) => n.hit_count ?? 0;
    const topNodes = [...nodes]
        .sort((a, b) => hitsOf(b) - hitsOf(a))
        .slice(0, 10);
    const emptyNodes = nodes.filter((n) => hitsOf(n) === 0).length;
    const totalHits = nodes.reduce((sum, n) => sum + hitsOf(n), 0);
    const lines = [
        `Node Statistics — Job ${job_id}`,
        `Total nodes: ${nodes.length} | Active: ${nodes.length - emptyNodes} | Empty: ${emptyNodes}`,
        `Total hits: ${totalHits}`,
        ``,
        `Top 10 Most Populated Nodes:`,
        `| Node | Coords | Hits | Hit% |`,
        `|------|--------|------|------|`,
    ];
    const fmtCoord = (v) => typeof v === "number" ? v.toFixed(1) : "?";
    for (const n of topNodes) {
        const hits = hitsOf(n);
        // The list is sorted by hits descending, so the first empty node ends
        // the table. This also keeps the percentage from dividing by zero.
        if (hits === 0)
            break;
        const coords = n.coords;
        const pct = ((hits / totalHits) * 100).toFixed(1);
        lines.push(`| ${n.node_index} | (${fmtCoord(coords?.[0])}, ${fmtCoord(coords?.[1])}) | ${hits} | ${pct}% |`);
    }
    return {
        content: [
            { type: "text", text: lines.join("\n") },
            {
                type: "text",
                text: `\nFull node statistics JSON:\n${JSON.stringify(raw, null, 2)}`,
            },
        ],
    };
});
|
|
1692
|
-
// ---- project_variable ----
|
|
1693
|
-
// project_variable: submits a projection request for a trained SOM, polls the
// resulting projection job, and reports per-node statistics plus the rendered
// plot when the job completes.
//
// Input:  job_id, variable_name, values, and optional aggregation,
//         output_format, output_dpi, colormap (see schema below).
// Output: MCP tool result. Text plus image on completion, a failure message
//         when the poll reports "failed", otherwise follow-up instructions
//         containing the projection job ID.
server.tool("project_variable", `Project a pre-computed variable onto a trained SOM without retraining.

BEST FOR: Mapping external metrics (revenue, labels, anomaly scores) onto the
trained SOM structure. For formula-based variables from the training dataset,
prefer derive_variable with project_onto_job; use project_variable only for
externally computed arrays.
NOT FOR: Re-training or adding features to the map.

TIMING: ~5–15s (loads cached SOM, computes per-node aggregation, renders plot).

values: array of length n_samples (~11 bytes/sample). Must match training sample
count exactly (same CSV row order). Aggregation controls how multiple samples
per node are combined (mean/median/sum/min/max/std/count).

BEFORE calling, ask:
- "What variable? Is it from the original data or externally computed?"
- "How to aggregate per node: mean (typical), sum (totals), max (peaks)?"

COMMON MISTAKES:
- Wrong number of values (must match n_samples from training)
- Using mean aggregation for count data (use sum instead)
- Not trying derive_variable first when the variable can be computed from columns

HINT: If values length mismatch, suggest derive_variable for formula-based variables.`, {
    job_id: z.string().describe("ID of the completed SOM training job"),
    variable_name: z.string().describe("Name for this variable (used in visualization labels)"),
    values: z
        .array(z.number())
        .describe("Array of values to project — one per training sample, in original CSV row order"),
    aggregation: z
        .enum(["mean", "median", "sum", "min", "max", "std", "count"])
        .optional()
        .default("mean")
        .describe("How to aggregate values for nodes with multiple samples"),
    output_format: z
        .enum(["png", "pdf", "svg"])
        .optional()
        .default("pdf")
        .describe("Image output format for the projection plot (default: pdf)."),
    output_dpi: z
        .enum(["standard", "retina", "print"])
        .optional()
        .default("retina")
        .describe("Resolution: standard (1x), retina (2x), print (4x)."),
    colormap: z
        .string()
        .optional()
        .describe("Override colormap for the projection plot (default: coolwarm). Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral."),
}, async ({ job_id, variable_name, values, aggregation, output_format, output_dpi, colormap }) => {
    const dpiMap = { standard: 1, retina: 2, print: 4 };
    const aggLabel = aggregation ?? "mean";
    const body = {
        variable_name,
        values,
        aggregation: aggLabel,
        output_format: output_format ?? "pdf",
    };
    // "retina" is not sent explicitly; presumably it is the server default.
    // NOTE(review): confirm against the API.
    if (output_dpi && output_dpi !== "retina")
        body.output_dpi = dpiMap[output_dpi] ?? 2;
    if (colormap)
        body.colormap = colormap;
    const data = (await apiCall("POST", `/v1/results/${encodeURIComponent(job_id)}/project`, body));
    const projJobId = data?.id;
    if (projJobId === null || projJobId === undefined) {
        // Without an ID there is nothing to poll; report it instead of polling "undefined".
        return {
            content: [{
                    type: "text",
                    text: `Projection request for job ${job_id} did not return a projection job ID.`,
                }],
        };
    }
    const poll = await pollUntilComplete(projJobId);
    if (poll.status === "completed") {
        const results = (await apiCall("GET", `/v1/results/${encodeURIComponent(projJobId)}`));
        const summary = (results.summary ?? {});
        const stats = (summary.variable_stats ?? {});
        // JSON can carry null but never undefined, and Number(null) is 0, so a
        // null stat must be caught here or it would print as a genuine 0.000.
        const fmtStat = (v) => v !== null && v !== undefined ? Number(v).toFixed(3) : "N/A";
        const content = [];
        content.push({
            type: "text",
            text: [
                `Projected Variable: ${variable_name} (${aggLabel}) — job: ${projJobId}`,
                `Parent SOM: ${job_id} | Samples: ${summary.n_samples ?? 0}`,
                ``,
                `Variable Statistics (per-node ${aggLabel}):`,
                ` Min: ${fmtStat(stats.min)}`,
                ` Max: ${fmtStat(stats.max)}`,
                ` Mean: ${fmtStat(stats.mean)}`,
                ` Nodes with data: ${stats.n_nodes_with_data ?? "N/A"}`,
            ].join("\n"),
        });
        // Same sanitizing rule used for the image filename: anything outside
        // [a-zA-Z0-9_] becomes an underscore.
        const safeName = variable_name.replace(/[^a-zA-Z0-9_]/g, "_");
        const imgExt = summary.output_format ?? output_format ?? "pdf";
        await tryAttachImage(content, projJobId, `projected_${safeName}.${imgExt}`);
        return { content };
    }
    if (poll.status === "failed") {
        return {
            content: [{
                    type: "text",
                    text: `Projection job ${projJobId} failed: ${poll.error ?? "unknown error"}`,
                }],
        };
    }
    // Any other poll status: hand the projection job ID back for manual follow-up.
    return {
        content: [{
                type: "text",
                text: [
                    `Variable projection job submitted but did not complete within 30s.`,
                    `Projection job ID: ${projJobId}`,
                    ``,
                    `Poll with: get_job_status(job_id="${projJobId}")`,
                    `Retrieve with: get_results(job_id="${projJobId}")`,
                ].join("\n"),
            }],
    };
});
|
|
1800
|
-
// ---- transition_flow ----
|
|
1801
|
-
/*
 * Tool: transition_flow
 *
 * Submits a transition-flow job for a trained SOM (POST /v1/results/{job_id}/transition-flow),
 * polls the new job, and on completion returns a text summary of the flow statistics plus the
 * rendered flow plot when it can be fetched. On failure or poll timeout a text-only response
 * with the job id is returned so the caller can follow up manually.
 */
server.tool("transition_flow", `Compute temporal transition flow for a trained SOM.

Shows how data points move between SOM nodes over time — revealing directional
patterns, cycles, and state machine behavior in sequential data.

**Requires time-ordered data.** Each row must represent a consecutive observation;
the transition from row i to row i+lag is counted. If rows are not time-ordered,
results will be meaningless.

Best used for:
- **Time-series dynamics**: how does the system state evolve step-by-step?
- **Cyclic processes**: daily/weekly patterns, recurring operating modes
- **Process monitoring**: identify common transition paths and bottlenecks
- **Regime detection**: find absorbing states (nodes with self-loops) vs transient hubs

**lag** controls the temporal horizon:
- lag=1 (default): immediate next-step transitions — "where does the system go next?"
- lag=N: transitions N steps apart — useful for periodic analysis (e.g. lag=24 for daily cycles in hourly data)
- Try multiple lags to reveal different temporal scales.

**min_transitions** filters noisy arrows — only transitions observed at least this many times are drawn. Increase for cleaner plots on large datasets.

**top_k** controls how many top-flow nodes are reported in statistics.

BEFORE calling, confirm:
- Data is time-ordered (chronological row sequence)
- The lag makes sense for the time resolution (e.g. lag=1 for hourly data = 1 hour ahead)

After showing results, discuss:
- Arrow direction and clustering patterns
- Hub nodes (many transitions through them) vs absorbing nodes (self-loops)
- Whether cyclic flow matches known periodic behavior`, {
    job_id: z.string().describe("ID of the completed SOM training job"),
    lag: z
        .number()
        .int()
        .min(1)
        .optional()
        .default(1)
        .describe("Step lag for transition pairs (default 1 = consecutive rows). Use larger values for periodic analysis (e.g. 24 for daily cycles in hourly data)."),
    min_transitions: z
        .number()
        .int()
        .min(1)
        .optional()
        .describe("Minimum transition count to draw an arrow (default: auto). Increase to filter noise on large datasets."),
    top_k: z
        .number()
        .int()
        .min(1)
        .optional()
        .default(10)
        .describe("Number of top-flow nodes to include in statistics (default 10)."),
    colormap: z
        .string()
        .optional()
        .describe("Colormap for the U-matrix background (default: grays). Try viridis, plasma, or inferno for more contrast."),
    output_format: z
        .enum(["png", "pdf", "svg"])
        .optional()
        .default("pdf")
        .describe("Image output format for the flow plot (default: pdf)."),
    output_dpi: z
        .enum(["standard", "retina", "print"])
        .optional()
        .default("retina")
        .describe("Resolution: standard (1x), retina (2x), print (4x)."),
}, async ({ job_id, lag, min_transitions, top_k, colormap, output_format, output_dpi }) => {
    // Named DPI presets are translated to the numeric scale factor sent to the API.
    const dpiMap = { standard: 1, retina: 2, print: 4 };
    const effectiveLag = lag ?? 1;
    const requestedFormat = output_format ?? "pdf";

    // Only explicitly supplied optional parameters are forwarded.
    const body = { lag: effectiveLag, output_format: requestedFormat };
    if (min_transitions !== undefined) {
        body.min_transitions = min_transitions;
    }
    if (top_k !== undefined) {
        body.top_k = top_k;
    }
    if (colormap !== undefined) {
        body.colormap = colormap;
    }
    // "retina" is never sent — presumably it is the backend's own default (TODO confirm).
    if (output_dpi && output_dpi !== "retina") {
        body.output_dpi = dpiMap[output_dpi] ?? 2;
    }

    const data = (await apiCall("POST", `/v1/results/${job_id}/transition-flow`, body));
    const flowJobId = data.id;
    const poll = await pollUntilComplete(flowJobId);

    if (poll.status === "completed") {
        const results = (await apiCall("GET", `/v1/results/${flowJobId}`));
        const summary = (results.summary ?? {});
        const stats = (summary.flow_stats ?? {});
        const meanMagnitude = stats.mean_magnitude !== undefined
            ? Number(stats.mean_magnitude).toFixed(4)
            : "N/A";
        const content = [];
        content.push({
            type: "text",
            text: [
                `Transition Flow Results (job: ${flowJobId})`,
                `Parent SOM: ${job_id} | Lag: ${effectiveLag} | Samples: ${summary.n_samples ?? 0}`,
                ``,
                `Flow Statistics:`,
                ` Active flow nodes: ${stats.active_flow_nodes ?? "N/A"}`,
                ` Total transitions: ${stats.total_transitions ?? "N/A"}`,
                ` Mean magnitude: ${meanMagnitude}`,
            ].join("\n"),
        });
        // Prefer the format the backend reports it actually rendered, matching the other
        // projection handlers in this file; fall back to what was requested.
        const imgExt = summary.output_format ?? requestedFormat;
        await tryAttachImage(content, flowJobId, `transition_flow_lag${effectiveLag}.${imgExt}`);
        return { content };
    }

    if (poll.status === "failed") {
        return {
            content: [{
                    type: "text",
                    text: `Transition flow job ${flowJobId} failed: ${poll.error ?? "unknown error"}`,
                }],
        };
    }

    // Neither completed nor failed: the poll gave up; hand the job id back to the caller.
    return {
        content: [{
                type: "text",
                text: [
                    `Transition flow job submitted but did not complete within 30s.`,
                    `Flow job ID: ${flowJobId}`,
                    `Parent job: ${job_id} | Lag: ${effectiveLag}`,
                    ``,
                    `Poll with: get_job_status(job_id="${flowJobId}")`,
                    `Retrieve with: get_results(job_id="${flowJobId}")`,
                ].join("\n"),
            }],
    };
});
|
|
1925
|
-
// ---- derive_variable ----
|
|
1926
|
-
/*
 * Tool: derive_variable
 *
 * Evaluates an expression over dataset columns in one of two modes:
 *   - project_onto_job set: POST /v1/results/{project_onto_job}/derive, then return the
 *     per-node statistics and the projection image of the derived variable.
 *   - otherwise: POST /v1/datasets/{dataset_id}/derive, then return a summary of the
 *     column that was added to the dataset.
 * In both modes the job is polled; failure and timeout produce a text-only response.
 */
server.tool("derive_variable", `Create a derived variable from existing dataset columns using mathematical expressions.

BEST FOR: Computing ratios, differences, log transforms, rolling statistics,
or any combination of existing columns — either to enrich a dataset before
training or to project a computed variable onto an existing SOM.

TWO MODES:
1. Add to dataset (default): computes the new column and appends it to the dataset CSV.
The column is then available for future train_som calls via the 'columns' parameter.
2. Project onto SOM: computes the column from the training dataset and projects it
onto a trained SOM, returning the visualization. Use this to explore how
derived quantities distribute across the learned map structure.

COMMON FORMULAS:
- Ratio: "revenue / cost"
- Difference: "US10Y - US3M"
- Log return: "log(close) - log(open)"
- Z-score: "(volume - rolling_mean(volume, 20)) / rolling_std(volume, 20)"
- Magnitude: "sqrt(x^2 + y^2)"
- Unit convert: "temperature - 273.15"
- First diff: "diff(consumption)"

SUPPORTED FUNCTIONS:
- Arithmetic: +, -, *, /, ^
- Math: log, log1p, log10, exp, sqrt, abs, sign, clamp, min, max
- Trig: sin, cos, tan, asin, acos, atan
- Rolling: rolling_mean(col, window), rolling_std(col, window), rolling_min, rolling_max
- Temporal: diff(col), diff(col, n)
- Constants: pi, numeric literals

WORKFLOW: Ask the user what domain-specific variables they care about.
Suggest derived variables based on the column names. For example, if
the dataset has "revenue" and "cost", suggest "revenue - cost" as profit
and "revenue / cost" as cost efficiency.

EXPRESSION REFERENCE: In expressions, use underscore-normalized column names (e.g. fixed_acidity not "fixed acidity"). Column names with spaces/special chars are converted to underscores — use that form. Operators: +, -, *, /, ^. Functions: log, sqrt, rolling_mean(col, window), diff(col), etc.

COMMON MISTAKES:
- Division by zero: if denominator column has zeros, use options.missing="skip"
- Rolling functions produce NaN for the first (window-1) rows
- diff() produces NaN for the first row
- Spaces in column names: use underscores (e.g. fixed_acidity not "fixed acidity")`, {
    dataset_id: z.string().describe("Dataset ID (source of column data)"),
    name: z.string().describe("Name for the derived variable (used in column header and visualization)"),
    expression: z
        .string()
        .describe("Mathematical expression referencing column names. " +
        "Examples: 'revenue / cost', 'log(price)', 'diff(temperature)', " +
        "'sqrt(x^2 + y^2)', 'rolling_mean(volume, 20)'"),
    project_onto_job: z
        .string()
        .optional()
        .describe("If provided, project the derived variable onto this SOM job instead of adding to dataset. " +
        "The job must be a completed train_som job."),
    aggregation: z
        .enum(["mean", "median", "sum", "min", "max", "std", "count"])
        .optional()
        .default("mean")
        .describe("How to aggregate values per SOM node (only used when project_onto_job is set)"),
    options: z
        .object({
        missing: z
            .enum(["skip", "zero", "interpolate"])
            .optional()
            .default("skip")
            .describe("How to handle NaN/missing values in the result"),
        window: z
            .number()
            .int()
            .optional()
            .describe("Default window size for rolling functions (default 20)"),
        description: z
            .string()
            .optional()
            .describe("Human-readable description of what this variable represents"),
    })
        .optional()
        .describe("Configuration for expression evaluation"),
    output_format: z
        .enum(["png", "pdf", "svg"])
        .optional()
        .default("pdf")
        .describe("Image format for projection visualization when project_onto_job is set (default: pdf)."),
    output_dpi: z
        .enum(["standard", "retina", "print"])
        .optional()
        .default("retina")
        .describe("Resolution for projection visualization"),
    colormap: z
        .string()
        .optional()
        .describe("Colormap for projection visualization (default: coolwarm). Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral."),
}, async ({ dataset_id, name, expression, project_onto_job, aggregation, options, output_format, output_dpi, colormap, }) => {
    // Named DPI presets are translated to the numeric scale factor sent to the API.
    const dpiMap = { standard: 1, retina: 2, print: 4 };

    if (project_onto_job) {
        // Mode: project onto SOM
        const aggregationLabel = aggregation ?? "mean";
        const body = {
            name,
            expression,
            aggregation: aggregationLabel,
            output_format: output_format ?? "pdf",
        };
        if (options) {
            body.options = options;
        }
        // "retina" is never sent — presumably it is the backend's own default (TODO confirm).
        if (output_dpi && output_dpi !== "retina") {
            body.output_dpi = dpiMap[output_dpi] ?? 2;
        }
        if (colormap) {
            body.colormap = colormap;
        }
        const data = (await apiCall("POST", `/v1/results/${project_onto_job}/derive`, body));
        const deriveJobId = data.id;
        const poll = await pollUntilComplete(deriveJobId);
        if (poll.status === "completed") {
            const results = (await apiCall("GET", `/v1/results/${deriveJobId}`));
            const summary = (results.summary ?? {});
            const stats = (summary.variable_stats ?? {});
            const fmt = (value) => (value !== undefined ? Number(value).toFixed(3) : "N/A");
            const content = [];
            content.push({
                type: "text",
                text: [
                    `Derived Variable Projected: ${name} — job: ${deriveJobId}`,
                    `Expression: ${expression}`,
                    `Parent SOM: ${project_onto_job} | Aggregation: ${aggregationLabel}`,
                    ``,
                    `Statistics (per-node ${aggregationLabel}):`,
                    ` Min: ${fmt(stats.min)}`,
                    ` Max: ${fmt(stats.max)}`,
                    ` Mean: ${fmt(stats.mean)}`,
                    ` Nodes with data: ${stats.n_nodes_with_data ?? "N/A"}`,
                    // The NaN line is added only when there is something to report. Spreading it
                    // conditionally (rather than filtering out every "" entry afterwards) keeps
                    // the intentional blank separator line above in the output.
                    ...(summary.nan_count ? [` NaN values: ${summary.nan_count}`] : []),
                ].join("\n"),
            });
            // Image files are named after the variable with non-word characters replaced.
            const safeName = name.replace(/[^a-zA-Z0-9_]/g, "_");
            const imgExt = summary.output_format ?? output_format ?? "pdf";
            await tryAttachImage(content, deriveJobId, `projected_${safeName}.${imgExt}`);
            return { content };
        }
        if (poll.status === "failed") {
            return {
                content: [{
                        type: "text",
                        text: `Derive+project job ${deriveJobId} failed: ${poll.error ?? "unknown error"}`,
                    }],
            };
        }
        // Neither completed nor failed: hand the job id back so the caller can poll.
        return {
            content: [{
                    type: "text",
                    text: `Derive job submitted. Poll: get_job_status("${deriveJobId}")`,
                }],
        };
    }

    // Mode: add to dataset
    const body = { name, expression };
    if (options) {
        body.options = options;
    }
    const data = (await apiCall("POST", `/v1/datasets/${dataset_id}/derive`, body));
    const deriveJobId = data.id;
    const poll = await pollUntilComplete(deriveJobId);
    if (poll.status === "completed") {
        const results = (await apiCall("GET", `/v1/results/${deriveJobId}`));
        const summary = (results.summary ?? {});
        return {
            content: [{
                    type: "text",
                    text: [
                        `Derived column "${name}" added to dataset ${dataset_id}`,
                        `Expression: ${expression}`,
                        `Rows: ${summary.n_rows ?? "?"}`,
                        // Optional line; spread conditionally so the blank separator below survives.
                        ...(summary.nan_count ? [`NaN values: ${summary.nan_count}`] : []),
                        `Min: ${summary.min ?? "?"} | Max: ${summary.max ?? "?"} | Mean: ${summary.mean ?? "?"}`,
                        ``,
                        `The column is now available in the dataset. Include it in train_som`,
                        `via the 'columns' parameter, or use datasets(action=preview) to verify.`,
                    ].join("\n"),
                }],
        };
    }
    if (poll.status === "failed") {
        return {
            content: [{
                    type: "text",
                    text: `Derive variable job ${deriveJobId} failed: ${poll.error ?? "unknown error"}`,
                }],
        };
    }
    // Neither completed nor failed: hand the job id back so the caller can poll.
    return {
        content: [{
                type: "text",
                text: `Derive job submitted. Poll: get_job_status("${deriveJobId}")`,
            }],
    };
});
|
|
2124
|
-
// ---- Compute Leases / Account Management ----
|
|
2125
|
-
/*
 * Tool: manage_account
 *
 * Dispatches on `action` to the compute-lease endpoints:
 *   request_compute -> POST   /v1/compute/lease   (or a static tier list when no tier is given)
 *   compute_status  -> GET    /v1/compute/lease
 *   release_compute -> DELETE /v1/compute/lease
 *   compute_history -> GET    /v1/compute/history
 *   add_funds       -> static instructions, no API call
 * Any other action yields a text response listing the valid actions.
 *
 * The parameter shape is declared with Zod, like every other tool in this file. The previous
 * plain `{ type, description, optional }` objects are not Zod schemas, so the SDK could not
 * use them to parse or validate the incoming arguments.
 */
server.tool("manage_account", `Manage compute leases, check account status, and view billing history.
Your default backend is your local Primary Server. Use this tool to temporarily upgrade to cloud compute for heavy jobs.

Actions:
- request_compute: Provisions a cloud burst instance (requires tier and duration_minutes). Leaves tier blank to list options.
- compute_status: Checks if a burst lease is active and how much time remains.
- release_compute: Manually terminates an active lease and reverts routing to the Primary Server.
- compute_history: Views recent compute leases and credit spend.
- add_funds: Instructions on how to add credits to your account.`, {
    // Kept as a free-form string (not an enum) so an unknown action still reaches the
    // handler and gets the explanatory "Unknown action" reply below.
    action: z
        .string()
        .describe("One of: request_compute, compute_status, release_compute, compute_history, add_funds"),
    tier: z
        .string()
        .optional()
        .describe("Compute tier ID (e.g., cpu-8, gpu-t4). Leave empty during request_compute to list tiers."),
    duration_minutes: z
        .number()
        .optional()
        .describe("How long to lease the instance (default: 60)"),
    limit: z
        .number()
        .optional()
        .describe("Number of records to fetch for compute_history (default: 10)"),
}, async ({ action, tier, duration_minutes, limit }) => {
    const textReply = (text) => ({ content: [{ type: "text", text }] });
    // API amounts are in cents; render them as dollars with two decimals.
    const dollars = (cents) => (cents / 100).toFixed(2);

    switch (action) {
        case "request_compute": {
            if (!tier) {
                // No tier chosen yet: list the options instead of requesting a lease.
                return textReply(`Available Compute Tiers:
CPU Tiers:
cpu-8: 16 vCPUs, 32 GB RAM (~$0.20/hr)
cpu-16: 32 vCPUs, 64 GB RAM (~$0.20/hr)
cpu-24: 48 vCPUs, 96 GB RAM (~$0.28/hr)
cpu-32: 64 vCPUs, 128 GB RAM (~$0.42/hr)
cpu-48: 96 vCPUs, 192 GB RAM (~$0.49/hr)
GPU Tiers:
gpu-t4: 8 vCPUs, 32 GB, T4 16GB VRAM (~$0.22/hr)
gpu-t4x: 16 vCPUs, 64 GB, T4 16GB VRAM (~$0.36/hr)
gpu-t4xx: 32 vCPUs, 128 GB, T4 16GB VRAM (~$0.27/hr)
gpu-l4: 8 vCPUs, 32 GB, L4 24GB VRAM (~$0.41/hr)
gpu-l4x: 16 vCPUs, 64 GB, L4 24GB VRAM (~$0.37/hr)
gpu-a10: 8 vCPUs, 32 GB, A10G 24GB VRAM (~$0.51/hr)
gpu-a10x: 16 vCPUs, 64 GB, A10G 24GB VRAM (~$0.52/hr)`);
            }
            const data = await apiCall("POST", "/v1/compute/lease", { tier, duration_minutes });
            return textReply(`Compute Lease Requested:
Lease ID: ${data.lease_id}
Status: ${data.status}
Estimated Wait: ${data.estimated_wait_minutes} minutes
Estimated Cost: $${dollars(data.estimated_cost_cents)}
Credits Remaining After Reserve: $${dollars(data.credit_balance_cents)}

IMPORTANT: Cloud burst active. Data is pulled from shared Cloudflare R2, so you do NOT need to re-upload datasets. Just wait ~3 minutes and check status.`);
        }
        case "compute_status": {
            const data = await apiCall("GET", "/v1/compute/lease");
            if (data.status === "none" || !data.lease_id) {
                return textReply("No active lease -- running on default Primary Server.");
            }
            return textReply(`Active Compute Lease:
Lease ID: ${data.lease_id}
Status: ${data.status}
Tier: ${data.tier} (${data.instance_type})
Time Remaining: ${Math.round(data.time_remaining_ms / 60000)} minutes`);
        }
        case "release_compute": {
            const data = await apiCall("DELETE", "/v1/compute/lease");
            return textReply(`Compute Released:
Duration Billed: ${data.duration_minutes} minutes
Credits Deducted: $${dollars(data.credits_deducted)}
Final Balance: $${dollars(data.final_balance_cents)}

Routing reverted to default Primary Server.`);
        }
        case "compute_history": {
            const data = await apiCall("GET", `/v1/compute/history?limit=${limit || 10}`);
            // Tolerate a response without a history array instead of throwing on .map().
            const history = (data.history ?? [])
                .map((h) => `- ${h.started_at} | ${h.tier} | ${h.duration_minutes} min | $${dollars(h.credits_charged)}`)
                .join("\n");
            return textReply(`Credit Balance: $${dollars(data.credit_balance_cents)}\n\nRecent Usage:\n${history}`);
        }
        case "add_funds":
            return textReply(`To add funds to your account, please visit the Barivia Billing Portal (integration pending) or ask your administrator to use the CLI tool:
bash scripts/manage-credits.sh add <org_id> <amount_usd>`);
        default:
            return textReply(`Unknown action: ${action}. Valid actions are request_compute, compute_status, release_compute, compute_history, add_funds.`);
    }
});
|
|
2227
|
-
// ---- license_info ----
|
|
2228
|
-
/*
 * Tool: license_info
 *
 * Reads GET /v1/system/info and renders plan limits, backend description, live queue status,
 * worker topology and training-time estimates as one text block. The compute lease and
 * compute history endpoints are consulted on a best-effort basis: if either call fails, its
 * section is simply left out.
 */
server.tool("license_info", `Get plan/license capabilities, backend info, live status, and training time estimates.

Returns what the current API key is connected to: plan tier, compute class
(CPU/GPU), usage limits, backend hardware details, job queue state, and
estimated training times.

Use this BEFORE submitting large jobs to:
- See your plan's compute class and whether GPU is available
- Check queue depth to decide whether to wait or proceed
- Estimate wall-clock time based on the current topology`, {}, async () => {
    // The three requests are independent, so issue them concurrently. Only the system-info
    // call is allowed to fail the tool; the other two degrade to null.
    const [data, leaseData, historyData] = await Promise.all([
        apiCall("GET", "/v1/system/info"),
        apiCall("GET", "/v1/compute/lease").catch(() => null),
        apiCall("GET", "/v1/compute/history?limit=30").catch(() => null),
    ]);
    const plan = data.plan ?? {};
    const backend = data.backend ?? {};
    const status = data.status ?? {};
    const estimates = data.training_time_estimates_seconds ?? {};
    const topo = data.worker_topology ?? {};

    const gpuEnabled = plan.gpu_enabled ?? data.gpu_available ?? false;
    let computeDesc = "CPU only";
    if (gpuEnabled) {
        computeDesc = backend.gpu_model
            ? `GPU-accelerated (${backend.gpu_model}${backend.gpu_vram_gb ? `, ${backend.gpu_vram_gb} GB VRAM` : ""})`
            : "GPU-accelerated";
    }
    // A limit of -1 (number or string) is rendered as "unlimited".
    const fmtLimit = (v) => v === -1 || v === "-1" ? "unlimited" : String(v ?? "?");

    const tierName = String(plan.tier ?? "unknown");
    const lines = [
        `Your Plan: ${tierName.charAt(0).toUpperCase()}${tierName.slice(1)}`,
        ` Priority: ${plan.priority ?? "normal"}`,
        ` Concurrency: ${plan.max_concurrent_jobs ?? "?"} simultaneous job${plan.max_concurrent_jobs !== 1 ? "s" : ""}`,
        ` Datasets: ${fmtLimit(plan.max_datasets)} max, ${fmtLimit(plan.max_dataset_rows)} rows each`,
        ` Monthly Jobs: ${fmtLimit(plan.max_monthly_jobs)}`,
        ` Grid Size: ${fmtLimit(plan.max_grid_size)} max`,
        ` Features: ${fmtLimit(plan.max_features)} max`,
    ];

    // Skip the credits line when no usable balance came back, rather than printing "$NaN".
    const creditCents = Number(historyData?.credit_balance_cents);
    if (Number.isFinite(creditCents)) {
        lines.push(``, `Compute Credits: $${(creditCents / 100).toFixed(2)} remaining`);
    }

    lines.push(``, `Default Backend:`, ` Label: ${backend.label ?? "unknown"}`, ` Compute: ${computeDesc}`);
    if (backend.memory_gb) {
        lines.push(` Memory: ${backend.memory_gb} GB`);
    }

    if (leaseData && leaseData.lease_id) {
        lines.push(``, `Active Compute Lease (Burst):`, ` Status: ${leaseData.status}`, ` Tier: ${leaseData.tier} (${leaseData.instance_type})`, ` Time Left: ${Math.round(leaseData.time_remaining_ms / 60000)} min`);
    }

    // Live counters may sit under `status` or at the top level of the response.
    const running = Number(status.running_jobs ?? data.running_jobs ?? 0);
    const pending = Number(status.pending_jobs ?? data.pending_jobs ?? 0);
    const queueDepth = Number(status.queue_depth ?? data.queue_depth ?? 0);
    const freeMem = status.free_memory_gb ?? data.free_memory_gb;
    // Wait estimate: (pending jobs + 1) times the per-job total estimate, in seconds.
    const computeEtaSeconds = Number(estimates.total || 0);
    const waitSeconds = (pending + 1) * computeEtaSeconds;
    lines.push(``, `Live Status:`, ` Running Jobs: ${running}`, ` Pending Jobs: ${pending}`, ` Queue Depth: ${queueDepth}`, ` Current Wait: ~${Math.round(waitSeconds / 60)} minutes (before your job starts)`);
    if (freeMem !== undefined) {
        lines.push(` Free Memory: ${freeMem} GB`);
    }

    if (topo.num_workers !== undefined) {
        lines.push(``, `Worker Topology:`, ` Workers: ${topo.num_workers} × ${topo.threads_per_worker} threads`, ` Total Thread Budget: ${topo.total_thread_budget}`);
    }

    if (Object.keys(estimates).length > 0) {
        // "formula" is explanatory text, not a timing, so it is printed separately at the end.
        const timingLines = Object.entries(estimates)
            .filter(([k]) => k !== "formula")
            .map(([k, v]) => ` ${k}: ~${v}s`);
        lines.push(``, `Estimated Training Times (seconds):`, ...timingLines);
        if (estimates.formula) {
            lines.push(` ${estimates.formula}`);
        }
    }

    return {
        content: [{ type: "text", text: lines.join("\n") }],
    };
});
|
|
2294
|
-
// ---------------------------------------------------------------------------
|
|
2295
|
-
// Prompts
|
|
2296
|
-
// ---------------------------------------------------------------------------
|
|
2297
|
-
/*
 * Prompt: info
 *
 * Static overview prompt. Takes no arguments and returns a single user message whose text
 * asks the model to relay the capability/workflow summary below.
 */
server.prompt("info", "Overview of the Barivia SOM MCP: capabilities, workflow, tools, analysis types, and tips. Use when the user asks what this MCP can do, how to get started, or what the process is.", {}, () => {
    // Each inner array is one paragraph; paragraphs are separated by a single blank line.
    const paragraphs = [
        [
            "Inform the user using this overview:",
        ],
        [
            "**What it is:** Barivia MCP connects you to a Self-Organizing Map (SOM) analytics engine. SOMs learn a 2D map from high-dimensional data for visualization, clustering, pattern discovery, and temporal analysis.",
        ],
        [
            "**Core workflow:**",
            "1. **Upload** — `datasets(action=upload)` with a CSV file path or inline data",
            "2. **Preview** — `datasets(action=preview)` to inspect columns, stats, and detect cyclic/datetime fields",
            "3. **Prepare** — use the `prepare_training` prompt for a guided checklist (column selection, transforms, cyclic encoding, feature weights, grid sizing)",
            "4. **Train** — `train_som` with grid size, epochs, model type, and preprocessing options. Use `preset=quick|standard|refined|high_res` for sensible defaults",
            "5. **Monitor** — `get_job_status` to track progress; `get_results` to retrieve figures when complete",
            "6. **Analyze** — `analyze` with various analysis types (see below)",
            "7. **Iterate** — `recolor_som` to change colormap without retraining, `compare_runs` to compare hyperparameters, `project_variable` to overlay new variables",
        ],
        [
            "**Analysis types** (via `analyze`):",
            "- `u_matrix` — cluster boundary distances",
            "- `component_planes` — per-feature heatmaps",
            "- `clusters` — automatic cluster detection and statistics",
            "- `quality_report` — QE, TE, explained variance, trustworthiness, neighborhood preservation",
            "- `hit_histogram` — data density across the map",
            "- `transition_flow` — temporal flow patterns (requires time-ordered data)",
        ],
        [
            "**Data tools:**",
            "- `datasets(action=subset)` — filter by row range, value thresholds (gt/lt/gte/lte), equality, or set membership. Combine row_range + filter",
            "- `derive_variable` — create computed columns from expressions (ratios, differences, etc.)",
            "- `project_variable` — overlay any variable onto a trained SOM",
        ],
        [
            "**Output options:** Format (png/pdf/svg) and colormap (coolwarm, viridis, plasma, inferno, etc.) can be set at training or changed later via recolor_som.",
        ],
        [
            "**Key tools:** datasets, list, train_som, get_job_status, get_results, analyze, recolor_som, download_results, project_variable, transition_flow, compare_runs, derive_variable, license_info, explore_som.",
        ],
        [
            "**Tips:**",
            "- Always `preview` before training to understand your data",
            "- Use `license_info` to check GPU availability and plan limits before large jobs",
            "- Start with `preset=quick` for fast iteration, then `refined` for publication quality",
            "- For time-series data, consider `transition_flow` after training",
        ],
        [
            "Keep the reply scannable with headers and bullet points.",
        ],
    ];
    const text = paragraphs.map((paragraph) => paragraph.join("\n")).join("\n\n");
    return {
        messages: [
            {
                role: "user",
                content: { type: "text", text },
            },
        ],
    };
});
|
|
2346
|
-
/*
 * Prompt: prepare_training
 *
 * Takes a dataset_id and returns a single user message that asks the model to walk the user
 * through an eight-step pre-training checklist and then present the final train_som call.
 */
server.prompt("prepare_training", "Guided pre-training checklist. Use after uploading a dataset and before calling train_som. Walks through data inspection, column selection, transforms, cyclic/temporal features, weighting, subsetting, and grid sizing.", { dataset_id: z.string().describe("Dataset ID to prepare for training") }, ({ dataset_id }) => {
    // Each inner array is one paragraph; paragraphs are separated by a single blank line.
    const paragraphs = [
        [
            `Guide me through preparing dataset ${dataset_id} for SOM training.`,
        ],
        [
            `Start by calling datasets(action=preview, dataset_id="${dataset_id}") to show the columns, statistics, and detected cyclic/datetime fields. Then walk through each step below, asking for my choices at each stage. When all choices are made, present the final train_som call and offer to submit.`,
        ],
        [
            "**Step 1: DATA INSPECTION**",
            "Show the preview output: column names, types, basic stats (min/max/mean/std), sample rows. Flag anything unusual:",
            "- Columns with many missing values or zero variance",
            "- Highly skewed distributions (consider transforms)",
            "- ID-like or categorical columns that shouldn't be trained on",
            "- Columns the preview flags as cyclic or datetime candidates",
        ],
        [
            "**Step 2: SUBSETTING** (optional)",
            "Does the user want to train on a subset? Options:",
            "- Row range: datasets(action=subset, row_range=[start, end]) for first N rows or a specific slice",
            "- Value filters: datasets(action=subset, filters=[{column, op, value}]) — ops: eq, ne, gt, lt, gte, lte, between, in",
            "- Or pass row_range directly in train_som params for lightweight slicing",
        ],
        [
            "**Step 3: COLUMN SELECTION**",
            "Which columns to include? Exclude IDs, free text, constants, or redundant features. → train_som param: columns",
        ],
        [
            "**Step 4: TRANSFORMS**",
            "Any right-skewed or heavy-tailed columns? Available transforms (applied BEFORE normalization):",
            "- log: natural log (requires >0; use for prices, revenues, volumes)",
            "- log1p: log(1+x) (safe for zeros; counts, frequencies)",
            "- sqrt: square root (mild skew correction)",
            "- rank: replace with rank order (non-parametric; heavy outliers)",
            "- invert: 1/x (reciprocal relationship)",
            "→ train_som param: transforms, e.g. { revenue: 'log', count: 'log1p' }",
        ],
        [
            "**Step 5: CYCLIC FEATURES**",
            "Any periodic variables (hour 0–24, weekday 0–6, month 1–12, angle 0–360)?",
            "These MUST be encoded as (cos, sin) pairs so the SOM sees 23:00 and 01:00 as close.",
            "→ train_som param: cyclic_features, e.g. [{ feature: 'hour', period: 24 }]",
        ],
        [
            "**Step 6: TEMPORAL FEATURES**",
            "Any datetime columns? Extract components like hour_of_day, day_of_year, month, day_of_week.",
            "Preview shows auto-detected formats. NEVER add temporal_features without user confirmation.",
            "→ train_som param: temporal_features, e.g. [{ columns: ['Date'], format: 'yyyy-mm-dd', extract: ['month', 'day_of_week'], cyclic: true }]",
        ],
        [
            "**Step 7: FEATURE WEIGHTS**",
            "Should any feature have more or less influence? weight=0 disables, >1 emphasizes, <1 de-emphasizes.",
            "Cyclic shorthand: { 'hour': 2.0 } auto-expands to both hour_cos and hour_sin.",
            "→ train_som param: feature_weights, e.g. { temperature: 2.0, noise_col: 0.5 }",
        ],
        [
            "**Step 8: GRID, MODEL & HYPERPARAMETERS**",
            "- Presets: quick (15×15), standard (25×25), refined (40×40), high_res (60×60)",
            "- Or explicit: grid_x, grid_y, epochs (single int or [ordering, convergence])",
            "- Model: SOM (standard), RSOM (recurrent/time-series), SOM-SOFT (GTM-style), RSOM-SOFT",
            "- learning_rate: number (e.g. 0.05) or { ordering: [start, end], convergence: [start, end] }",
            "- sigma_f: final neighborhood radius (lower = sharper clusters, default 1.0)",
            "- output_format: pdf (vector), png, svg. colormap: coolwarm, viridis, plasma, etc.",
            "- Check license_info for GPU availability before large grids",
        ],
        [
            "**Final: REVIEW & SUBMIT**",
            "Summarize all chosen parameters as a complete train_som call. Show the full param set and confirm before submitting.",
        ],
    ];
    const text = paragraphs.map((paragraph) => paragraph.join("\n")).join("\n\n");
    return {
        messages: [
            {
                role: "user",
                content: { type: "text", text },
            },
        ],
    };
});
|
|
2414
|
-
// ---------------------------------------------------------------------------
|
|
2415
|
-
// Image helper
|
|
2416
|
-
// ---------------------------------------------------------------------------
|
|
2417
|
-
/**
 * Pick the MIME type used when a result file is attached inline.
 *
 * The extension is matched case-insensitively, so `REPORT.PDF` and
 * `report.pdf` resolve to the same type. Any name that does not end in
 * `.pdf` or `.svg` falls back to `image/png`.
 *
 * @param {string} fname - File name (or path) of the result artifact.
 * @returns {string} `application/pdf`, `image/svg+xml`, or `image/png`.
 */
function mimeForFilename(fname) {
    // Normalise once so upper- or mixed-case extensions are not mislabelled as PNG.
    const lowered = fname.toLowerCase();
    if (lowered.endsWith(".pdf")) {
        return "application/pdf";
    }
    if (lowered.endsWith(".svg")) {
        return "image/svg+xml";
    }
    return "image/png";
}
|
|
2424
|
-
/**
 * Fetch one result file for a job and append it to an MCP content array.
 *
 * Best-effort: if the download (or building the image entry) fails, a text
 * placeholder naming the file is appended instead of propagating the error.
 *
 * @param {Array<object>} content - Content list to append to (mutated in place).
 * @param {string} jobId - Job identifier used in the results path.
 * @param {string} filename - Name of the result file to fetch.
 * @returns {Promise<void>}
 */
async function tryAttachImage(content, jobId, filename) {
    // Encode each path segment so characters such as "/", "?" or "#" in the
    // job id or file name cannot alter the endpoint or add a query string.
    const resultPath = `/v1/results/${encodeURIComponent(jobId)}/image/${encodeURIComponent(filename)}`;
    try {
        // `data` is expected to support toString("base64") (e.g. a Buffer).
        const { data: imgBuf } = await apiRawCall(resultPath);
        content.push({
            type: "image",
            data: imgBuf.toString("base64"),
            mimeType: mimeForFilename(filename),
            annotations: { audience: ["user"], priority: 0.8 },
        });
    }
    catch {
        // Deliberate fallback: a missing image must not fail the whole tool call.
        content.push({
            type: "text",
            text: `(${filename} not available for inline display)`,
        });
    }
}
|
|
2441
|
-
// ---------------------------------------------------------------------------
|
|
2442
|
-
// Connect via stdio
|
|
2443
|
-
// ---------------------------------------------------------------------------
|
|
2444
|
-
// Create the stdio transport (StdioServerTransport from the MCP SDK) for this process.
const transport = new StdioServerTransport();
|
|
2445
|
-
// Connect the server to the transport. This is a top-level await, so the file
// must be loaded as an ES module.
await server.connect(transport);
|
|
2446
|
-
//# sourceMappingURL=index.js.map
|
|
2
|
+
import{McpServer as e}from"@modelcontextprotocol/sdk/server/mcp.js";import{StdioServerTransport as t}from"@modelcontextprotocol/sdk/server/stdio.js";import{z as o}from"zod";import{registerAppResource as a,registerAppTool as r,RESOURCE_MIME_TYPE as n}from"@modelcontextprotocol/ext-apps/server";import i from"node:fs/promises";import s from"node:path";const l=process.env.BARIVIA_API_URL??process.env.BARSOM_API_URL??"https://api.barivia.se",u=process.env.BARIVIA_API_KEY??process.env.BARSOM_API_KEY??"";u||(console.error("Error: BARIVIA_API_KEY not set. Set it in your MCP client config."),process.exit(1));const d=parseInt(process.env.BARIVIA_FETCH_TIMEOUT_MS??"30000",10),c=new Set([502,503,504]);function p(e,t){return!(void 0===t||!c.has(t))||(e instanceof DOMException&&"AbortError"===e.name||e instanceof TypeError)}async function m(e,t,o=d){const a=new AbortController,r=setTimeout(()=>a.abort(),o);try{return await fetch(e,{...t,signal:a.signal})}finally{clearTimeout(r)}}async function g(e,t,o,a){const r=`${l}${t}`,n=a?.["Content-Type"]??"application/json",i=Math.random().toString(36).slice(2,10),s={Authorization:`Bearer ${u}`,"Content-Type":n,"X-Request-ID":i,...a};let d,c;void 0!==o&&(d="application/json"===n?JSON.stringify(o):String(o));for(let t=0;t<=2;t++)try{const o=await m(r,{method:e,headers:s,body:d}),a=await o.text();if(!o.ok){if(t<2&&p(null,o.status)){await new Promise(e=>setTimeout(e,1e3*2**t));continue}const e=(()=>{try{return JSON.parse(a)}catch{return null}})(),r=e?.error??a,n=400===o.status?" Check parameter types and required fields.":404===o.status?" The resource may not exist or may have been deleted.":409===o.status?" The job may not be in the expected state.":429===o.status?" 
Rate limit exceeded — wait a moment and retry.":"";throw new Error(`${r}${n}`)}return JSON.parse(a)}catch(e){if(c=e,t<2&&p(e)){await new Promise(e=>setTimeout(e,1e3*2**t));continue}throw e}throw c}async function f(e){const t=`${l}${e}`;let o;for(let a=0;a<=2;a++)try{const o=await m(t,{method:"GET",headers:{Authorization:`Bearer ${u}`}});if(!o.ok){if(a<2&&p(null,o.status)){await new Promise(e=>setTimeout(e,1e3*2**a));continue}throw new Error(`API GET ${e} returned ${o.status}`)}const r=await o.arrayBuffer();return{data:Buffer.from(r),contentType:o.headers.get("content-type")??"application/octet-stream"}}catch(e){if(o=e,a<2&&p(e)){await new Promise(e=>setTimeout(e,1e3*2**a));continue}throw e}throw o}function h(e){return{content:[{type:"text",text:JSON.stringify(e,null,2)}]}}async function b(e,t=3e4,o=1e3){const a=Date.now();for(;Date.now()-a<t;){const t=await g("GET",`/v1/jobs/${e}`),a=t.status;if("completed"===a||"failed"===a||"cancelled"===a)return{status:a,result_ref:t.result_ref,error:t.error};await new Promise(e=>setTimeout(e,o))}return{status:"timeout"}}const _=new e({name:"analytics-engine",version:"0.4.0"}),y=import.meta.dirname??s.dirname(new URL(import.meta.url).pathname);async function w(e){const t=[s.join(y,"views","src","views",e,"index.html"),s.join(y,"views",e,"index.html"),s.join(y,"..","dist","views","src","views",e,"index.html")];for(const e of t)try{return await i.readFile(e,"utf-8")}catch{continue}return null}const v="ui://barsom/som-explorer",$="ui://barsom/data-preview",x="ui://barsom/training-monitor";function j(e,t,o,a){const r=t.output_format??"pdf";if("transition_flow"===e){return[`transition_flow_lag${t.lag??1}.${r}`]}if("project_variable"===e){const e=t.variable_name??"variable";return[`projected_${String(e).replace(/[^a-zA-Z0-9_]/g,"_")}.${r}`]}if("derive_variable"===e){const e=t.variable_name??"variable";return[`projected_${String(e).replace(/[^a-zA-Z0-9_]/g,"_")}.${r}`]}const 
n=t.features??[],i=`combined.${r}`,s=`umatrix.${r}`,l=`hit_histogram.${r}`,u=`correlation.${r}`,d=n.map((e,t)=>`component_${t+1}_${e.replace(/[^a-zA-Z0-9_]/g,"_")}.${r}`),c=[i,s,l,u,...d];if(void 0===o||"default"===o)return a?c:[i];if("combined_only"===o)return[i];if("all"===o)return c;if(Array.isArray(o)){const e={combined:i,umatrix:s,hit_histogram:l,correlation:u};return n.forEach((t,o)=>{e[`component_${o+1}`]=d[o]}),o.map(t=>{const o=t.trim().toLowerCase();return e[o]?e[o]:t.includes(".")?t:null}).filter(e=>null!=e)}return[i]}function S(e){return e.endsWith(".pdf")?"application/pdf":e.endsWith(".svg")?"image/svg+xml":"image/png"}async function T(e,t,o){try{const{data:a}=await f(`/v1/results/${t}/image/${o}`);e.push({type:"image",data:a.toString("base64"),mimeType:S(o),annotations:{audience:["user"],priority:.8}})}catch{e.push({type:"text",text:`(${o} not available for inline display)`})}}a(_,v,v,{mimeType:n},async()=>{const e=await w("som-explorer");return{contents:[{uri:v,mimeType:n,text:e??"<html><body>SOM Explorer view not built yet. Run: npm run build:views</body></html>"}]}}),a(_,$,$,{mimeType:n},async()=>{const e=await w("data-preview");return{contents:[{uri:$,mimeType:n,text:e??"<html><body>Data Preview view not built yet.</body></html>"}]}}),a(_,x,x,{mimeType:n},async()=>{const e=await w("training-monitor");return{contents:[{uri:x,mimeType:n,text:e??"<html><body>Training Monitor view not built yet.</body></html>"}]}}),r(_,"explore_som",{title:"Explore SOM",description:"Interactive SOM explorer dashboard. Opens an inline visualization where you can toggle features, click nodes, and export figures. Use this after get_results for a richer, interactive exploration experience. 
Falls back to text+image on hosts that don't support MCP Apps.",inputSchema:{job_id:o.string().describe("Job ID of a completed SOM training job")},_meta:{ui:{resourceUri:v}}},async({job_id:e})=>{const t=await g("GET",`/v1/results/${e}`),o=t.summary??{},a=[];a.push({type:"text",text:JSON.stringify({job_id:e,summary:o,download_urls:t.download_urls})});const r=o.output_format??"pdf";return await T(a,e,`combined.${r}`),{content:a}}),_.tool("guide_barsom_workflow","Retrieve the Standard Operating Procedure (SOP) for the barSOM analysis pipeline.\nALWAYS call this tool first if you are unsure of the steps to execute a complete Self-Organizing Map analysis.\nThe workflow explains the exact sequence of tool calls needed: Upload → Preprocess → Train → Wait → Analyze.",{},async()=>({content:[{type:"text",text:"barSOM Standard Operating Procedure (SOP)\n\nStep 1: Upload Data\n- Use `datasets(action=upload)` with a local `file_path` to your CSV.\n- BEFORE UPLOADING: Clean the dataset to remove NaNs or malformed data.\n- Capture the `dataset_id` returned.\n\nStep 2: Preview & Preprocess\n- Use `datasets(action=preview)` to inspect columns, ranges, and types.\n- Check for skewed columns requiring 'log' or 'sqrt' transforms.\n- Check for cyclical or temporal features (hours, days) requiring `cyclic_features` or `temporal_features` during training.\n\nStep 3: Train the SOM\n- Call `train_som` with the `dataset_id`.\n- Carefully select columns to include (start with 5-10).\n- Assign `feature_weights` (especially for categorical data with natural hierarchies).\n- Wait for the returned `job_id`.\n\nStep 4: Wait for Completion (ASYNC POLLING)\n- Use `get_job_status` every 10-15 seconds.\n- Wait until status is \"completed\". 
DO NOT assume failure before 3 minutes (or longer for large grids).\n- If it fails, read the error message and adjust parameters (e.g., reduce grid size, fix column names).\n\nStep 5: Analyze and Export\n- Once completed, use `analyze(type=component_planes)` or `analyze(type=clusters)` to interpret the results.\n- Call `get_results` to get the final metrics (Quantization Error, Topographic Error)."}]})),_.tool("datasets",'Manage datasets: upload, preview, subset, or delete.\n\naction=upload: Upload a CSV for SOM analysis. Prefer file_path over csv_data so the MCP reads the file directly. Returns dataset ID and metadata. Then use datasets(action=preview) before train_som.\nBEFORE UPLOADING: Ensure data has no NaNs, missing values, or formats that can\'t be handled. The user/LLM is responsible for cleaning the dataset before upload. Categorical features should be weighted if there is a natural hierarchy.\naction=preview: Show columns, stats, sample rows, cyclic/datetime detections. ALWAYS preview before train_som on an unfamiliar dataset.\naction=subset: Create a new dataset from a subset of an existing one. Requires name and at least one of row_range or filters.\n - row_range: [start, end] 1-based inclusive (e.g. [1, 2000] for first 2000 rows)\n - filters: array of conditions, ALL must match (AND logic). 
Each: { column, op, value }.\n Operators:\n eq — exact match (string or number): { column: "region", op: "eq", value: "Europe" }\n ne — not equal: { column: "status", op: "ne", value: "error" }\n in — value in set: { column: "grade", op: "in", value: ["A", "B"] }\n gt/lt — above/below threshold: { column: "temp", op: "gt", value: 20 }\n gte/lte — at or above/below: { column: "price", op: "gte", value: 100 }\n between — closed interval [lo, hi]: { column: "age", op: "between", value: [18, 65] }\n - Combine row_range + filters to slice both rows and values.\n - Single filter object is also accepted (auto-wrapped).\naction=delete: Remove a dataset and free the slot.\n\nBEST FOR: Tabular numeric data. CSV with header required. Use list(type=datasets) to see existing datasets.',{action:o.enum(["upload","preview","subset","delete"]).describe("upload: add a CSV; preview: inspect before training; subset: create subset dataset; delete: remove dataset"),name:o.string().optional().describe("Dataset name (required for action=upload and subset)"),file_path:o.string().optional().describe("Path to local CSV (for upload; prefer over csv_data)"),csv_data:o.string().optional().describe("Inline CSV string (for upload; use for small data)"),dataset_id:o.string().optional().describe("Dataset ID (required for preview, subset, and delete)"),n_rows:o.number().int().optional().default(5).describe("Sample rows to return (preview only)"),row_range:o.tuple([o.number().int(),o.number().int()]).optional().describe("For subset: [start, end] 1-based inclusive row range (e.g. [1, 2000])"),filters:o.preprocess(e=>null==e||Array.isArray(e)?e:"object"==typeof e&&null!==e&&"column"in e?[e]:e,o.array(o.object({column:o.string(),op:o.enum(["eq","ne","in","gt","lt","gte","lte","between"]),value:o.union([o.string(),o.number(),o.array(o.union([o.string(),o.number()]))])})).optional().describe("For subset: filter conditions (AND logic). Single object or array. ops: eq, ne, in, gt, lt, gte, lte, between. 
Examples: { column: 'temp', op: 'between', value: [15, 30] }, { column: 'region', op: 'eq', value: 'Europe' }")),filter:o.object({column:o.string(),op:o.enum(["eq","ne","in","gt","lt","gte","lte","between"]),value:o.union([o.string(),o.number(),o.array(o.union([o.string(),o.number()]))])}).optional().describe("Deprecated — use filters instead. Single filter condition.")},async({action:e,name:t,file_path:o,csv_data:a,dataset_id:r,n_rows:n,row_range:l,filters:u,filter:d})=>{if("upload"===e){if(!t)throw new Error("datasets(upload) requires name");let e;if(o){const t=s.resolve(o);try{e=await i.readFile(t,"utf-8")}catch(e){const o=e instanceof Error?e.message:String(e);throw new Error(`Cannot read file "${t}": ${o}`)}}else{if(!(a&&a.length>0))throw new Error("datasets(upload) requires file_path or csv_data");e=a}return h(await g("POST","/v1/datasets",e,{"X-Dataset-Name":t,"Content-Type":"text/csv"}))}if("preview"===e){if(!r)throw new Error("datasets(preview) requires dataset_id");const e=await g("GET",`/v1/datasets/${r}/preview?n_rows=${n??5}`),t=e.columns??[],o=e.column_stats??[],a=e.cyclic_hints??[],i=e.sample_rows??[],s=e.datetime_columns??[],l=e.temporal_suggestions??[],u=e=>null==e?"—":Number(e).toFixed(3),d=[`Dataset: ${e.name} (${e.dataset_id})`,`${e.total_rows} rows × ${e.total_cols} columns`,"","Column Statistics:","| Column | Min | Max | Mean | Std | Nulls | Numeric |","|--------|-----|-----|------|-----|-------|---------|"];for(const e of o)d.push(`| ${e.column} | ${u(e.min)} | ${u(e.max)} | ${u(e.mean)} | ${u(e.std)} | ${e.null_count??0} | ${!1!==e.is_numeric?"yes":"no"} |`);if(a.length>0){d.push("","Detected Cyclic Feature Hints:");for(const e of a)d.push(` • ${e.column} — period=${e.period} (${e.reason})`)}if(s.length>0){d.push("","Detected Datetime Columns:");for(const e of s){const t=(e.detected_formats??[]).map(e=>`${e.format} — ${e.description} (${(100*e.match_rate).toFixed(0)}% match)`).join("; ");d.push(` • ${e.column}: sample="${e.sample}" → 
${t}`)}}if(l.length>0){d.push("","Temporal Feature Suggestions (require user approval):");for(const e of l)d.push(` • Columns: ${e.columns.join(" + ")} → format: "${e.format}"`),d.push(` Available components: ${e.available_components.join(", ")}`)}if(i.length>0){d.push("",`Sample Rows (first ${i.length}):`),d.push(`| ${t.join(" | ")} |`),d.push(`| ${t.map(()=>"---").join(" | ")} |`);for(const e of i)d.push(`| ${t.map(t=>String(e[t]??"")).join(" | ")} |`)}return{content:[{type:"text",text:d.join("\n")}]}}if("subset"===e){if(!r)throw new Error("datasets(subset) requires dataset_id");if(!t)throw new Error("datasets(subset) requires name");const e=u??(d?[d]:void 0);if(void 0===l&&void 0===e)throw new Error("datasets(subset) requires at least one of row_range or filters");const o={name:t};void 0!==l&&(o.row_range=l),void 0!==e&&(o.filters=e);return h(await g("POST",`/v1/datasets/${r}/subset`,JSON.stringify(o),{"Content-Type":"application/json"}))}if("delete"===e){if(!r)throw new Error("datasets(delete) requires dataset_id");return h(await g("DELETE",`/v1/datasets/${r}`))}throw new Error("Invalid action")}),_.tool("train_som","Train a Self-Organizing Map on the dataset. Returns a job_id for polling.\n\nBEST FOR: Exploratory analysis of multivariate numeric data — clustering, regime\ndetection, process monitoring, anomaly visualization, dimensionality reduction.\nNOT FOR: Time-series forecasting, classification, or text/image data.\n\nASYNC POLLING PROTOCOL:\n- This tool returns a job_id. You MUST poll get_job_status to check completion.\n- Poll every 10-15 seconds.\n- Wait for status \"completed\" before calling analyze() or get_results().\n\nBEFORE calling, ask the user:\n1. Which columns to include? (use 'columns' to restrict)\n2. Any cyclic features?\n3. Any skewed columns? (suggest transforms)\n4. Feature weights?\n5. 
Quick exploration or refined map?\n\nESCALATION LADDER (If this tool fails):\n- Error mentions temporal validation: The dataset likely has datetime formatting issues. Use datasets(action=preview).\n- Error mentions column not found: Use datasets(action=preview) to verify exact column names (case-sensitive).\n- Error mentions NaNs or missing data: The user must clean the dataset.\n\nSee docs/SOM_PROCESS_AND_BEST_PRACTICES.md for detailed processual knowledge.",{dataset_id:o.string().describe("Dataset ID from datasets(action=upload) or list(type=datasets)"),preset:o.enum(["quick","standard","refined","high_res"]).optional().describe("Training preset — sets sensible defaults for grid, epochs, and batch_size. Explicit params override preset values. quick: 15×15, [15,5], batch=48. standard: 25×25, [30,15], batch=48, best with GPU. refined: 40×40, [50,25], batch=32, best with GPU. high_res: 60×60, [60,40], batch=32, best with GPU."),grid_x:o.number().int().optional().describe("Grid width (omit for auto from data size)"),grid_y:o.number().int().optional().describe("Grid height (omit for auto from data size)"),epochs:o.preprocess(e=>{if(null==e)return e;if("string"==typeof e){const t=parseInt(e,10);if(!Number.isNaN(t))return t;const o=e.match(/^\[\s*(\d+)\s*,\s*(\d+)\s*\]$/);if(o)return[parseInt(o[1],10),parseInt(o[2],10)]}return e},o.union([o.number().int(),o.array(o.number().int()).length(2)]).optional().describe("epochs: integer or [ordering, convergence] array, not a string. Example: 40 or [40, 20]. Set convergence=0 to skip phase 2 (e.g. [15, 0]).")),model:o.enum(["SOM","RSOM","SOM-SOFT","RSOM-SOFT"]).optional().default("SOM").describe("SOM model type. SOM=standard, SOM-SOFT=GTM-style soft responsibilities, RSOM=recurrent (time-series), RSOM-SOFT=recurrent+soft."),periodic:o.boolean().optional().default(!0).describe("Use periodic (toroidal) boundaries"),columns:o.array(o.string()).optional().describe("Subset of CSV column names to train on. Omit to use all columns. 
Useful to exclude irrelevant features."),cyclic_features:o.array(o.object({feature:o.string().describe("Column name (e.g., 'weekday')"),period:o.number().describe("Period (e.g., 7 for weekday, 24 for hour, 360 for angle)")})).optional().describe("Features to encode as cyclic (cos, sin) pairs"),temporal_features:o.array(o.object({columns:o.array(o.string()).describe("Column name(s) containing datetime strings, combined in order (e.g. ['Date', 'Time'])"),format:o.string().describe("Julia Dates format string from the whitelist (e.g. 'dd.mm.yyyy HH:MM'). Must match the combined column values."),extract:o.array(o.enum(["hour_of_day","day_of_year","month","day_of_week","minute_of_hour"])).describe("Which temporal components to extract"),cyclic:o.boolean().default(!0).describe("Encode extracted components as cyclic sin/cos pairs (default true)"),separator:o.string().optional().describe("Separator when combining multiple columns (default ' '). Use 'T' for ISO 8601.")})).optional().describe("Temporal feature extraction from datetime columns. Parses dates/times and extracts components. NEVER add this without user approval."),feature_weights:o.record(o.number()).optional().describe("Per-feature importance weights as {column_name: weight}. Applied after normalization (column *= sqrt(weight)). weight=0 disables, >1 emphasizes, <1 de-emphasizes. Cyclic shorthand: {'day_of_year': 2.0} auto-expands to both _cos and _sin. Categorical features should be weighted by the LLM if there is any natural hierarchy applicable that could be constructive."),transforms:o.record(o.enum(["log","log1p","log10","sqrt","square","abs","invert","rank","none"])).optional().describe("Per-column preprocessing applied BEFORE normalization. Example: {revenue: 'log', pressure: 'sqrt'}. 'log' = natural log (fails on <=0), 'log1p' = log(1+x) (safe for zeros), 'sqrt' = square root, 'rank' = replace with rank order, 'invert' = 1/x. 
Suggest log/log1p for right-skewed distributions (prices, volumes, counts)."),normalize:o.union([o.enum(["all","auto"]),o.array(o.string())]).optional().default("auto").describe("Normalization mode. 'auto' skips already-cyclic features."),sigma_f:o.preprocess(e=>{if(null==e)return e;if("string"==typeof e){const t=parseFloat(e);if(!Number.isNaN(t))return t}return e},o.number().optional().describe("Final neighborhood radius at end of ordering phase (default 1.0). Lower values (0.5–0.7) produce sharper cluster boundaries.")),learning_rate:o.preprocess(e=>{if(null==e)return e;if("string"==typeof e){const t=parseFloat(e);if(!Number.isNaN(t))return t}return e},o.union([o.number(),o.object({ordering:o.tuple([o.number(),o.number()]),convergence:o.tuple([o.number(),o.number()])})]).optional().describe("Learning rate control. Number = sets ordering final rate (e.g. 0.05). Object = full control: {ordering: [eta_0, eta_f], convergence: [eta_0, eta_f]}. Default: ordering 0.1→0.01, convergence 0.01→0.001.")),batch_size:o.number().int().optional().describe("Training batch size (default: auto ≈ n_samples/10, max 256). Smaller batches (e.g. 32–64) often sharpen features and can improve map quality (QE, explained variance) at the cost of more steps per epoch. Larger batches = faster epochs but coarser updates; try 64–256 for large datasets (>10k samples)."),backend:o.enum(["auto","cpu","cuda","cuda_graphs"]).optional().default("auto").describe("Compute backend. 'auto' uses CUDA if GPU is available (recommended). 'cpu' forces CPU. 'cuda_graphs' uses CUDA graph capture for maximum GPU throughput."),output_format:o.enum(["png","pdf","svg"]).optional().default("pdf").describe("Image output format. PDF (default) for publication-quality vector graphics, PNG for quick viewing, SVG for web embedding."),output_dpi:o.enum(["standard","retina","print"]).optional().default("retina").describe("Resolution for PNG output: standard (1x), retina (2x, default), print (4x). 
Ignored for PDF/SVG."),colormap:o.string().optional().describe("Override default colormap (coolwarm) for component planes and hit histogram. Examples: viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix always uses grays, cyclic features use twilight."),row_range:o.tuple([o.number().int().min(1),o.number().int().min(1)]).optional().describe("Train on a subset of rows only: [start, end] 1-based inclusive. Alternative to creating a subset dataset with datasets(action=subset).")},async({dataset_id:e,preset:t,grid_x:o,grid_y:a,epochs:r,model:n,periodic:i,columns:s,transforms:l,cyclic_features:u,temporal_features:d,feature_weights:c,normalize:p,sigma_f:m,learning_rate:f,batch_size:b,backend:_,output_format:y,output_dpi:w,colormap:v,row_range:$})=>{let x={};try{const e=await g("GET","/v1/training/config");x=e?.presets||{}}catch(e){if(t&&!o&&!r)throw new Error("Could not fetch training config from server, and missing explicit grid/epochs.")}const j=t?x[t]:void 0,S={model:n,periodic:i,normalize:p};void 0!==o&&void 0!==a?S.grid=[o,a]:j&&(S.grid=j.grid),void 0!==r?S.epochs=r:j&&(S.epochs=j.epochs),u&&u.length>0&&(S.cyclic_features=u),s&&s.length>0&&(S.columns=s),l&&Object.keys(l).length>0&&(S.transforms=l),d&&d.length>0&&(S.temporal_features=d),c&&Object.keys(c).length>0&&(S.feature_weights=c),void 0!==m&&(S.sigma_f=m),void 0!==f&&(S.learning_rate=f),void 0!==b?S.batch_size=b:j&&(S.batch_size=j.batch_size),void 0!==_&&"auto"!==_?S.backend=_:j?.backend&&(S.backend=j.backend),S.output_format=y??"pdf";const T={standard:1,retina:2,print:4};w&&"retina"!==w&&(S.output_dpi=T[w]??2),v&&(S.colormap=v),$&&$.length>=2&&$[0]<=$[1]&&(S.row_range=$);const E=await g("POST","/v1/jobs",{dataset_id:e,params:S});try{const e=await 
g("GET","/v1/system/info"),t=Number(e.status?.pending_jobs??e.pending_jobs??0),o=Number(e.training_time_estimates_seconds?.total??(e.gpu_available?45:120)),a=Math.round(t*o/60);a>1?(E.estimated_wait_minutes=a,E.message=`Job submitted. You are #${t+1} in queue. Estimated wait before start: ~${a} min.`):E.message="Job submitted. Should start momentarily."}catch(e){}return h(E)}),_.tool("get_job_status","Check status and progress of a training or analysis job.\n\nASYNC POLLING PROTOCOL:\n- Poll every 10-15 seconds. Do NOT poll faster as it wastes context.\n- For large grids (40x40+), do not assume failure before 3 minutes on CPU.\n- Wait for status \"completed\" before calling analyze() or get_results().\n\nESCALATION LADDER:\n- When status is 'completed', call get_results() to retrieve the map and metrics.\n- When status is 'failed', the worker hit an error. Extract the error message.\n - If the error is about memory/allocation, reduce batch_size or grid_x/grid_y and run train_som again.\n - If the error is about column missing, verify columns with datasets(action=preview).\n - If the error is about NaNs, the user MUST clean the dataset.\n\nTIMING:\nTypical completion times (CPU, 8700 samples):\n 10x10, 10 epochs: ~30s | 20x20, 30 epochs: ~3–5 min | 40x40, 60 epochs: ~15–30 min",{job_id:o.string().describe("Job ID from train_som")},async({job_id:e})=>{const t=await g("GET",`/v1/jobs/${e}`),o=t.status,a=100*(t.progress??0),r=null!=t.label&&""!==t.label?String(t.label):null;let n=`${r?`Job ${r} (id: ${e})`:`Job ${e}`}: ${o} (${a.toFixed(1)}%)`;return"completed"===o?n+=` | Results ready. 
Use get_results(job_id="${e}") to retrieve.`:"failed"===o&&(n+=` | Error: ${t.error??"unknown"}`),{content:[{type:"text",text:n}]}}),_.tool("get_results",'Retrieve results of a completed SOM training, projection, or derived variable job.\n\nWHEN TO USE: \n- Getting the first look at a trained SOM — combined visualization + quality metrics.\n- ONLY call this after get_job_status returns "completed".\n\nESCALATION LADDER:\n- If this returns an error that the job is not found, verify the job_id.\n- If it returns "job not complete", you polled too early. Call get_job_status and WAIT.\n\nTIMING: Near-instant (reads pre-computed results from S3).\n\nReturns: text summary with metrics and inline images (combined view and all plots shown directly in chat).\n\nDOWNLOAD LINKS: Links to API-domain or presigned URLs may not work when clicked (MCP holds the API key, not the browser). Images are inlined. For weights use get_job_export(export="weights"); for node stats use get_job_export(export="nodes"). If the user wants to save a file, offer to fetch via the appropriate tool.\n\nOPTIONS:\n- figures: request specific plots only. Omit for default (combined only; or all if include_individual=true).\n - "combined_only": only the combined view.\n - "all": combined + umatrix + hit_histogram + all component planes.\n - Array of logical names: e.g. figures: ["umatrix"] for just the U-matrix, or figures: ["combined","hit_histogram","correlation"] or ["combined","umatrix","component_1","component_2"]. Logical names: combined, umatrix, hit_histogram, correlation, component_1, component_2, ... (component_N = Nth feature). Pass an array to fetch/save only those figures.\n- include_individual=true: when figures is omitted, shows each component plane, U-matrix, and hit histogram\n as separate inline images. Ignored when figures is set.\n\nAFTER showing results, guide the user:\n1. "The U-matrix shows [N] distinct regions. Does this match expected groupings?"\n2. "QE=X, TE=Y — [assessment]. 
Would you like to retrain with different params?"\n3. "Which features show interesting patterns in the component planes?"\n4. If QE > 2: suggest more epochs or larger grid\n5. If TE > 0.15: suggest larger grid\n6. If explained variance < 0.7: suggest transforms, feature selection, or more training\n\nWORKFLOW: get_results → analyze(clusters) → component_planes → feature_correlation.\nRequest specific figures with get_results(job_id, figures=[...]) (e.g. figures: ["umatrix"] or figures: ["combined","hit_histogram"]) or run analyze(job_id, analysis_type) for a single view.\nUse get_job_export(export="training_log") for the learning curve (QE vs epoch — healthy=steady decline then plateau).\nUse analyze(job_id, "quality_report") for extended metrics (trustworthiness, neighborhood preservation).\n\nMETRIC INTERPRETATION:\n- QE < 1.5: good fit. QE > 2: consider more epochs, larger grid, or batch_size=32.\n- TE < 0.05: good topology. TE > 0.15: grid too small.\n- Explained variance > 0.8: good. < 0.7: try transforms, fewer features, or more training.\nAfter successful retrieval, you may auto-generate a brief piece of feedback based on the user\'s experience so far and ask if they\'d like you to send it to the developers via send_feedback (max 190 words).',{job_id:o.string().describe("Job ID of a completed job"),figures:o.union([o.enum(["default","combined_only","all"]),o.array(o.string())]).optional().describe("Which figures to return. Omit or 'default' for combined only (or all if include_individual=true). 'combined_only': just combined view. 'all': combined + umatrix + hit_histogram + correlation + all component planes. Or array of logical names to fetch only those: combined, umatrix, hit_histogram, correlation, component_1, component_2, ..."),include_individual:o.boolean().optional().default(!1).describe("If true and figures is omitted, inline each individual plot (component planes, u-matrix, hit histogram). 
Ignored when figures is set.")},async({job_id:e,figures:t,include_individual:o})=>{const a=await g("GET",`/v1/results/${e}`),r=a.summary??{},n=(a.download_urls,null!=a.label&&""!==a.label?String(a.label):null),i=n?`Results for ${n} (job_id: ${e})`:`Results for job_id: ${e}`,s=[],l=new Set,u=r.job_type??"train_som";r.output_format;if("transition_flow"===u){const a=r.lag??1,n=r.flow_stats??{};s.push({type:"text",text:[`Transition Flow ${i}`,`Parent SOM: ${r.parent_job_id??"N/A"} | Lag: ${a} | Samples: ${r.n_samples??0}`,"","Flow Statistics:",` Mean flow magnitude: ${void 0!==n.mean_magnitude?Number(n.mean_magnitude).toFixed(4):"N/A"}`,` Max flow magnitude: ${void 0!==n.max_magnitude?Number(n.max_magnitude).toFixed(4):"N/A"}`,` Nodes with flow: ${n.n_nodes_with_flow??"N/A"}`,"","Arrows show net directional drift between consecutive BMU assignments.","Long/bright arrows = frequent state transitions. Short arrows = stable states.","Background = U-matrix (cluster boundaries). Arrows in dark regions = intra-cluster.","","Use transition_flow(lag=N) with larger N to reveal longer-term temporal structure."].join("\n")});for(const a of j(u,r,t,o))await T(s,e,a),l.add(a)}else if("project_variable"===u){const a=r.variable_name??"variable",n=r.aggregation??"mean",d=r.variable_stats??{};s.push({type:"text",text:[`Projected Variable: ${a} (${n}) — ${i}`,`Parent SOM: ${r.parent_job_id??"N/A"} | Samples: ${r.n_samples??0}`,"",`Variable Statistics (per-node ${n}):`,` Min: ${void 0!==d.min?Number(d.min).toFixed(3):"N/A"}`,` Max: ${void 0!==d.max?Number(d.max).toFixed(3):"N/A"}`,` Mean: ${void 0!==d.mean?Number(d.mean).toFixed(3):"N/A"}`,` Nodes with data: ${d.n_nodes_with_data??"N/A"} / ${Number(d.n_nodes_with_data??0)+Number(d.n_nodes_empty??0)}`,"","Non-random spatial patterns indicate the variable correlates with the SOM's","learned feature space, even if it wasn't used in training."].join("\n")});for(const a of j(u,r,t,o))await T(s,e,a),l.add(a)}else{const 
a=r.grid??[0,0],n=r.features??[],d=r.epochs,c=Array.isArray(d)?0===d[1]?`${d[0]} ordering only`:`${d[0]} ordering + ${d[1]} convergence`:String(d??"N/A"),p=e=>null!=e?Number(e).toFixed(4):"N/A",m=r.training_duration_seconds,g=r.ordering_errors,f=[`SOM Training ${i}`,`Grid: ${a[0]}×${a[1]} | Features: ${r.n_features??0} | Samples: ${r.n_samples??0}`,`Model: ${r.model??"SOM"} | Epochs: ${c}`,`Periodic: ${r.periodic??!0} | Normalize: ${r.normalize??"auto"}`,void 0!==r.sigma_f?`Sigma_f: ${r.sigma_f}`:"",void 0!==m?`Training duration: ${m}s`:"","","Quality Metrics:",` Quantization Error: ${p(r.quantization_error)} (lower is better)`,` Topographic Error: ${p(r.topographic_error)} (lower is better, <0.1 is good)`,` Explained Variance: ${p(r.explained_variance)} (higher is better, >0.7 is good)`,` Silhouette Score: ${p(r.silhouette)} (higher is better, [-1, 1])`,` Davies-Bouldin: ${p(r.davies_bouldin)} (lower is better)`,` Calinski-Harabasz: ${p(r.calinski_harabasz)} (higher is better)`,g&&g.length>0?` Final ordering QE: ${g.at(-1)?.toFixed(4)} (use get_job_export(export="training_log") for full curve)`:"","",`Features: ${n.join(", ")}`,r.selected_columns?`Selected columns: ${r.selected_columns.join(", ")}`:"",r.transforms?`Transforms: ${Object.entries(r.transforms).map(([e,t])=>`${e}=${t}`).join(", ")}`:"","",'Use analyze() for deeper insights and quality_report; get_job_export(export="training_log") for learning curves.'].filter(e=>""!==e).join("\n");s.push({type:"text",text:f});r.output_format;const h=j(u,r,t,o);for(const t of h)await T(s,e,t),l.add(t)}const d=r.files??[],c=e=>e.endsWith(".png")||e.endsWith(".svg")||e.endsWith(".pdf");for(const t of d)if(c(t)&&!l.has(t))await T(s,e,t);else if(t.endsWith(".json")){const e="weights.json"===t?'Use get_job_export(export="weights") for full weight matrix including node_coords.':"node_stats.json"===t?'Use get_job_export(export="nodes") for per-node statistics.':"summary.json"===t?null:"Use get_job_export for structured data 
(weights or nodes).";e&&s.push({type:"text",text:`${t}: ${e}`})}if(d.length>0){const e=r.features??[],t="train_som"===u||"render_variant"===u?`Logical names: combined, umatrix, hit_histogram, correlation, ${e.map((e,t)=>`component_${t+1}`).join(", ")}. `:"";s.push({type:"text",text:`Available to fetch individually: ${d.join(", ")}. ${t}Use get_results(job_id, figures=[...]) to request specific plots, get_results(job_id, include_individual=true) or figures="all" to inline all plots, or analyze(job_id, analysis_type) for a specific view (u_matrix, component_planes, bmu_hits, clusters, quality_report, etc.).`})}return{content:s}}),_.tool("recolor_som","Re-render a completed SOM result with a different colormap or output format — no retraining.\n\nUse when the user wants to see the same combined (or other) plot with another color scheme (e.g. plasma, inferno, coolwarm). You can also use this to re-export figures in a different format (e.g. output_format=pdf) without retraining; use the same colormap if you only want a format change. Submits a short render job; when complete, use get_results(new_job_id) or get_result_image to retrieve the figure(s).\n\nColormaps (default: coolwarm): e.g. viridis, plasma, inferno, magma, cividis, turbo, thermal, hot, coolwarm, balance, RdBu, Spectral. U-matrix and cyclic panels keep fixed colormaps (grays, twilight).",{job_id:o.string().describe("Job ID of a completed SOM training job (parent)"),colormap:o.string().describe("Colormap name (default: coolwarm). E.g. 
viridis, plasma, inferno, magma, coolwarm)"),figures:o.array(o.string()).optional().default(["combined"]).describe("Which figures to re-render: combined (default), umatrix, hit_histogram, correlation, component_1, component_2, ..."),output_format:o.enum(["png","pdf","svg"]).optional().default("pdf"),output_dpi:o.number().int().min(1).max(4).optional().default(2)},async({job_id:e,colormap:t,figures:o,output_format:a,output_dpi:r})=>{const n={colormap:t,figures:o,output_format:a,output_dpi:r},i=(await g("POST",`/v1/results/${e}/render`,JSON.stringify(n),{"Content-Type":"application/json"})).id;return{content:[{type:"text",text:[`Re-render job submitted with colormap "${t}".`,`New job_id: ${i}. Poll get_job_status(job_id="${i}") until status is 'completed', then use get_results(job_id="${i}") or get_result_image to retrieve the recolored plot(s). No retraining was performed.`].join("\n")}]}}),_.tool("download_results",'Save result figures (and optionally JSON) to a folder on disk. Use so the user can open, share, or version files locally without writing their own download script.\n\nfolder: path to the directory (e.g. "." for current/workspace, "./results", or absolute path). When folder is a generic path like "." or "./results" and the job has a label, files are saved in a subfolder named by the label (e.g. ./results/Winedata_a1b2c3_badger_thong_oil). You can also pass a path that already includes the label.\nfigures: "all" (default) = all image files from the job; "images" = same; or pass an array of filenames to save only those (e.g. ["combined.pdf", "umatrix.pdf", "correlation.pdf"]). 
Default export format is PDF.\ninclude_json: if true, also save summary.json (and other JSON artifacts) into the same folder.\nAfter saving files, you may auto-generate a brief piece of feedback based on the session and ask the user if they\'d like you to send it via send_feedback to the developers.',{job_id:o.string().describe("Job ID of a completed job"),folder:o.string().describe("Directory path to save files (e.g. '.' or './results'). When the job has a label, a subfolder with that label may be used. Relative paths are relative to process cwd (usually project root)."),figures:o.union([o.enum(["all","images"]),o.array(o.string())]).optional().default("all").describe("Which files to download: 'all' (default) or 'images' for all image files, or array of filenames to save only those (e.g. ['combined.pdf', 'umatrix.pdf', 'correlation.pdf'])."),include_json:o.boolean().optional().default(!1).describe("If true, also download summary.json and other JSON files")},async({job_id:e,folder:t,figures:o,include_json:a})=>{const r=await g("GET",`/v1/results/${e}`),n=r.summary??{},l=null!=r.label&&""!==r.label?String(r.label):null,u=n.files??[],d=e=>e.endsWith(".png")||e.endsWith(".svg")||e.endsWith(".pdf");let c;"all"===o||"images"===o?c=a?u:u.filter(d):(c=o,a&&!c.includes("summary.json")&&(c=[...c,"summary.json"]));let p=s.resolve(t);l&&("."===t||"./results"===t||"results"===t)&&(p=s.join(p,l)),await i.mkdir(p,{recursive:!0});const m=[];for(const t of c)try{const{data:o}=await f(`/v1/results/${e}/image/${t}`),a=s.join(p,t);await i.writeFile(a,o),m.push(t)}catch{}return{content:[{type:"text",text:m.length>0?`Saved ${m.length} file(s) to ${p}: ${m.join(", ")}`:"No files saved (job may have no matching files or download failed). Check job_id and that the job is completed."}]}}),_.tool("analyze",'Run a specific analysis on SOM results. Use after get_results to drill into aspects.\nRequest specific plots: get_results(job_id, figures=[...]) for chosen figures (e.g. 
figures: ["umatrix"]) or analyze(job_id, analysis_type) for a single analysis view.\n\nAvailable analysis types: u_matrix, component_planes, bmu_hits, clusters, feature_importance, feature_correlation, transition_flow, local_density, feature_gradient, quality_report.\n(Detailed interpretation guidance is fetched from the server).',{job_id:o.string().describe("Job ID of a completed job"),analysis_type:o.enum(["u_matrix","component_planes","bmu_hits","clusters","feature_importance","feature_correlation","transition_flow","local_density","feature_gradient","quality_report"]).describe("Type of analysis to run"),params:o.record(o.unknown()).optional().describe("Analysis-specific parameters. For component_planes/feature_gradient: {features: [col,...]} to restrict to specific columns.")},async({job_id:e,analysis_type:t,params:o})=>{let a="";try{a=(await g("GET","/v1/docs/tool_guidance?tool=analyze")).guidance||""}catch(e){}const r=(await g("GET",`/v1/results/${e}`)).summary??{},n=r.features??[],i=r.grid??[0,0],s=r.output_format??"pdf",l=[];if("u_matrix"===t)l.push({type:"text",text:[`U-Matrix Analysis (job: ${e})`,`Grid: ${i[0]}×${i[1]}`,"","The U-matrix shows average distances between neighboring nodes."," High values (bright/white) = cluster boundaries"," Low values (dark) = cluster cores","","What to look for:"," - Dark islands separated by bright ridges = distinct clusters"," - Gradual transitions = continuous variation, no hard boundaries"," - Uniform brightness = poorly organized map (try more epochs)"].join("\n")}),await T(l,e,`umatrix.${s}`);else if("component_planes"===t){const t=o?.features??n;l.push({type:"text",text:[`Component Planes (job: ${e})`,`Features: ${t.join(", ")}`,"","Each panel shows one feature's distribution across the SOM."," Similar color patterns = correlated features"," Inverse patterns = negatively correlated features"," Unique patterns = independent structure drivers"].join("\n")});for(let 
o=0;o<n.length;o++){if(!t.includes(n[o]))continue;const a=n[o].replace(/[^a-zA-Z0-9_]/g,"_");await T(l,e,`component_${o+1}_${a}.${s}`)}}else if("bmu_hits"===t)l.push({type:"text",text:[`BMU Hit Histogram (job: ${e})`,`Grid: ${i[0]}×${i[1]} | Samples: ${r.n_samples??0}`,"","Shows data density per SOM node."," Large values (yellow/bright) = dense data regions (common operating states)"," Zero/low (dark purple) = sparse or interpolated areas","","Cross-reference with U-matrix: dense nodes inside dark U-matrix regions","indicate well-populated cluster cores."].join("\n")}),await T(l,e,`hit_histogram.${s}`);else if("clusters"===t){const t=r.quantization_error,o=r.topographic_error,a=r.explained_variance,s=r.silhouette,u=void 0===t?"N/A":t<.5?"excellent":t<1?"good":t<2?"fair":"poor",d=void 0===o?"N/A":o<.05?"excellent":o<.1?"good":o<.2?"fair":"poor",c=e=>void 0!==e?e.toFixed(4):"N/A",p=[];void 0!==o&&o>.15&&p.push(`Topographic error ${(100*o).toFixed(1)}% is high — try a larger grid or more epochs.`),void 0!==t&&t>2&&p.push(`Quantization error ${t.toFixed(3)} is high — try more epochs, a larger grid, or check for outliers.`),void 0!==a&&a<.7&&p.push(`Explained variance ${(100*a).toFixed(1)}% is low — try more epochs, a larger grid, or feature weighting.`),void 0!==s&&s<.1&&p.push("Low silhouette score — clusters overlap. Try sigma_f=0.5 or more training."),0===p.length&&p.push("Metrics look healthy. 
Proceed with component plane and feature analysis.");const m=i[0]>0?`${i[0]}×${i[1]}`:"N/A";l.push({type:"text",text:[`Cluster Quality Assessment (job: ${e})`,`Grid: ${m} | Features: ${n.length} | Samples: ${r.n_samples??"N/A"}`,"",`Quantization Error: ${c(t)} (${u})`,`Topographic Error: ${c(o)} (${d})`,`Explained Variance: ${c(a)}`,`Silhouette Score: ${c(s)}`,`Davies-Bouldin: ${c(r.davies_bouldin)}`,`Calinski-Harabasz: ${c(r.calinski_harabasz)}`,"","Recommendations:",...p.map(e=>` - ${e}`)].join("\n")})}else if("feature_importance"===t)l.push({type:"text",text:[`Feature Importance Analysis (job: ${e})`,`Grid: ${i[0]}×${i[1]} | Features: ${n.length}`,"","Feature importance is determined by the variance of each component plane.","Higher variance = feature contributes more to the SOM structure.","",`Features analyzed: ${n.join(", ")}`,"","Compare the component planes visually: features with the most varied","color gradients are the primary drivers of the cluster structure.","Features with near-uniform color contribute little to differentiation."].join("\n")}),await T(l,e,`combined.${s}`);else if("feature_correlation"===t){l.push({type:"text",text:[`Feature Correlation Analysis (job: ${e})`,`Features: ${n.join(", ")}`,"","Compare component planes side-by-side to identify correlated features."," Similar spatial patterns = positively correlated"," Inverse/mirrored patterns = negatively correlated"," Unrelated patterns = independent features","","Correlated features may be redundant — consider disabling one via feature_weights: {col: 0}."].join("\n")});for(let t=0;t<n.length;t++){const o=n[t].replace(/[^a-zA-Z0-9_]/g,"_");await T(l,e,`component_${t+1}_${o}.${s}`)}}else if("transition_flow"===t)l.push({type:"text",text:[`Transition Flow Analysis (job: ${e})`,`Grid: ${i[0]}×${i[1]} | Samples: ${r.n_samples??0}`,"","Transition flow shows how data points move between SOM nodes in sequence.","This reveals temporal patterns and state machine behavior.","","What to look for:"," 
- Dense arrow clusters = frequent state transitions (common paths)"," - Circular/cyclic flows = periodic behavior (daily/seasonal cycles)"," - Long-range transitions = regime changes or anomalies","","Note: Full transition flow arrows require server-side support (planned).","Currently showing U-matrix for cluster boundary context."].join("\n")}),await T(l,e,`umatrix.${s}`);else if("local_density"===t)l.push({type:"text",text:[`Local Density & Cluster Analysis (job: ${e})`,`Grid: ${i[0]}×${i[1]} | Samples: ${r.n_samples??0}`,"","Local density = inverse of U-matrix values."," High density (low U-matrix) = cluster cores (similar neighbors)"," Low density (high U-matrix) = cluster boundaries (dissimilar neighbors)","","Cross-reference hit histogram with U-matrix:"," Dense hits + low U-matrix = populated cluster core (dominant operating mode)"," Dense hits + high U-matrix = transition zone with many samples (worth investigating)"," Sparse hits anywhere = rare state or interpolated region"].join("\n")}),await T(l,e,`umatrix.${s}`),await T(l,e,`hit_histogram.${s}`);else if("feature_gradient"===t){const t=o?.feature;if(l.push({type:"text",text:[`Feature Gradient Analysis (job: ${e})`,`Target: ${t??"all features"}`,`Grid: ${i[0]}×${i[1]}`,"","Feature gradients show where each feature changes most rapidly on the SOM."," High gradient = feature transitions rapidly (boundary region for this feature)"," Low gradient = feature is stable across this region","","Compare with U-matrix: if feature gradients align with U-matrix boundaries,","this feature is a key driver of the cluster separation."].join("\n")}),t){const o=n.indexOf(t);if(o>=0){const a=t.replace(/[^a-zA-Z0-9_]/g,"_");await T(l,e,`component_${o+1}_${a}.${s}`)}}else await T(l,e,`combined.${s}`);await T(l,e,`umatrix.${s}`)}else if("quality_report"===t){const t=await 
g("GET",`/v1/results/${e}/quality-report`),o=t.standard_metrics??{},a=t.cluster_metrics??{},r=t.topology_metrics??{},n=t.training??{},i=t.grid??[0,0],s=e=>null!=e?e.toFixed(4):"—",u=e=>null!=e?`${(100*e).toFixed(1)}%`:"—",d=[],c=o.quantization_error,p=o.topographic_error,m=o.explained_variance,f=a.silhouette,h=r.trustworthiness;null!=c&&c>2&&d.push("QE is high → try more epochs or a larger grid"),null!=p&&p>.15&&d.push("TE is high → topology is not well-preserved, try larger grid"),null!=m&&m<.7&&d.push("Explained variance < 70% → consider more training or feature selection"),null!=f&&f<.1&&d.push("Low silhouette → clusters overlap, try sigma_f=0.5 or more epochs"),null!=h&&h<.85&&d.push("Trustworthiness < 85% → local neighborhood structure is distorted"),0===d.length&&d.push("All metrics look healthy — good map quality!");const b=n.epochs,_=b?0===b[1]?`${b[0]} ordering only`:`${b[0]}+${b[1]}`:"—",y=[`Quality Report — Job ${e}`,`Grid: ${i[0]}×${i[1]} | Model: ${t.model??"SOM"} | Samples: ${t.n_samples??"?"}`,`Epochs: ${_} | Duration: ${n.duration_seconds?`${n.duration_seconds}s`:"—"}`,"","Standard Metrics:",` Quantization Error: ${s(o.quantization_error)} (lower is better)`,` Topographic Error: ${s(o.topographic_error)} (lower is better)`,` Distortion: ${s(o.distortion)}`,` Kaski-Lagus Error: ${s(o.kaski_lagus_error)} (lower is better)`,` Explained Variance: ${u(o.explained_variance)}`,"","Cluster Quality Metrics:",` Silhouette Score: ${s(a.silhouette)} (higher is better, -1 to +1)`,` Davies-Bouldin: ${s(a.davies_bouldin)} (lower is better)`,` Calinski-Harabasz: ${s(a.calinski_harabasz)} (higher is better)`,"","Topology Metrics:",` Neighborhood Preservation: ${u(r.neighborhood_preservation)} (higher is better)`,` Trustworthiness: ${u(r.trustworthiness)} (higher is better)`,` Topographic Product: ${s(r.topographic_product)} (near 0 is ideal)`,"","Recommendations:",...d.map(e=>` • 
${e}`)];l.push({type:"text",text:y.join("\n")})}return{content:l}}),_.tool("compare_runs",'Compare metrics across multiple completed SOM training jobs.\nReturns a table of QE, TE, silhouette, and other metrics for each job.\n\nUse to evaluate hyperparameter choices: grid size, epochs, sigma_f, model type, feature selection.\n\nAfter comparing, ask the user:\n"Which job produced the best metrics for your goal?"\n- For visualization clarity: prioritize low topographic error (<0.1)\n- For tight clusters: prioritize low QE and high silhouette\n- For dimensionality reduction: prioritize high explained variance (>0.8)',{job_ids:o.array(o.string()).min(2).describe("Array of job IDs to compare (minimum 2)")},async({job_ids:e})=>{const t=e.join(","),o=(await g("GET",`/v1/jobs/compare?ids=${t}`)).comparisons??[],a=["| Job ID | Grid | Epochs | Model | QE | TE | Expl.Var | Silhouette |","|--------|------|--------|-------|----|----|----------|------------|"];for(const e of o){if(e.error){a.push(`| ${e.job_id.slice(0,8)}... | — | — | — | ${e.error} | — | — | — |`);continue}const t=e.grid,o=t?`${t[0]}×${t[1]}`:"—",r=e.epochs,n=r?0===r[1]?`${r[0]}+0`:`${r[0]}+${r[1]}`:"—",i=e.model??"—",s=e=>null!=e?Number(e).toFixed(4):"—";a.push(`| ${e.job_id.slice(0,8)}... | ${o} | ${n} | ${i} | ${s(e.quantization_error)} | ${s(e.topographic_error)} | ${s(e.explained_variance)} | ${s(e.silhouette)} |`)}return{content:[{type:"text",text:a.join("\n")}]}}),_.tool("manage_job","Cancel or delete a job.\n\naction=cancel: Cancel a pending or running job. Not instant — worker checks between phases (expect up to 30s). Use when run is too slow, wrong params, or to free the worker. Partial results discarded.\naction=delete: Permanently delete a job and all S3 result files. Use to free storage, remove test runs, or clean up after cancel. 
WARNING: Job ID will no longer work with get_results or other tools.",{job_id:o.string().describe("Job ID to cancel or delete"),action:o.enum(["cancel","delete"]).describe("cancel: stop the job; delete: remove job and all result files")},async({job_id:e,action:t})=>{if("cancel"===t){return h(await g("POST",`/v1/jobs/${e}/cancel`))}return h(await g("DELETE",`/v1/jobs/${e}`))}),_.tool("list","List datasets or jobs.\n\ntype=datasets: List all datasets uploaded by the organization. Use to check what data is available before train_som or to find dataset IDs.\ntype=jobs: List SOM training jobs (optionally filtered by dataset_id). Use to find job IDs for compare_runs, check completed vs pending, or review hyperparameters.",{type:o.enum(["datasets","jobs"]).describe("What to list: datasets or jobs"),dataset_id:o.string().optional().describe("Filter jobs by dataset ID (only used when type=jobs)")},async({type:e,dataset_id:t})=>{if("datasets"===e){return h(await g("GET","/v1/datasets"))}const o=t?`/v1/jobs?dataset_id=${t}`:"/v1/jobs",a=await g("GET",o);if("jobs"===e&&Array.isArray(a)){const e=a.map(e=>{const t=String(e.id??""),o=String(e.status??""),a=null!=e.label&&""!==e.label?String(e.label):null;return a?`${a} (id: ${t}) — status: ${o}`:`id: ${t} — status: ${o}`});return{content:[{type:"text",text:e.length>0?e.join("\n"):"No jobs found."}]}}return h(a)}),_.tool("get_job_export","Export structured data from a completed SOM training job.\n\nexport=training_log: Learning curve and diagnostics (per-epoch QE, sparklines, inline plot). Use to diagnose convergence, plateau, or divergence.\nexport=weights: Raw weight matrix with node_coords, normalized/denormalized values, normalization stats. Use for external analysis or custom visualizations. Can be large (e.g. 600KB+ for 30×30×12).\nexport=nodes: Per-node statistics (hit count, feature mean/std). 
Use to profile clusters and characterize operating modes.",{job_id:o.string().describe("Job ID of a completed training job"),export:o.enum(["training_log","weights","nodes"]).describe("What to export: training_log, weights, or nodes")},async({job_id:e,export:t})=>{if("training_log"===t){const t=await g("GET",`/v1/results/${e}/training-log`),o=t.ordering_errors??[],a=t.convergence_errors??[],r=t.training_duration_seconds,n=t.epochs,i=e=>{if(0===e.length)return"(no data)";const t=Math.min(...e),o=Math.max(...e)-t||1;return e.map(e=>"▁▂▃▄▅▆▇█"[Math.min(7,Math.floor((e-t)/o*7))]).join("")},s=[`Training Log — Job ${e}`,`Grid: ${JSON.stringify(t.grid)} | Model: ${t.model??"SOM"}`,"Epochs: "+(n?`[${n[0]} ordering, ${n[1]} convergence]`:"N/A"),"Duration: "+(null!=r?`${r}s`:"N/A"),`Features: ${t.n_features??"?"} | Samples: ${t.n_samples??"?"}`,"",`Ordering Phase (${o.length} epochs):`,` Start QE: ${o[0]?.toFixed(4)??"—"} → End QE: ${o.at(-1)?.toFixed(4)??"—"}`,` Curve: ${i(o)}`];a.length>0?s.push("",`Convergence Phase (${a.length} epochs):`,` Start QE: ${a[0]?.toFixed(4)??"—"} → End QE: ${a.at(-1)?.toFixed(4)??"—"}`,` Curve: ${i(a)}`):0===(n?.[1]??0)&&s.push("","Convergence phase: skipped (epochs[1]=0)");const l=t.quantization_error,u=t.explained_variance;null!=l&&s.push("",`Final QE: ${l.toFixed(4)} | Explained Variance: ${(u??0).toFixed(4)}`);const d=[{type:"text",text:s.join("\n")}];let c=!1;for(const t of["png","pdf","svg"])try{const{data:o}=await f(`/v1/results/${e}/image/learning_curve.${t}`);d.push({type:"image",data:o.toString("base64"),mimeType:S(`learning_curve.${t}`),annotations:{audience:["user"],priority:.8}}),c=!0;break}catch{continue}return c||d.push({type:"text",text:"(learning curve plot not available)"}),{content:d}}if("weights"===t){const t=await g("GET",`/v1/results/${e}/weights`),o=t.features??[],a=t.n_nodes??0,r=t.grid??[0,0],n=[`SOM Weights — Job ${e}`,`Grid: ${r[0]}×${r[1]} | Nodes: ${a} | Features: ${o.length}`,"node_coords: [x,y] per node for 
topology",`Features: ${o.join(", ")}`,"","Normalization Stats:"],i=t.normalization_stats??{};for(const[e,t]of Object.entries(i))n.push(` ${e}: mean=${t.mean?.toFixed(4)}, std=${t.std?.toFixed(4)}`);return n.push("","Full weight matrix available in the response JSON (includes node_coords).","Use the denormalized_weights array for original-scale values."),{content:[{type:"text",text:n.join("\n")},{type:"text",text:JSON.stringify(t,null,2)}]}}const o=await g("GET",`/v1/results/${e}/nodes`),a=[...o].sort((e,t)=>(t.hit_count??0)-(e.hit_count??0)).slice(0,10),r=o.filter(e=>0===e.hit_count).length,n=o.reduce((e,t)=>e+(t.hit_count??0),0),i=[`Node Statistics — Job ${e}`,`Total nodes: ${o.length} | Active: ${o.length-r} | Empty: ${r}`,`Total hits: ${n}`,"","Top 10 Most Populated Nodes:","| Node | Coords | Hits | Hit% |","|------|--------|------|------|"];for(const e of a){if(0===e.hit_count)break;const t=e.coords,o=(e.hit_count/n*100).toFixed(1);i.push(`| ${e.node_index} | (${t?.[0]?.toFixed(1)}, ${t?.[1]?.toFixed(1)}) | ${e.hit_count} | ${o}% |`)}return{content:[{type:"text",text:i.join("\n")},{type:"text",text:`\nFull node statistics JSON:\n${JSON.stringify(o,null,2)}`}]}}),_.tool("project_variable",'Project a pre-computed variable onto a trained SOM without retraining.\n\nBEST FOR: Mapping external metrics (revenue, labels, anomaly scores) onto the\ntrained SOM structure. For formula-based variables from the training dataset,\nprefer derive_variable with project_onto_job; use project_variable only for\nexternally computed arrays.\nNOT FOR: Re-training or adding features to the map.\n\nTIMING: ~5–15s (loads cached SOM, computes per-node aggregation, renders plot).\n\nvalues: array of length n_samples (~11 bytes/sample). Must match training sample\ncount exactly (same CSV row order). Aggregation controls how multiple samples\nper node are combined (mean/median/sum/max/count).\n\nBEFORE calling, ask:\n- "What variable? 
Is it from the original data or externally computed?"\n- "How to aggregate per node: mean (typical), sum (totals), max (peaks)?"\n\nCOMMON MISTAKES:\n- Wrong number of values (must match n_samples from training)\n- Using mean aggregation for count data (use sum instead)\n- Not trying derive_variable first when the variable can be computed from columns\n\nHINT: If values length mismatch, suggest derive_variable for formula-based variables.',{job_id:o.string().describe("ID of the completed SOM training job"),variable_name:o.string().describe("Name for this variable (used in visualization labels)"),values:o.array(o.number()).describe("Array of values to project — one per training sample, in original CSV row order"),aggregation:o.enum(["mean","median","sum","min","max","std","count"]).optional().default("mean").describe("How to aggregate values for nodes with multiple samples"),output_format:o.enum(["png","pdf","svg"]).optional().default("pdf").describe("Image output format for the projection plot (default: pdf)."),output_dpi:o.enum(["standard","retina","print"]).optional().default("retina").describe("Resolution: standard (1x), retina (2x), print (4x)."),colormap:o.string().optional().describe("Override colormap for the projection plot (default: coolwarm). 
Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral.")},async({job_id:e,variable_name:t,values:o,aggregation:a,output_format:r,output_dpi:n,colormap:i})=>{const s={variable_name:t,values:o,aggregation:a??"mean",output_format:r??"pdf"};n&&"retina"!==n&&(s.output_dpi={standard:1,retina:2,print:4}[n]??2),i&&(s.colormap=i);const l=(await g("POST",`/v1/results/${e}/project`,s)).id,u=await b(l);if("completed"===u.status){const o=(await g("GET",`/v1/results/${l}`)).summary??{},n=o.variable_stats??{},i=[];i.push({type:"text",text:[`Projected Variable: ${t} (${a??"mean"}) — job: ${l}`,`Parent SOM: ${e} | Samples: ${o.n_samples??0}`,"",`Variable Statistics (per-node ${a??"mean"}):`,` Min: ${void 0!==n.min?Number(n.min).toFixed(3):"N/A"}`,` Max: ${void 0!==n.max?Number(n.max).toFixed(3):"N/A"}`,` Mean: ${void 0!==n.mean?Number(n.mean).toFixed(3):"N/A"}`,` Nodes with data: ${n.n_nodes_with_data??"N/A"}`].join("\n")});const s=t.replace(/[^a-zA-Z0-9_]/g,"_"),u=o.output_format??r??"pdf";return await T(i,l,`projected_${s}.${u}`),{content:i}}return"failed"===u.status?{content:[{type:"text",text:`Projection job ${l} failed: ${u.error??"unknown error"}`}]}:{content:[{type:"text",text:["Variable projection job submitted but did not complete within 30s.",`Projection job ID: ${l}`,"",`Poll with: get_job_status(job_id="${l}")`,`Retrieve with: get_results(job_id="${l}")`].join("\n")}]}}),_.tool("transition_flow",'Compute temporal transition flow for a trained SOM.\n\nShows how data points move between SOM nodes over time — revealing directional\npatterns, cycles, and state machine behavior in sequential data.\n\n**Requires time-ordered data.** Each row must represent a consecutive observation;\nthe transition from row i to row i+lag is counted. 
If rows are not time-ordered,\nresults will be meaningless.\n\nBest used for:\n- **Time-series dynamics**: how does the system state evolve step-by-step?\n- **Cyclic processes**: daily/weekly patterns, recurring operating modes\n- **Process monitoring**: identify common transition paths and bottlenecks\n- **Regime detection**: find absorbing states (nodes with self-loops) vs transient hubs\n\n**lag** controls the temporal horizon:\n- lag=1 (default): immediate next-step transitions — "where does the system go next?"\n- lag=N: transitions N steps apart — useful for periodic analysis (e.g. lag=24 for daily cycles in hourly data)\n- Try multiple lags to reveal different temporal scales.\n\n**min_transitions** filters noisy arrows — only transitions observed at least this many times are drawn. Increase for cleaner plots on large datasets.\n\n**top_k** controls how many top-flow nodes are reported in statistics.\n\nBEFORE calling, confirm:\n- Data is time-ordered (chronological row sequence)\n- The lag makes sense for the time resolution (e.g. lag=1 for hourly data = 1 hour ahead)\n\nAfter showing results, discuss:\n- Arrow direction and clustering patterns\n- Hub nodes (many transitions through them) vs absorbing nodes (self-loops)\n- Whether cyclic flow matches known periodic behavior',{job_id:o.string().describe("ID of the completed SOM training job"),lag:o.number().int().min(1).optional().default(1).describe("Step lag for transition pairs (default 1 = consecutive rows). Use larger values for periodic analysis (e.g. 24 for daily cycles in hourly data)."),min_transitions:o.number().int().min(1).optional().describe("Minimum transition count to draw an arrow (default: auto). Increase to filter noise on large datasets."),top_k:o.number().int().min(1).optional().default(10).describe("Number of top-flow nodes to include in statistics (default 10)."),colormap:o.string().optional().describe("Colormap for the U-matrix background (default: grays). 
Try viridis, plasma, or inferno for more contrast."),output_format:o.enum(["png","pdf","svg"]).optional().default("pdf").describe("Image output format for the flow plot (default: pdf)."),output_dpi:o.enum(["standard","retina","print"]).optional().default("retina").describe("Resolution: standard (1x), retina (2x), print (4x).")},async({job_id:e,lag:t,min_transitions:o,top_k:a,colormap:r,output_format:n,output_dpi:i})=>{const s={lag:t??1,output_format:n??"pdf"};void 0!==o&&(s.min_transitions=o),void 0!==a&&(s.top_k=a),void 0!==r&&(s.colormap=r),i&&"retina"!==i&&(s.output_dpi={standard:1,retina:2,print:4}[i]??2);const l=(await g("POST",`/v1/results/${e}/transition-flow`,s)).id,u=await b(l);if("completed"===u.status){const o=(await g("GET",`/v1/results/${l}`)).summary??{},a=o.flow_stats??{},r=[];r.push({type:"text",text:[`Transition Flow Results (job: ${l})`,`Parent SOM: ${e} | Lag: ${t??1} | Samples: ${o.n_samples??0}`,"","Flow Statistics:",` Active flow nodes: ${a.active_flow_nodes??"N/A"}`,` Total transitions: ${a.total_transitions??"N/A"}`,` Mean magnitude: ${void 0!==a.mean_magnitude?Number(a.mean_magnitude).toFixed(4):"N/A"}`].join("\n")});const i=n??"pdf";return await T(r,l,`transition_flow_lag${t??1}.${i}`),{content:r}}return"failed"===u.status?{content:[{type:"text",text:`Transition flow job ${l} failed: ${u.error??"unknown error"}`}]}:{content:[{type:"text",text:["Transition flow job submitted but did not complete within 30s.",`Flow job ID: ${l}`,`Parent job: ${e} | Lag: ${t??1}`,"",`Poll with: get_job_status(job_id="${l}")`,`Retrieve with: get_results(job_id="${l}")`].join("\n")}]}}),_.tool("derive_variable",'Create a derived variable from existing dataset columns using mathematical expressions.\n\nBEST FOR: Computing ratios, differences, log transforms, rolling statistics,\nor any combination of existing columns — either to enrich a dataset before\ntraining or to project a computed variable onto an existing SOM.\n\nTWO MODES:\n1. 
Add to dataset (default): computes the new column and appends it to the dataset CSV.\n The column is then available for future train_som calls via the \'columns\' parameter.\n2. Project onto SOM: computes the column from the training dataset and projects it\n onto a trained SOM, returning the visualization. Use this to explore how\n derived quantities distribute across the learned map structure.\n\nCOMMON FORMULAS:\n- Ratio: "revenue / cost"\n- Difference: "US10Y - US3M"\n- Log return: "log(close) - log(open)"\n- Z-score: "(volume - rolling_mean(volume, 20)) / rolling_std(volume, 20)"\n- Magnitude: "sqrt(x^2 + y^2)"\n- Unit convert: "temperature - 273.15"\n- First diff: "diff(consumption)"\n\nSUPPORTED FUNCTIONS:\n- Arithmetic: +, -, *, /, ^\n- Math: log, log1p, log10, exp, sqrt, abs, sign, clamp, min, max\n- Trig: sin, cos, tan, asin, acos, atan\n- Rolling: rolling_mean(col, window), rolling_std(col, window), rolling_min, rolling_max\n- Temporal: diff(col), diff(col, n)\n- Constants: pi, numeric literals\n\nWORKFLOW: Ask the user what domain-specific variables they care about.\nSuggest derived variables based on the column names. For example, if\nthe dataset has "revenue" and "cost", suggest "revenue - cost" as profit\nand "revenue / cost" as cost efficiency.\n\nEXPRESSION REFERENCE: In expressions, use underscore-normalized column names (e.g. fixed_acidity not "fixed acidity"). Column names with spaces/special chars are converted to underscores — use that form. Operators: +, -, *, /, ^. Functions: log, sqrt, rolling_mean(col, window), diff(col), etc.\n\nCOMMON MISTAKES:\n- Division by zero: if denominator column has zeros, use options.missing="skip"\n- Rolling functions produce NaN for the first (window-1) rows\n- diff() produces NaN for the first row\n- Spaces in column names: use underscores (e.g. 
fixed_acidity not "fixed acidity")',{dataset_id:o.string().describe("Dataset ID (source of column data)"),name:o.string().describe("Name for the derived variable (used in column header and visualization)"),expression:o.string().describe("Mathematical expression referencing column names. Examples: 'revenue / cost', 'log(price)', 'diff(temperature)', 'sqrt(x^2 + y^2)', 'rolling_mean(volume, 20)'"),project_onto_job:o.string().optional().describe("If provided, project the derived variable onto this SOM job instead of adding to dataset. The job must be a completed train_som job."),aggregation:o.enum(["mean","median","sum","min","max","std","count"]).optional().default("mean").describe("How to aggregate values per SOM node (only used when project_onto_job is set)"),options:o.object({missing:o.enum(["skip","zero","interpolate"]).optional().default("skip").describe("How to handle NaN/missing values in the result"),window:o.number().int().optional().describe("Default window size for rolling functions (default 20)"),description:o.string().optional().describe("Human-readable description of what this variable represents")}).optional().describe("Configuration for expression evaluation"),output_format:o.enum(["png","pdf","svg"]).optional().default("pdf").describe("Image format for projection visualization when project_onto_job is set (default: pdf)."),output_dpi:o.enum(["standard","retina","print"]).optional().default("retina").describe("Resolution for projection visualization"),colormap:o.string().optional().describe("Colormap for projection visualization (default: coolwarm). 
Examples: viridis, plasma, inferno, magma, cividis, turbo, coolwarm, RdBu, Spectral.")},async({dataset_id:e,name:t,expression:o,project_onto_job:a,aggregation:r,options:n,output_format:i,output_dpi:s,colormap:l})=>{const u={standard:1,retina:2,print:4};if(a){const e={name:t,expression:o,aggregation:r??"mean",output_format:i??"pdf"};n&&(e.options=n),s&&"retina"!==s&&(e.output_dpi=u[s]??2),l&&(e.colormap=l);const d=(await g("POST",`/v1/results/${a}/derive`,e)).id,c=await b(d);if("completed"===c.status){const e=(await g("GET",`/v1/results/${d}`)).summary??{},n=e.variable_stats??{},s=[];s.push({type:"text",text:[`Derived Variable Projected: ${t} — job: ${d}`,`Expression: ${o}`,`Parent SOM: ${a} | Aggregation: ${r??"mean"}`,"",`Statistics (per-node ${r??"mean"}):`,` Min: ${void 0!==n.min?Number(n.min).toFixed(3):"N/A"}`,` Max: ${void 0!==n.max?Number(n.max).toFixed(3):"N/A"}`,` Mean: ${void 0!==n.mean?Number(n.mean).toFixed(3):"N/A"}`,` Nodes with data: ${n.n_nodes_with_data??"N/A"}`,e.nan_count?` NaN values: ${e.nan_count}`:""].filter(e=>""!==e).join("\n")});const l=t.replace(/[^a-zA-Z0-9_]/g,"_"),u=e.output_format??i??"pdf";return await T(s,d,`projected_${l}.${u}`),{content:s}}return"failed"===c.status?{content:[{type:"text",text:`Derive+project job ${d} failed: ${c.error??"unknown error"}`}]}:{content:[{type:"text",text:`Derive job submitted. Poll: get_job_status("${d}")`}]}}{const a={name:t,expression:o};n&&(a.options=n);const r=(await g("POST",`/v1/datasets/${e}/derive`,a)).id,i=await b(r);if("completed"===i.status){const a=(await g("GET",`/v1/results/${r}`)).summary??{};return{content:[{type:"text",text:[`Derived column "${t}" added to dataset ${e}`,`Expression: ${o}`,`Rows: ${a.n_rows??"?"}`,a.nan_count?`NaN values: ${a.nan_count}`:"",`Min: ${a.min??"?"} | Max: ${a.max??"?"} | Mean: ${a.mean??"?"}`,"","The column is now available in the dataset. 
Include it in train_som","via the 'columns' parameter, or use datasets(action=preview) to verify."].filter(e=>""!==e).join("\n")}]}}return"failed"===i.status?{content:[{type:"text",text:`Derive variable job ${r} failed: ${i.error??"unknown error"}`}]}:{content:[{type:"text",text:`Derive job submitted. Poll: get_job_status("${r}")`}]}}}),_.tool("manage_account","Manage compute leases, check account status, and view billing history.\nYour default backend is your local Primary Server. Use this tool to temporarily upgrade to cloud compute for heavy jobs.\n\nActions:\n- request_compute: Provisions a cloud burst instance (requires tier and duration_minutes). Leaves tier blank to list options.\n- compute_status: Checks if a burst lease is active and how much time remains.\n- release_compute: Manually terminates an active lease and reverts routing to the Primary Server.\n- compute_history: Views recent compute leases and credit spend.\n- add_funds: Instructions on how to add credits to your account.",{action:{type:"string",description:"One of: request_compute, compute_status, release_compute, compute_history, add_funds"},tier:{type:"string",description:"Compute tier ID (e.g., cpu-8, gpu-t4). 
Leave empty during request_compute to list tiers.",optional:!0},duration_minutes:{type:"number",description:"How long to lease the instance (default: 60)",optional:!0},limit:{type:"number",description:"Number of records to fetch for compute_history (default: 10)",optional:!0}},async({action:e,tier:t,duration_minutes:o,limit:a})=>{if("request_compute"===e){if(!t)return{content:[{type:"text",text:"Available Compute Tiers:\nCPU Tiers:\n cpu-8: 16 vCPUs, 32 GB RAM (~$0.20/hr)\n cpu-16: 32 vCPUs, 64 GB RAM (~$0.20/hr)\n cpu-24: 48 vCPUs, 96 GB RAM (~$0.28/hr)\n cpu-32: 64 vCPUs, 128 GB RAM (~$0.42/hr)\n cpu-48: 96 vCPUs, 192 GB RAM (~$0.49/hr)\nGPU Tiers:\n gpu-t4: 8 vCPUs, 32 GB, T4 16GB VRAM (~$0.22/hr)\n gpu-t4x: 16 vCPUs, 64 GB, T4 16GB VRAM (~$0.36/hr)\n gpu-t4xx: 32 vCPUs, 128 GB, T4 16GB VRAM (~$0.27/hr)\n gpu-l4: 8 vCPUs, 32 GB, L4 24GB VRAM (~$0.41/hr)\n gpu-l4x: 16 vCPUs, 64 GB, L4 24GB VRAM (~$0.37/hr)\n gpu-a10: 8 vCPUs, 32 GB, A10G 24GB VRAM (~$0.51/hr)\n gpu-a10x: 16 vCPUs, 64 GB, A10G 24GB VRAM (~$0.52/hr)"}]};const e=await g("POST","/v1/compute/lease",{tier:t,duration_minutes:o});return{content:[{type:"text",text:`Compute Lease Requested:\nLease ID: ${e.lease_id}\nStatus: ${e.status}\nEstimated Wait: ${e.estimated_wait_minutes} minutes\nEstimated Cost: $${(e.estimated_cost_cents/100).toFixed(2)}\nCredits Remaining After Reserve: $${(e.credit_balance_cents/100).toFixed(2)}\n\nIMPORTANT: Cloud burst active. Data is pulled from shared Cloudflare R2, so you do NOT need to re-upload datasets. 
Just wait ~3 minutes and check status.`}]}}if("compute_status"===e){const e=await g("GET","/v1/compute/lease");return"none"!==e.status&&e.lease_id?{content:[{type:"text",text:`Active Compute Lease:\nLease ID: ${e.lease_id}\nStatus: ${e.status}\nTier: ${e.tier} (${e.instance_type})\nTime Remaining: ${Math.round(e.time_remaining_ms/6e4)} minutes`}]}:{content:[{type:"text",text:"No active lease -- running on default Primary Server."}]}}if("release_compute"===e){const e=await g("DELETE","/v1/compute/lease");return{content:[{type:"text",text:`Compute Released:\nDuration Billed: ${e.duration_minutes} minutes\nCredits Deducted: $${(e.credits_deducted/100).toFixed(2)}\nFinal Balance: $${(e.final_balance_cents/100).toFixed(2)}\n\nRouting reverted to default Primary Server.`}]}}if("compute_history"===e){const e=await g("GET",`/v1/compute/history?limit=${a||10}`),t=e.history.map(e=>`- ${e.started_at} | ${e.tier} | ${e.duration_minutes} min | $${(e.credits_charged/100).toFixed(2)}`).join("\n");return{content:[{type:"text",text:`Credit Balance: $${(e.credit_balance_cents/100).toFixed(2)}\n\nRecent Usage:\n${t}`}]}}return"add_funds"===e?{content:[{type:"text",text:"To add funds to your account, please visit the Barivia Billing Portal (integration pending) or ask your administrator to use the CLI tool:\nbash scripts/manage-credits.sh add <org_id> <amount_usd>"}]}:{content:[{type:"text",text:`Unknown action: ${e}. 
Valid actions are request_compute, compute_status, release_compute, compute_history, add_funds.`}]}}),_.tool("license_info","Get plan/license capabilities, backend info, live status, and training time estimates.\n\nReturns what the current API key is connected to: plan tier, compute class\n(CPU/GPU), usage limits, backend hardware details, job queue state, and\nestimated training times.\n\nUse this BEFORE submitting large jobs to:\n- See your plan's compute class and whether GPU is available\n- Check queue depth to decide whether to wait or proceed\n- Estimate wall-clock time based on the current topology",{},async()=>{const e=await g("GET","/v1/system/info"),t=e.plan??{},o=e.backend??{},a=e.status??{},r=e.training_time_estimates_seconds??{},n=e.worker_topology??{},i=t.gpu_enabled??e.gpu_available??!1?o.gpu_model?`GPU-accelerated (${o.gpu_model}${o.gpu_vram_gb?`, ${o.gpu_vram_gb} GB VRAM`:""})`:"GPU-accelerated":"CPU only",s=e=>-1===e||"-1"===e?"unlimited":String(e??"?"),l=await g("GET","/v1/compute/lease").catch(()=>null),u=await g("GET","/v1/compute/history?limit=30").catch(()=>null),d=[`Your Plan: ${String(t.tier??"unknown").charAt(0).toUpperCase()}${String(t.tier??"unknown").slice(1)}`,` Priority: ${t.priority??"normal"}`,` Concurrency: ${t.max_concurrent_jobs??"?"} simultaneous job${1!==t.max_concurrent_jobs?"s":""}`,` Datasets: ${s(t.max_datasets)} max, ${s(t.max_dataset_rows)} rows each`,` Monthly Jobs: ${s(t.max_monthly_jobs)}`,` Grid Size: ${s(t.max_grid_size)} max`,` Features: ${s(t.max_features)} max`];u&&d.push("",`Compute Credits: $${(u.credit_balance_cents/100).toFixed(2)} remaining`),d.push("","Default Backend:",` Label: ${o.label??"unknown"}`,` Compute: ${i}`),o.memory_gb&&d.push(` Memory: ${o.memory_gb} GB`),l&&l.lease_id&&d.push("","Active Compute Lease (Burst):",` Status: ${l.status}`,` Tier: ${l.tier} (${l.instance_type})`,` Time Left: ${Math.round(l.time_remaining_ms/6e4)} min`);const 
c=Number(a.running_jobs??e.running_jobs??0),p=Number(a.pending_jobs??e.pending_jobs??0),m=Number(a.queue_depth??e.queue_depth??0),f=a.free_memory_gb??e.free_memory_gb,h=(p+1)*Number(e.training_time_estimates_seconds?.total||0);return d.push("","Live Status:",` Running Jobs: ${c}`,` Pending Jobs: ${p}`,` Queue Depth: ${m}`,` Current Wait: ~${Math.round(h/60)} minutes (before your job starts)`),void 0!==f&&d.push(` Free Memory: ${f} GB`),void 0!==n.num_workers&&d.push("","Worker Topology:",` Workers: ${n.num_workers} × ${n.threads_per_worker} threads`,` Total Thread Budget: ${n.total_thread_budget}`),Object.keys(r).length>0&&(d.push("","Estimated Training Times (seconds):",...Object.entries(r).filter(([e])=>"formula"!==e).map(([e,t])=>` ${e}: ~${t}s`)),r.formula&&d.push(` ${r.formula}`)),{content:[{type:"text",text:d.join("\n")}]}}),_.prompt("info","Overview of the Barivia SOM MCP: capabilities, workflow, tools, analysis types, and tips. Use when the user asks what this MCP can do, how to get started, or what the process is.",{},()=>({messages:[{role:"user",content:{type:"text",text:["Inform the user using this overview:","","**What it is:** Barivia MCP connects you to a Self-Organizing Map (SOM) analytics engine. SOMs learn a 2D map from high-dimensional data for visualization, clustering, pattern discovery, and temporal analysis.","","**Core workflow:**","1. **Upload** — `datasets(action=upload)` with a CSV file path or inline data","2. **Preview** — `datasets(action=preview)` to inspect columns, stats, and detect cyclic/datetime fields","3. **Prepare** — use the `prepare_training` prompt for a guided checklist (column selection, transforms, cyclic encoding, feature weights, grid sizing)","4. **Train** — `train_som` with grid size, epochs, model type, and preprocessing options. Use `preset=quick|standard|refined|high_res` for sensible defaults","5. **Monitor** — `get_job_status` to track progress; `get_results` to retrieve figures when complete","6. 
**Analyze** — `analyze` with various analysis types (see below)","7. **Iterate** — `recolor_som` to change colormap without retraining, `compare_runs` to compare hyperparameters, `project_variable` to overlay new variables","","**Analysis types** (via `analyze`):","- `u_matrix` — cluster boundary distances","- `component_planes` — per-feature heatmaps","- `clusters` — automatic cluster detection and statistics","- `quality_report` — QE, TE, explained variance, trustworthiness, neighborhood preservation","- `hit_histogram` — data density across the map","- `transition_flow` — temporal flow patterns (requires time-ordered data)","","**Data tools:**","- `datasets(action=subset)` — filter by row range, value thresholds (gt/lt/gte/lte), equality, or set membership. Combine row_range + filter","- `derive_variable` — create computed columns from expressions (ratios, differences, etc.)","- `project_variable` — overlay any variable onto a trained SOM","","**Output options:** Format (png/pdf/svg) and colormap (coolwarm, viridis, plasma, inferno, etc.) can be set at training or changed later via recolor_som.","","**Key tools:** datasets, list, train_som, get_job_status, get_results, analyze, recolor_som, download_results, project_variable, transition_flow, compare_runs, derive_variable, license_info, explore_som.","","**Tips:**","- Always `preview` before training to understand your data","- Use `license_info` to check GPU availability and plan limits before large jobs","- Start with `preset=quick` for fast iteration, then `refined` for publication quality","- For time-series data, consider `transition_flow` after training","","Keep the reply scannable with headers and bullet points."].join("\n")}}]})),_.prompt("prepare_training","Guided pre-training checklist. Use after uploading a dataset and before calling train_som. 
Walks through data inspection, column selection, transforms, cyclic/temporal features, weighting, subsetting, and grid sizing.",{dataset_id:o.string().describe("Dataset ID to prepare for training")},async({dataset_id:e})=>{let t=`Please run datasets(action="preview", dataset_id="${e}") first, then call train_som with appropriate parameters or preset.`;try{const o=await g("GET",`/v1/docs/prepare_training?dataset_id=${e}`);o.prompt&&(t=o.prompt)}catch(e){}return{messages:[{role:"user",content:{type:"text",text:t}}]}});const E=new t;_.tool("send_feedback",'Send brief feedback or feature requests to Barivia developers (max 190 words). Use when the user has suggestions, ran into issues, or wants something improved. Do NOT call without asking the user first — but you should proactively generate feedback text based on the user\'s workflow or errors encountered and ask them "Would you like me to send this feedback to the developers?". Once they accept, call this tool.',{feedback:o.string().max(1330).describe("Feedback text")},async({feedback:e})=>h(await g("POST","/v1/feedback",{feedback:e}))),async function(){await _.connect(E)}().catch(console.error);
|