npm - @argosvix/mcp-server - Versions diffs - 0.20.0-alpha.1 → 0.22.0-alpha.1 - Mend

@argosvix/mcp-server 0.20.0-alpha.1 → 0.22.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/tools.js CHANGED Viewed

@@ -130,6 +130,24 @@ const TOOL_ARG_ALLOWLIST = {
     // は scores 同梱、 run は POST (= Pro+ で startEvalRun に渡す軸)。
     list_eval_runs: ["limit"],
     get_eval_run: ["runId"],
+    compare_eval_runs: ["baselineRunId", "candidateRunId"],
+    bulk_delete_calls: ["callIds", "dryRun"],
+    export_calls: ["startTime", "endTime", "provider", "model", "limit"],
+    list_saved_views: [],
+    create_saved_view: ["name", "filter"],
+    delete_saved_view: ["id"],
+    list_audit_log: ["limit", "eventType", "targetKind", "actorUserId", "from", "to", "cursor"],
+    aggregate_calls: ["startTime", "endTime", "groupBy", "metric", "provider", "tagKey"],
+    get_percentiles: ["startTime", "endTime", "provider", "model", "metric", "groupBy"],
+    list_projects: [],
+    create_project: ["name", "slug"],
+    rename_project: ["projectId", "name", "slug"],
+    delete_project: ["projectId"],
+    // 2026-06-05 axis 4 Tier 1 = get_account_health (= AI agent が 1 call で
+    // 自社 LLM infra の健康状態 サマリを取得)。 既存 4 endpoint (= aggregate /
+    // percentiles / llm-budget / audit) を 並列 fetch + 1 narrative response に
+    // 圧縮。 window 軸のみ 受け、 backend 新 endpoint 不要 (= 純 read aggregator)。
+    get_account_health: ["window"],
     // 2026-06-02 Codex round 2 🔴 fix = idempotencyKey 必須 path (= AI agent が
     // retry した時に backend で dedup)、 client が opaque string 64 char で carry。
     run_eval: ["name", "recentCount", "label", "promptRegistryId", "idempotencyKey"],
@@ -1205,6 +1223,379 @@ export const tools = [
             },
         },
     },
+    {
+        name: "get_percentiles",
+        description: "calls の percentile metrics を 取得 (= POST /v1/query/percentiles)。 metric = 'latency' (= レイテンシ ms) or 'cost' (= USD)、 全期間 1 数値 or groupBy='day'/'hour'/'minute' で 時系列 series。 " +
+            "AI agent が 「先週の p95 latency 推移を 日次で」 narrative で carry。 D1 SQLite (= percentile_cont 不在) で window function 経由 nearest-rank 法 計算。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {
+                startTime: {
+                    type: "string",
+                    description: "範囲開始 ISO timestamp (= UTC、 省略 = 全期間)",
+                },
+                endTime: {
+                    type: "string",
+                    description: "範囲終了 ISO timestamp",
+                },
+                provider: { type: "string", description: "provider filter" },
+                model: { type: "string", description: "model filter" },
+                metric: {
+                    type: "string",
+                    description: "metric 種別、 default = 'latency'",
+                    enum: ["latency", "cost"],
+                    default: "latency",
+                },
+                groupBy: {
+                    type: "string",
+                    description: "時系列 分割 軸 (省略 = 全期間 1 数値、 'day' = 日次、 'hour' = 時間別、 'minute' = 分別)",
+                    enum: ["day", "hour", "minute"],
+                },
+            },
+        },
+    },
+    {
+        name: "list_projects",
+        description: "自 account の active projects を 一覧取得 (= GET /v1/projects、 archived 除外)。 " +
+            "v1.5 project switcher narrative で、 AI agent が 「dev / staging / prod 環境別 観測」 carry。 Pro 5 件 / Team unlimited、 Free は default のみ。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {},
+        },
+    },
+    {
+        name: "create_project",
+        description: "新規 project を 作成 (= POST /v1/projects)。 name = 表示名、 slug = URL-safe 短い識別子 (= /^[a-z][a-z0-9-]{0,31}$/)。 " +
+            "Pro 5 件上限、 Team unlimited、 Free は 不可 (= 403)。 mutation = session 認証時 Origin/Referer 強制 (= dashboard 経由前提)。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["name", "slug"],
+            properties: {
+                name: {
+                    type: "string",
+                    description: "project 表示名 (1-64 文字)",
+                    minLength: 1,
+                    maxLength: 64,
+                },
+                slug: {
+                    type: "string",
+                    description: "URL-safe 短い識別子 (= /^[a-z][a-z0-9-]{0,31}$/、 32 字以内、 先頭小文字、 hyphens 可)",
+                    pattern: "^[a-z][a-z0-9-]{0,31}$",
+                },
+            },
+        },
+    },
+    {
+        name: "rename_project",
+        description: "既存 project の name / slug を 更新 (= PATCH /v1/projects/:id)。 name と slug は どちらか一方 / 両方指定可。 slug は URL-safe 制約 (= /^[a-z][a-z0-9-]{0,31}$/)。 default project の rename は 許可。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["projectId"],
+            properties: {
+                projectId: {
+                    type: "string",
+                    description: "対象 project の id (= list_projects で 取得した UUID)",
+                    minLength: 1,
+                    maxLength: 64,
+                },
+                name: {
+                    type: "string",
+                    description: "新しい表示名 (省略時 不変)",
+                    minLength: 1,
+                    maxLength: 64,
+                },
+                slug: {
+                    type: "string",
+                    description: "新しい slug (省略時 不変、 /^[a-z][a-z0-9-]{0,31}$/)",
+                    pattern: "^[a-z][a-z0-9-]{0,31}$",
+                },
+            },
+        },
+    },
+    {
+        name: "delete_project",
+        description: "project を soft delete (= DELETE /v1/projects/:id、 archived_at 設定で 論理削除)。 default project は 削除不可 (= accounts.default_project_id 参照 整合 のため 400)。 " +
+            "archived 後 calls / alerts は そのまま (= 過去観測は keep)、 新規 record は 別 project に carry する narrative。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["projectId"],
+            properties: {
+                projectId: {
+                    type: "string",
+                    description: "削除対象 project の id (= list_projects 経由)",
+                    minLength: 1,
+                    maxLength: 64,
+                },
+            },
+        },
+    },
+    {
+        name: "get_account_health",
+        description: "自社 LLM infra の健康状態 サマリを 1 call で取得 (= axis 4 Tier 1 = 自律 AI ops)。 既存 4 endpoint (= aggregate_calls / get_percentiles / get_llm_budget / list_audit_log) を 並列 fetch して 1 narrative response に圧縮。 " +
+            "返却 = { window, totals: {calls, costUsd, errorRate}, latency: {p50, p95, p99}, budget: {used, limit, percentUsed}, recentEvents: 件数, summary: 'ok' | 'warn' | 'critical' } 形式。 " +
+            "AI agent narrative = 「今うちの LLM infra どう？」 を 1 prompt で carry。 backend 新 endpoint 不要 = 純 read aggregator。 個別 endpoint 失敗は partial で 返す (= 1 軸 timeout が summary を 止めない)。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {
+                window: {
+                    type: "string",
+                    description: "観測窓 (= '1h' / '24h' / '7d'、 default '24h')",
+                    enum: ["1h", "24h", "7d"],
+                    default: "24h",
+                },
+            },
+        },
+    },
+    {
+        name: "aggregate_calls",
+        description: "calls の 集計 cube を 取得 (= POST /v1/query/aggregate)。 groupBy (= provider / model / day / hour / minute / tag) × metric (= cost / latency / tokens / count / error_rate) で 1 call で AI agent が 「今月の cost を model 別 に集計」 narrative carry。 " +
+            "tag mode は tagKey 必須 (= alphanumeric + _ - のみ、 例: 'env' / 'feature')。 hour mode は 168h / minute mode は 60min まで (= 超過 400)。 cost = SUM(cost_usd) / latency = AVG(latency_ms) / tokens = SUM(total_tokens) / count = COUNT(*) / error_rate = error ÷ total。 " +
+            "返却 = { groups: [{key, value, count}], total: {value, count} } 形式。 軸 1 操作系 + 自律 AI ops の 分析 narrative の coverage 拡張。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {
+                startTime: {
+                    type: "string",
+                    description: "範囲開始 ISO timestamp (= UTC、 省略 = 全期間)",
+                },
+                endTime: {
+                    type: "string",
+                    description: "範囲終了 ISO timestamp (= UTC、 省略 = 現在)",
+                },
+                groupBy: {
+                    type: "string",
+                    description: "集約軸 (= 'provider' / 'model' / 'day' / 'hour' / 'minute' / 'tag')、 default = 'provider'。 hour は 168h / minute は 60min まで",
+                    enum: ["provider", "model", "day", "hour", "minute", "tag"],
+                    default: "provider",
+                },
+                metric: {
+                    type: "string",
+                    description: "metric 種別 (= 'cost' / 'latency' / 'tokens' / 'count' / 'error_rate')、 default = 'cost'",
+                    enum: ["cost", "latency", "tokens", "count", "error_rate"],
+                    default: "cost",
+                },
+                provider: {
+                    type: "string",
+                    description: "provider filter (= 'openai' / 'anthropic' 等)、 省略 = 全 provider",
+                },
+                tagKey: {
+                    type: "string",
+                    description: "groupBy='tag' の時必須。 tags JSON 内 key 名 (alphanumeric + _- のみ、 1-64 文字)",
+                    pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$",
+                },
+            },
+        },
+    },
+    {
+        name: "list_audit_log",
+        description: "Phase B audit log を 一覧 取得 (= GET /v1/audit-log)。 自 account 限定、 admin role のみ許可 (= viewer/member は 403)。 " +
+            "AI agent が 「最近の招待 / API key revoke / プロジェクト変更」 等の 操作履歴 を 自律参照する narrative (= axis 4 自律 AI ops)。 " +
+            "filter = eventType (= 'invitation.created' / 'api_key.revoked' 等) / targetKind / actorUserId / from / to。 " +
+            "cursor pagination 対応 (= nextCursor 形式 = 'created_at|id')、 max limit 200。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {
+                limit: {
+                    type: "integer",
+                    description: "返却件数 (1-200、 デフォルト 50)",
+                    minimum: 1,
+                    maximum: 200,
+                    default: 50,
+                },
+                eventType: {
+                    type: "string",
+                    description: "event_type 完全一致 filter (= 'invitation.created' / 'api_key.revoked' / 'membership.removed' 等)",
+                },
+                targetKind: {
+                    type: "string",
+                    description: "target_kind filter (= 'invitation' / 'api_key' / 'membership' 等)",
+                },
+                actorUserId: {
+                    type: "string",
+                    description: "actor_user_id filter (= 特定 user の操作のみ抽出)",
+                },
+                from: {
+                    type: "string",
+                    description: "範囲開始 ISO timestamp (= UTC)",
+                },
+                to: {
+                    type: "string",
+                    description: "範囲終了 ISO timestamp (= UTC)",
+                },
+                cursor: {
+                    type: "string",
+                    description: "ページ送り cursor (= 前 response の nextCursor を そのまま渡す、 'created_at|id' 形式)",
+                },
+            },
+        },
+    },
+    {
+        name: "list_saved_views",
+        description: "保存済 saved views 一覧を 取得 (= GET /v1/saved-views)。 saved view = /calls page で よく使う filter (startDate/endDate/provider/model/limit) の組み合わせを 名前付きで 保存したもの。 " +
+            "AI agent は 「いつもの先週の OpenAI filter で 呼び出し見せて」 narrative で carry できる。 account 単位、 max 20 件。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {},
+        },
+    },
+    {
+        name: "create_saved_view",
+        description: "新規 saved view を 作成 / 同名なら上書き (= POST /v1/saved-views)。 name は account 内一意。 filter は SavedViewFilter shape (= startDate / endDate / provider / model / limit / preset / sortBy? / sortOrder?)。 " +
+            "AI agent が 自動で よく使う filter を 名前付き保存 narrative。 例: 「直近 7 日 GPT-4 のみ」 view を 作って 後で呼ぶ。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["name", "filter"],
+            properties: {
+                name: {
+                    type: "string",
+                    description: "saved view の名前 (1-80 文字、 改行不可)。 同名で 既存なら 上書き",
+                    minLength: 1,
+                    maxLength: 80,
+                },
+                filter: {
+                    type: "object",
+                    description: "filter shape = startDate (ISO) + endDate (ISO) + provider (空可) + model (空可) + limit (number) + preset (string|null) + sortBy? + sortOrder?",
+                    required: ["startDate", "endDate", "provider", "model", "limit", "preset"],
+                    properties: {
+                        startDate: { type: "string", description: "ISO timestamp (= 範囲開始)" },
+                        endDate: { type: "string", description: "ISO timestamp (= 範囲終了)" },
+                        provider: {
+                            type: "string",
+                            description: "プロバイダー (= 'openai' / 'anthropic' / 'google' 等)、 空 = 全 provider",
+                        },
+                        model: {
+                            type: "string",
+                            description: "モデル名、 空 = 全 model",
+                        },
+                        limit: {
+                            type: "integer",
+                            description: "返却件数 cap",
+                            minimum: 1,
+                        },
+                        preset: {
+                            type: ["string", "null"],
+                            description: "preset 識別子 (= dashboard 既定 filter、 null 可)",
+                        },
+                        sortBy: { type: "string", description: "ソート対象 column" },
+                        sortOrder: {
+                            type: "string",
+                            description: "ソート方向 ('asc' / 'desc')",
+                            enum: ["asc", "desc"],
+                        },
+                    },
+                },
+            },
+        },
+    },
+    {
+        name: "delete_saved_view",
+        description: "指定 id の saved view を 削除 (= DELETE /v1/saved-views/:id)。 自 account 限定。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["id"],
+            properties: {
+                id: {
+                    type: "string",
+                    description: "削除対象の saved view id (= UUID)",
+                    minLength: 1,
+                    maxLength: 64,
+                },
+            },
+        },
+    },
+    {
+        name: "export_calls",
+        description: "calls の large batch export (= POST /v1/query/export)。 query_calls より 高 limit (= plan 別 max records: Free 1000 / Pro 50000、 config/plans.ts)、 全 plan で利用可。 " +
+            "filter 軸 = startTime / endTime / provider / model + limit。 AI agent が 「先月分の全 GPT-4 呼び出しを取り出して傾向分析して」 narrative で 1 call carry。 " +
+            "結果 format は query_calls と 同 JSON (= AI が そのまま CSV / 統計に carry 可能)。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {
+                startTime: {
+                    type: "string",
+                    description: "範囲 開始 ISO timestamp (= UTC、 省略 = 全期間)",
+                },
+                endTime: {
+                    type: "string",
+                    description: "範囲 終了 ISO timestamp (= UTC、 省略 = 現在)",
+                },
+                provider: {
+                    type: "string",
+                    description: "プロバイダー fix (= openai / anthropic / google / azure / cohere)",
+                },
+                model: {
+                    type: "string",
+                    description: "model 名 fix (= 部分一致なし、 完全一致 例: 'gpt-4o-mini')",
+                },
+                limit: {
+                    type: "integer",
+                    description: "返却件数 cap。 plan 別 max 内なら そのまま、 超過は plan max に clamp",
+                    minimum: 1,
+                },
+            },
+        },
+    },
+    {
+        name: "bulk_delete_calls",
+        description: "指定 call id 一覧 (= max 100) を 自 account 限定で 一括削除する (= POST /v1/calls/bulk-delete)。 " +
+            "AI agent が dogfood / dev test で 蓄積した garbage call の cleanup narrative に carry (= 軸 1 操作系)。 " +
+            "dryRun=true で 削除前に matched 件数を 事前確認可能。 削除は 1 SQL atomic、 audit log に bulk_deleted event を 記録。 " +
+            "FK 制約上 関連 traces / annotations / scores は ON DELETE 経由で 連鎖削除 (= 既存 schema narrative)。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["callIds"],
+            properties: {
+                callIds: {
+                    type: "array",
+                    description: "削除対象の call id 配列 (1-100 件、 各 1-128 文字)",
+                    items: { type: "string", minLength: 1, maxLength: 128 },
+                    minItems: 1,
+                    maxItems: 100,
+                },
+                dryRun: {
+                    type: "boolean",
+                    description: "true で 削除せず matched 件数のみ 返却 (= 確認 UX)",
+                    default: false,
+                },
+            },
+        },
+    },
+    {
+        name: "compare_eval_runs",
+        description: "2 つの eval run (baseline / candidate) を 比較して per-criterion mean score delta + failed count delta + verdict を 返す (= GET /v1/eval-runs/compare)。 " +
+            "AI agent は 「baseline と 比べて candidate は どう 変わったか」 を 1 call で 把握でき、 prompt 改善 効果や regress 検出 narrative に carry できる (= axis 1 操作系 + axis 4 自律 AI ops 寄与)。 " +
+            "verdict = improved / regressed / mixed / unchanged。 failed count は score <= 2 を 「failed」 で 算出。 同 account 限定。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["baselineRunId", "candidateRunId"],
+            properties: {
+                baselineRunId: {
+                    type: "integer",
+                    description: "比較元 run の id (= list_eval_runs.runs[].id)",
+                    minimum: 1,
+                },
+                candidateRunId: {
+                    type: "integer",
+                    description: "比較先 run の id (= 同上)、 baseline と 異なる必要あり",
+                    minimum: 1,
+                },
+            },
+        },
+    },
     {
         name: "run_eval",
         description: "新規 eval run を 即時実行する (= POST /v1/eval-runs)。 直近 N 件の calls × 既定 5 criteria (+ 自作 custom criteria max 8) で gpt-4o-mini に採点させる。 " +
@@ -1605,6 +1996,287 @@ export async function dispatchTool(input) {
                 }
                 return await callApi(apiBase, `/v1/eval-runs/${encodeURIComponent(runId)}`, {}, apiKey);
             }
+            case "get_percentiles": {
+                const body = {};
+                if (typeof safeArgs["startTime"] === "string")
+                    body["startTime"] = safeArgs["startTime"];
+                if (typeof safeArgs["endTime"] === "string")
+                    body["endTime"] = safeArgs["endTime"];
+                if (typeof safeArgs["provider"] === "string")
+                    body["provider"] = safeArgs["provider"];
+                if (typeof safeArgs["model"] === "string")
+                    body["model"] = safeArgs["model"];
+                if (typeof safeArgs["metric"] === "string")
+                    body["metric"] = safeArgs["metric"];
+                if (typeof safeArgs["groupBy"] === "string")
+                    body["groupBy"] = safeArgs["groupBy"];
+                return await callApi(apiBase, "/v1/query/percentiles", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: body,
+                });
+            }
+            case "list_projects": {
+                return await callApi(apiBase, "/v1/projects", {}, apiKey);
+            }
+            case "create_project": {
+                const name = safeArgs["name"];
+                const slug = safeArgs["slug"];
+                if (typeof name !== "string" || name.length === 0 || name.length > 64) {
+                    return errorResponse("name required (1-64 chars)");
+                }
+                if (typeof slug !== "string" || !/^[a-z][a-z0-9-]{0,31}$/.test(slug)) {
+                    return errorResponse("slug required (lowercase alphanumeric + hyphens, max 32 chars, starts with letter)");
+                }
+                return await callApi(apiBase, "/v1/projects", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: { name, slug },
+                });
+            }
+            case "rename_project": {
+                const projectId = safeArgs["projectId"];
+                if (typeof projectId !== "string" || projectId.length === 0 || projectId.length > 64) {
+                    return errorResponse("projectId required (1-64 chars)");
+                }
+                const body = {};
+                if (typeof safeArgs["name"] === "string")
+                    body["name"] = safeArgs["name"];
+                if (typeof safeArgs["slug"] === "string") {
+                    if (!/^[a-z][a-z0-9-]{0,31}$/.test(safeArgs["slug"])) {
+                        return errorResponse("slug must match /^[a-z][a-z0-9-]{0,31}$/");
+                    }
+                    body["slug"] = safeArgs["slug"];
+                }
+                if (Object.keys(body).length === 0) {
+                    return errorResponse("at least one of name / slug required");
+                }
+                return await callApi(apiBase, `/v1/projects/${encodeURIComponent(projectId)}`, {}, apiKey, { method: "PATCH", jsonBody: body });
+            }
+            case "delete_project": {
+                const projectId = safeArgs["projectId"];
+                if (typeof projectId !== "string" || projectId.length === 0 || projectId.length > 64) {
+                    return errorResponse("projectId required (1-64 chars)");
+                }
+                return await callApi(apiBase, `/v1/projects/${encodeURIComponent(projectId)}`, {}, apiKey, { method: "DELETE" });
+            }
+            case "get_account_health": {
+                // 2026-06-05 axis 4 Tier 1 = 自社 LLM infra 健康状態 サマリ。 既存
+                // 4 endpoint を 並列 fetch + 1 narrative 圧縮。 個別 fail は partial
+                // で carry (= 1 軸 timeout で summary を 止めない設計)。
+                const winRaw = typeof safeArgs["window"] === "string" ? safeArgs["window"] : "24h";
+                const window = winRaw === "1h" || winRaw === "24h" || winRaw === "7d" ? winRaw : "24h";
+                const now = Date.now();
+                const windowMs = window === "1h"
+                    ? 60 * 60 * 1000
+                    : window === "24h"
+                        ? 24 * 60 * 60 * 1000
+                        : 7 * 24 * 60 * 60 * 1000;
+                const startTime = new Date(now - windowMs).toISOString();
+                const endTime = new Date(now).toISOString();
+                const aggregateBody = { startTime, endTime, groupBy: "provider", metric: "count" };
+                const errorBody = { startTime, endTime, groupBy: "provider", metric: "error_rate" };
+                const costBody = { startTime, endTime, groupBy: "provider", metric: "cost" };
+                const percentileBody = { startTime, endTime, metric: "latency" };
+                const [countsRes, errorRes, costRes, percentilesRes, budgetRes, auditRes,] = await Promise.allSettled([
+                    callApi(apiBase, "/v1/query/aggregate", {}, apiKey, {
+                        method: "POST",
+                        jsonBody: aggregateBody,
+                    }),
+                    callApi(apiBase, "/v1/query/aggregate", {}, apiKey, {
+                        method: "POST",
+                        jsonBody: errorBody,
+                    }),
+                    callApi(apiBase, "/v1/query/aggregate", {}, apiKey, {
+                        method: "POST",
+                        jsonBody: costBody,
+                    }),
+                    callApi(apiBase, "/v1/query/percentiles", {}, apiKey, {
+                        method: "POST",
+                        jsonBody: percentileBody,
+                    }),
+                    callApi(apiBase, "/v1/account/llm-feature-budget", {}, apiKey),
+                    callApi(apiBase, "/v1/audit-log", { limit: 10 }, apiKey),
+                ]);
+                const extractJson = (r) => {
+                    if (r.status !== "fulfilled" || r.value.isError)
+                        return null;
+                    const txt = r.value.content[0]?.text ?? "";
+                    try {
+                        return JSON.parse(txt);
+                    }
+                    catch {
+                        return null;
+                    }
+                };
+                const counts = extractJson(countsRes);
+                const errors = extractJson(errorRes);
+                const cost = extractJson(costRes);
+                const percentiles = extractJson(percentilesRes);
+                const budget = extractJson(budgetRes);
+                const audit = extractJson(auditRes);
+                const totalCalls = counts?.total?.value ?? 0;
+                const errorRate = errors?.total?.value !== undefined && errors.total.value !== null
+                    ? errors.total.value
+                    : null;
+                const costUsd = cost?.total?.value ?? 0;
+                const p50 = percentiles?.p50 ?? null;
+                const p95 = percentiles?.p95 ?? null;
+                const p99 = percentiles?.p99 ?? null;
+                const budgetUsed = budget?.usedUsd ?? null;
+                const budgetLimit = budget?.monthlyLimitUsd ?? null;
+                const budgetPercent = budgetUsed !== null && budgetLimit !== null && budgetLimit > 0
+                    ? Math.round((budgetUsed / budgetLimit) * 1000) / 10
+                    : null;
+                const recentEvents = Array.isArray(audit?.events) ? audit.events.length : 0;
+                let summary = "ok";
+                if ((errorRate !== null && errorRate >= 0.1) ||
+                    (budgetPercent !== null && budgetPercent >= 90) ||
+                    (p95 !== null && p95 >= 10000)) {
+                    summary = "critical";
+                }
+                else if ((errorRate !== null && errorRate >= 0.03) ||
+                    (budgetPercent !== null && budgetPercent >= 70) ||
+                    (p95 !== null && p95 >= 3000)) {
+                    summary = "warn";
+                }
+                const partialFailures = [];
+                if (countsRes.status !== "fulfilled" || countsRes.value.isError)
+                    partialFailures.push("counts");
+                if (errorRes.status !== "fulfilled" || errorRes.value.isError)
+                    partialFailures.push("errorRate");
+                if (costRes.status !== "fulfilled" || costRes.value.isError)
+                    partialFailures.push("cost");
+                if (percentilesRes.status !== "fulfilled" || percentilesRes.value.isError)
+                    partialFailures.push("percentiles");
+                if (budgetRes.status !== "fulfilled" || budgetRes.value.isError)
+                    partialFailures.push("budget");
+                if (auditRes.status !== "fulfilled" || auditRes.value.isError)
+                    partialFailures.push("auditLog");
+                return {
+                    content: [
+                        {
+                            type: "text",
+                            text: JSON.stringify({
+                                window,
+                                totals: { calls: totalCalls, costUsd, errorRate },
+                                latency: { p50, p95, p99 },
+                                budget: { used: budgetUsed, limit: budgetLimit, percentUsed: budgetPercent },
+                                recentEvents,
+                                summary,
+                                partialFailures: partialFailures.length > 0 ? partialFailures : undefined,
+                            }),
+                        },
+                    ],
+                };
+            }
+            case "aggregate_calls": {
+                const body = {};
+                if (typeof safeArgs["startTime"] === "string")
+                    body["startTime"] = safeArgs["startTime"];
+                if (typeof safeArgs["endTime"] === "string")
+                    body["endTime"] = safeArgs["endTime"];
+                if (typeof safeArgs["groupBy"] === "string")
+                    body["groupBy"] = safeArgs["groupBy"];
+                if (typeof safeArgs["metric"] === "string")
+                    body["metric"] = safeArgs["metric"];
+                if (typeof safeArgs["provider"] === "string")
+                    body["provider"] = safeArgs["provider"];
+                if (typeof safeArgs["tagKey"] === "string")
+                    body["tagKey"] = safeArgs["tagKey"];
+                return await callApi(apiBase, "/v1/query/aggregate", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: body,
+                });
+            }
+            case "list_audit_log": {
+                const q = {};
+                if (typeof safeArgs["limit"] === "number")
+                    q["limit"] = safeArgs["limit"];
+                if (typeof safeArgs["eventType"] === "string")
+                    q["eventType"] = safeArgs["eventType"];
+                if (typeof safeArgs["targetKind"] === "string")
+                    q["targetKind"] = safeArgs["targetKind"];
+                if (typeof safeArgs["actorUserId"] === "string")
+                    q["actorUserId"] = safeArgs["actorUserId"];
+                if (typeof safeArgs["from"] === "string")
+                    q["from"] = safeArgs["from"];
+                if (typeof safeArgs["to"] === "string")
+                    q["to"] = safeArgs["to"];
+                if (typeof safeArgs["cursor"] === "string")
+                    q["cursor"] = safeArgs["cursor"];
+                return await callApi(apiBase, "/v1/audit-log", q, apiKey);
+            }
+            case "list_saved_views": {
+                return await callApi(apiBase, "/v1/saved-views", {}, apiKey);
+            }
+            case "create_saved_view": {
+                const name = safeArgs["name"];
+                const filter = safeArgs["filter"];
+                if (typeof name !== "string" || name.length === 0 || name.length > 80) {
+                    return errorResponse("name required (1-80 chars)");
+                }
+                if (!filter || typeof filter !== "object") {
+                    return errorResponse("filter required (object)");
+                }
+                return await callApi(apiBase, "/v1/saved-views", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: { name, filter },
+                });
+            }
+            case "delete_saved_view": {
+                const id = safeArgs["id"];
+                if (typeof id !== "string" || id.length === 0 || id.length > 64) {
+                    return errorResponse("id required (1-64 chars)");
+                }
+                return await callApi(apiBase, `/v1/saved-views/${encodeURIComponent(id)}`, {}, apiKey, { method: "DELETE" });
+            }
+            case "export_calls": {
+                const body = {};
+                if (typeof safeArgs["startTime"] === "string")
+                    body["startTime"] = safeArgs["startTime"];
+                if (typeof safeArgs["endTime"] === "string")
+                    body["endTime"] = safeArgs["endTime"];
+                if (typeof safeArgs["provider"] === "string")
+                    body["provider"] = safeArgs["provider"];
+                if (typeof safeArgs["model"] === "string")
+                    body["model"] = safeArgs["model"];
+                if (typeof safeArgs["limit"] === "number")
+                    body["limit"] = safeArgs["limit"];
+                return await callApi(apiBase, "/v1/query/export", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: body,
+                });
+            }
+            case "bulk_delete_calls": {
+                const ids = safeArgs["callIds"];
+                if (!Array.isArray(ids) || ids.length === 0 || ids.length > 100) {
+                    return errorResponse("callIds required (non-empty string array, max 100)");
+                }
+                const strIds = [];
+                for (const id of ids) {
+                    if (typeof id !== "string" || id.length === 0 || id.length > 128) {
+                        return errorResponse("each callId must be non-empty string up to 128 chars");
+                    }
+                    strIds.push(id);
+                }
+                const body = { callIds: strIds };
+                if (safeArgs["dryRun"] === true)
+                    body["dryRun"] = true;
+                return await callApi(apiBase, "/v1/calls/bulk-delete", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: body,
+                });
+            }
+            case "compare_eval_runs": {
+                const baselineId = validateAnnotationId(safeArgs["baselineRunId"]);
+                const candidateId = validateAnnotationId(safeArgs["candidateRunId"]);
+                if (!baselineId || !candidateId) {
+                    return errorResponse("baselineRunId + candidateRunId required (positive integers up to 10 digits)");
+                }
+                if (baselineId === candidateId) {
+                    return errorResponse("baselineRunId and candidateRunId must differ");
+                }
+                return await callApi(apiBase, "/v1/eval-runs/compare", { baseline: baselineId, candidate: candidateId }, apiKey);
+            }
             case "run_eval": {
                 const body = {};
                 if (typeof safeArgs["name"] === "string")