npm - @argosvix/mcp-server - Versions diffs - 0.28.2-alpha.1 → 0.29.0-alpha.1 - Mend

@argosvix/mcp-server 0.28.2-alpha.1 → 0.29.0-alpha.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # @argosvix/mcp-server
-**v0.28.2-alpha.1** — Argosvix MCP server lets AI agents (Claude Desktop, Cursor, Codex CLI, custom MCP clients) query, manage, and operate their LLM observability data directly from the conversation. Supports both **stdio** (subprocess) and **HTTP** (remote / self-host) transports.
+**v0.28.3-alpha.1** — Argosvix MCP server lets AI agents (Claude Desktop, Cursor, Codex CLI, custom MCP clients) query, manage, and operate their LLM observability data directly from the conversation. Supports both **stdio** (subprocess) and **HTTP** (remote / self-host) transports.
 **Surface:** 71 tools (67 generally available + 4 founder-ops scoped) / 3 resources / 8 resource templates / 3 prompts. Autonomous-AI-ops endpoints (`get_account_health` / `detect_anomaly` / `propose_alert_rules` / `classify_calls_batch` / `propose_eval_criteria`) plus runtime control plane (budget gates / policy gates / human-approval gates) complete the "agent that can both observe AND act" narrative. See [CHANGELOG.md](./CHANGELOG.md) for release history.

package/dist/tools.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../src/tools.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;~~AA0M~~/D,eAAO,MAAM,KAAK,EAAE,IAAI,~~EA0nEvB~~,CAAC;AAEF,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,YAAY,CAAC,KAAK,EAAE,aAAa,GAAG,OAAO,CAAC;IAChE,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,~~CAy5DD~~"}
1	+ {"version":3,"file":"tools.d.ts","sourceRoot":"","sources":["../src/tools.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAmN/D,eAAO,MAAM,KAAK,EAAE,IAAI,EA+wEvB,CAAC;AAEF,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,YAAY,CAAC,KAAK,EAAE,aAAa,GAAG,OAAO,CAAC;IAChE,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB,CAAC,CAo9DD"}

package/dist/tools.js CHANGED Viewed

@@ -51,6 +51,7 @@ const TOOL_ARG_ALLOWLIST = {
         "sleepMinutes",
         "enabled",
         "conditions",
+        "evalCriterionId",
     ],
     // 2026-06-03 v1.6 #13-4 = update_alert / delete_alert tools (= axis 1 強化)。
     // backend PATCH/DELETE /v1/alerts/:id の wrap。 alertType は immutable (= backend
@@ -73,7 +74,8 @@ const TOOL_ARG_ALLOWLIST = {
     // Phase 3 read tools = alertId は path 直前置換 (= GET /v1/alerts/:id)。
     // list_alert_events は query param (limit / alertId) のみ allowlist。
     get_alert: ["alertId"],
-    list_alert_events: ["limit", "alertId"],
+    // 2026-06-12 発火履歴深掘り = keyset cursor (beforeTriggeredAt + beforeId) パリティ。
+    list_alert_events: ["limit", "alertId", "beforeTriggeredAt", "beforeId"],
     // 2026-05-31 Phase 3 = acknowledge_alert tool。 eventId は path 直前置換、 body は空
     // (= source は MCP server 側で 強制的に "mcp" carry、 LLM から override させない)。
     acknowledge_alert: ["eventId"],
@@ -199,6 +201,13 @@ const TOOL_ARG_ALLOWLIST = {
     // 2026-06-02 Codex round 2 🔴 fix = idempotencyKey 必須 path (= AI agent が
     // retry した時に backend で dedup)、 client が opaque string 64 char で carry。
     run_eval: ["name", "recentCount", "label", "promptRegistryId", "idempotencyKey"],
+    // 2026-06-15 #2 Phase D4 = golden dataset (期待出力つき固定テストセット) ツール。
+    // list/get/create は CRUD、 run は対象モデルで実行 → judge → eval_scores (= 回帰 A/B)。
+    list_eval_datasets: [],
+    get_eval_dataset: ["datasetId"],
+    create_eval_dataset: ["name", "description", "items", "frozen"],
+    run_eval_dataset: ["datasetId", "targetModel", "judgeModel", "idempotencyKey"],
+    delete_eval_dataset: ["datasetId"],
     // 2026-06-06 axis 4 Tier 2 = 自律 AI ops 第一弾。 mutation 軸なので dryRun 必須、
     // backend で audit emit + UPDATE 順序 + idempotency carry (= R35 narrative)。
     purge_expired_plaintext: ["olderThanDays", "dryRun", "approvalId"],
@@ -376,7 +385,8 @@ export const tools = [
                     type: "string",
                     description: "監視する指標。 cost_threshold=単発コスト閾値 (USD) / monthly_budget=月次予算 (USD) / " +
                         "error_rate=エラー率(%) / latency_degradation=レイテンシ劣化 (ms) / " +
-                        "anomaly_cost / anomaly_latency / anomaly_error_rate=異常検知 (= windowMinutes は 60 固定)",
+                        "anomaly_cost / anomaly_latency / anomaly_error_rate=異常検知 (= windowMinutes は 60 固定) / " +
+                        "eval_score=eval スコア低下 (= evalCriterionId 必須、 直近 window の平均 score が thresholdValue 未満で発火)",
                     enum: [
                         "cost_threshold",
                         "error_rate",
@@ -385,6 +395,7 @@ export const tools = [
                         "anomaly_cost",
                         "anomaly_latency",
                         "anomaly_error_rate",
+                        "eval_score",
                     ],
                 },
                 thresholdValue: {
@@ -455,6 +466,11 @@ export const tools = [
                     description: "作成直後に有効化するか。 デフォルト true。",
                     default: true,
                 },
+                evalCriterionId: {
+                    type: "integer",
+                    description: "alertType=eval_score のとき必須。 監視する eval criterion の id (= list_eval_criteria.criteria[].id)。 直近 window の平均 score が thresholdValue 未満で発火する。",
+                    minimum: 1,
+                },
                 conditions: {
                     type: "object",
                     description: "v1.5 multi-condition alert (= 複合条件)。 指定すると alertType + thresholdValue + " +
@@ -648,7 +664,10 @@ export const tools = [
         description: "alert の発火 (trigger) 履歴を新しい順で返す。 account 横断 (= 全 alert の最近の発火) が " +
             "デフォルト。 alertId を指定すると その alert のみに絞る。 「最近どの alert が何回発火したか」 " +
             "「コスト超過アラートはいつ鳴ったか」 等の確認に使う。 各 event の id は acknowledge_alert " +
-            "tool に そのまま渡せる。 acknowledgedAt / acknowledgedBy は 未 ack なら null。",
+            "tool に そのまま渡せる。 acknowledgedAt / acknowledgedBy は 未 ack なら null。 " +
+            "各 event は発火時点の thresholdValue / windowMinutes / alertType snapshot を含む " +
+            "(= 後から rule を編集しても発火当時の条件が分かる)。 次ページは最終 event の " +
+            "triggeredAt + id を beforeTriggeredAt + beforeId に渡す (= keyset cursor)。",
         inputSchema: {
             type: "object",
             additionalProperties: false,
@@ -665,6 +684,16 @@ export const tools = [
                     description: "特定 alert に絞る場合の ID。 省略で全 alert の発火履歴",
                     pattern: "^[A-Za-z0-9_-]{1,64}$",
                 },
+                beforeTriggeredAt: {
+                    type: "string",
+                    description: "ページング cursor (= 前ページ最終 event の triggeredAt)。 beforeId と必ず同時指定",
+                    format: "date-time",
+                },
+                beforeId: {
+                    type: "string",
+                    description: "ページング cursor (= 前ページ最終 event の id)。 beforeTriggeredAt と必ず同時指定",
+                    pattern: "^[A-Za-z0-9_-]{1,64}$",
+                },
             },
         },
     },
@@ -1727,7 +1756,7 @@ export const tools = [
     {
         name: "purge_expired_plaintext",
         description: "(axis 4 Tier 2 = 自律 AI ops 第一弾) 自 account の 平文 record のうち olderThanDays 経過したものを 一括 purge (= POST /v1/tier2/plaintext/purge-expired)。 利用規約 v2.1 narrative の 「90 日 まで 保管可能」 と整合 (= 自動 retention narrative)、 AI agent が 「30 日 経過の 平文 data を 自動 purge」 narrative で 1 prompt 完結。 " +
-            "dryRun=true (= safety default narrative) で count + sample 5 件の call_id を 返す、 dryRun=false で 実 UPDATE。 emit → UPDATE 順序 + deterministic idempotencyId (= sha1(endpoint+accountId+olderThanDays+cutoff_date)) で webhook retry 同等 narrative。 founder dogfood scope = 自 account のみ purge、 「全 account purge」 path は v1.8 carry。 " +
+            "dryRun=true (= safety default narrative) で count + sample 5 件の call_id を 返す、 dryRun=false で 実 UPDATE。 emit → UPDATE 順序 + deterministic idempotencyId (= sha1(endpoint+accountId+olderThanDays+cutoff_date)) で webhook retry 同等 narrative。 **Pro+ プラン限定 (= 2026-06-12 解放、 Free は 403)。 実 purge (dryRun=false) は approvalId 必須** = request_approval (action: 'purge_expired_plaintext') で人間承認を取ってから実行する (= 平文 NULL 化は不可逆のため)。 自 account のみ purge。 " +
             "返却 dryRun=true = { dryRun: true, targetCount, cutoffTimestamp, olderThanDays, sampleTargetCallIds }、 dryRun=false = { dryRun: false, purgedCount, cutoffTimestamp, olderThanDays, purgedAt }。 audit = tier2.purge_expired_plaintext を emit。",
         inputSchema: {
             type: "object",
@@ -1757,7 +1786,7 @@ export const tools = [
         name: "retry_failed_webhook",
         description: "(axis 4 Tier 2 = 自律 AI ops 第一弾) 失敗した Stripe webhook event (= billing_dead_letter テーブル) を 再処理 marker として audit log に carry (= POST /v1/tier2/webhook-events/retry)。 「先週 Stripe webhook が 一時失敗してた件を 全部 retry して」 narrative で 1 prompt 完結。 " +
             "eventIds (= 特定 event 単体、 最大 100 件) ま た は fromTimestamp/toTimestamp (= range、 7 日 cap) で 対象 select。 dryRun=true で list preview、 dryRun=false で 各 event を audit log に 「marked_for_manual_redispatch」 narrative で 残す (= founder が wrangler / Stripe dashboard 経由で 実 retry を carry する 軸、 完全 auto re-dispatch は v1.8 carry)。 " +
-            "emit は deterministic idempotencyId (= sha1(endpoint+accountId+eventId)) で carry、 同 args の 二重実行は silent skip。 founder dogfood scope = billing_dead_letter には account_id 列が 無いので 全 dead-letter を 走査、 v1.8 で account_id scoping carry。 " +
+            "emit は deterministic idempotencyId (= sha1(endpoint+accountId+eventId)) で carry、 同 args の 二重実行は silent skip。 **founder 運用限定 (= 内部の決済 webhook 復旧ツール、 一般 account は 403)**。 billing_dead_letter は account 横断の内部 table で、 実 re-dispatch も手動運用前提のため一般開放の予定はない。 " +
             "返却 dryRun=true = { dryRun: true, targetCount, events: [{eventId, eventType, reason, receivedAt}] }、 dryRun=false = { dryRun: false, targetCount, succeeded: string[], failed: [{eventId, reason}], skipped: string[], narrative, retriedAt }。 audit = tier2.retry_failed_webhook を 各 event 毎に emit。",
         inputSchema: {
             type: "object",
@@ -1848,7 +1877,7 @@ export const tools = [
     },
     {
         name: "extend_customer_trial",
-        description: "(axis 4 Tier 2 = 自律 AI ops 第三弾) 自 account の Stripe subscription trial 期間を 1-30 日 延長 (= POST /v1/tier2/trial/extend)。 founder dogfood scope = 当面 自 account のみ、 paid Pro+ user は 403。 累計 60 日 上限 (= 過去 30 日 audit 集計)、 status='trialing' でなければ 409。 " +
+        description: "(axis 4 Tier 2 = 自律 AI ops 第三弾) 自 account の Stripe subscription trial 期間を 1-30 日 延長 (= POST /v1/tier2/trial/extend)。 **founder 運用限定 (= サポート用の内部ツール、 一般 account は 403)**。 trial 延長は収益に直結するため一般開放の予定はない。 累計 60 日 上限 (= 過去 30 日 audit 集計)、 status='trialing' でなければ 409。 " +
             "R39 carry = dryRun は 必須明示 (= 暗黙 false で mutation する事故 防御)、 dryRun=false 時 は idempotencyKey も 必須 (16-128 alphanumeric+'_-')。 同 key 再呼び出しは tier2_idempotency table 経由で cached result 返却 (= retry double-extend を 構造防御)。 " +
             "dryRun=true で previousTrialEnd / newTrialEnd / 累計 narrative preview のみ (= Stripe call なし)。 dryRun=false で 実 Stripe mutation + accounts_subscription 同期 update。",
         inputSchema: {
@@ -1895,7 +1924,7 @@ export const tools = [
     },
     {
         name: "apply_promo_code_to_customer",
-        description: "(axis 4 Tier 2 = 自律 AI ops 第三弾) 自 account の Stripe subscription に user-facing promotion code (= 既 Stripe で 登録済の 「LAUNCH50」 等) を 適用 (= POST /v1/tier2/promo/apply)。 founder dogfood scope = 当面 自 account のみ、 paid Pro+ user は 403。 既 active discount があれば 409 (= 重ね掛け 構造防御)、 status が canceled / incomplete_expired は 409。 " +
+        description: "(axis 4 Tier 2 = 自律 AI ops 第三弾) 自 account の Stripe subscription に user-facing promotion code (= 既 Stripe で 登録済の 「LAUNCH50」 等) を 適用 (= POST /v1/tier2/promo/apply)。 **founder 運用限定 (= サポート用の内部ツール、 一般 account は 403)**。 経済影響のある操作の規約整備とセットでないと開放しない方針 (= 解放未定)。 既 active discount があれば 409 (= 重ね掛け 構造防御)、 status が canceled / incomplete_expired は 409。 " +
             "R39 carry = promotion_code 経由で Stripe redeem 判定を委ねる構造 (= coupon 直接適用は 制約 bypass で禁止)、 dryRun 必須明示 + dryRun=false 時 idempotencyKey 必須。 同 key 再呼び出しは tier2_idempotency table 経由で cached result 返却、 concurrent apply を 構造直列化。 " +
             "dryRun=true で resolve + 既 active 判定 + 推定 割引 narrative preview のみ (= Stripe mutation なし)。 dryRun=false で 実 promotion_code 適用。",
         inputSchema: {
@@ -2293,6 +2322,129 @@ export const tools = [
             },
         },
     },
+    {
+        name: "list_eval_datasets",
+        description: "自 account の golden dataset 一覧 (= GET /v1/eval-datasets)。 各 dataset は name / 説明 / item 件数 / frozen 状態を持つ。 golden dataset = 期待出力つきの固定テストセットで、 run_eval_dataset で対象モデルに通して回帰 A/B を測る母集団。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            properties: {},
+        },
+    },
+    {
+        name: "get_eval_dataset",
+        description: "指定 dataset の detail + items 全件を取得 (= GET /v1/eval-datasets/:id)。 datasetId は list_eval_datasets.datasets[].id。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["datasetId"],
+            properties: {
+                datasetId: {
+                    type: "integer",
+                    description: "対象 dataset の id (= list_eval_datasets.datasets[].id)",
+                    minimum: 1,
+                },
+            },
+        },
+    },
+    {
+        name: "create_eval_dataset",
+        description: "golden dataset を新規作成 (= POST /v1/eval-datasets、 Pro+ 限定)。 items に期待出力つきテストケースを最大 20 件渡せる。 dataset は account あたり最大 50 件。 frozen=true で母集団を凍結 (= 以後 item 改変・解凍不可、 回帰判定の比較可能性を固定)。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["name"],
+            properties: {
+                name: {
+                    type: "string",
+                    description: "dataset 名 (1-100 文字、 account 内で一意)",
+                    minLength: 1,
+                    maxLength: 100,
+                },
+                description: {
+                    type: "string",
+                    description: "任意の説明 (<= 500 文字)",
+                    maxLength: 500,
+                },
+                items: {
+                    type: "array",
+                    description: "テストケース (最大 20 件)。 各 inputText を対象モデルに入力し、 expectedOutput を judge の [REFERENCE ANSWER] として採点に使う。",
+                    maxItems: 20,
+                    items: {
+                        type: "object",
+                        additionalProperties: false,
+                        required: ["inputText"],
+                        properties: {
+                            inputText: {
+                                type: "string",
+                                description: "モデルへの入力 (1-4000 文字)",
+                                minLength: 1,
+                                maxLength: 4000,
+                            },
+                            expectedOutput: {
+                                type: "string",
+                                description: "期待する出力 (任意、 <= 4000 文字)。 judge に参照解として渡す。",
+                                maxLength: 4000,
+                            },
+                        },
+                    },
+                },
+                frozen: {
+                    type: "boolean",
+                    description: "true = 母集団凍結 (= 以後 item 改変・解凍不可)。 省略 = false。",
+                },
+            },
+        },
+    },
+    {
+        name: "run_eval_dataset",
+        description: "golden dataset を対象モデルで実行して回帰判定する (= POST /v1/eval-datasets/:id/run、 Pro+ 限定)。 各 item の inputText を targetModel に通し、 出力を既定 criteria + expectedOutput で gpt-4o-mini に採点させて eval_scores に記録する。 結果は compare_eval_runs で run 間比較できる。 実行記録は本番 cost/分析/アラート集計からは除外される。 cost: item 数 × criteria 数の LLM call。 OPENAI_API_KEY 未 provision 環境では 503。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["datasetId", "targetModel"],
+            properties: {
+                datasetId: {
+                    type: "integer",
+                    description: "実行する dataset の id (= list_eval_datasets.datasets[].id)",
+                    minimum: 1,
+                },
+                targetModel: {
+                    type: "string",
+                    description: "回帰を測りたい対象モデル (= 価格表に載っている OpenAI モデルのみ、 例 'gpt-4o-mini')。 未知モデルは 400。",
+                    minLength: 1,
+                    maxLength: 128,
+                },
+                judgeModel: {
+                    type: "string",
+                    description: "採点モデル (省略 = gpt-4o-mini)。 価格表に載っている OpenAI モデルのみ。",
+                    maxLength: 128,
+                },
+                idempotencyKey: {
+                    type: "string",
+                    description: "retry dedup 用の opaque key (= UUID 推奨、 200 char cap)。 同 key の再 POST は既存 run を返す (= 二重課金防止)。",
+                    minLength: 1,
+                    maxLength: 200,
+                },
+            },
+        },
+    },
+    {
+        name: "delete_eval_dataset",
+        description: "golden dataset を削除 (= DELETE /v1/eval-datasets/:id、 Pro+ 限定)。 items は連鎖削除される。 過去の eval run / score は残る。",
+        inputSchema: {
+            type: "object",
+            additionalProperties: false,
+            required: ["datasetId"],
+            properties: {
+                datasetId: {
+                    type: "integer",
+                    description: "削除する dataset の id",
+                    minimum: 1,
+                },
+            },
+        },
+    },
 ];
 export async function dispatchTool(input) {
     const { name, args, apiKey, apiBase } = input;
@@ -2451,8 +2603,13 @@ export async function dispatchTool(input) {
                 return await callApi(apiBase, `/v1/alerts/${encodeURIComponent(alertId)}`, {}, apiKey);
             }
             case "list_alert_events": {
-                // limit / alertId は query param で /v1/alerts/events へ。 alertId は backend
-                // 側でも [A-Za-z0-9_-]+ で再 validate されるが、 schema pattern で先弾き済。
+                // limit / alertId / cursor は query param で /v1/alerts/events へ。 alertId は
+                // backend 側でも [A-Za-z0-9_-]+ で再 validate されるが、 schema pattern で先弾き済。
+                // cursor 2 つの同時指定契約は backend が 400 で enforce (= R77 と同型)。
+                const beforeTs = safeArgs["beforeTriggeredAt"];
+                if (beforeTs !== undefined && (typeof beforeTs !== "string" || Number.isNaN(Date.parse(beforeTs)))) {
+                    return errorResponse("beforeTriggeredAt must be ISO-8601 string");
+                }
                 return await callApi(apiBase, "/v1/alerts/events", safeArgs, apiKey);
             }
             case "acknowledge_alert": {
@@ -2569,6 +2726,43 @@ export async function dispatchTool(input) {
                 }
                 return await callApi(apiBase, `/v1/eval-criteria/${encodeURIComponent(criterionId)}`, {}, apiKey, { method: "DELETE" });
             }
+            case "list_eval_datasets": {
+                return await callApi(apiBase, "/v1/eval-datasets", {}, apiKey);
+            }
+            case "get_eval_dataset": {
+                const datasetId = validateAnnotationId(safeArgs["datasetId"]);
+                if (!datasetId) {
+                    return errorResponse("datasetId required (positive integer up to 10 digits)");
+                }
+                return await callApi(apiBase, `/v1/eval-datasets/${encodeURIComponent(datasetId)}`, {}, apiKey);
+            }
+            case "create_eval_dataset": {
+                if (typeof safeArgs["name"] !== "string") {
+                    return errorResponse("name required (string)");
+                }
+                return await callApi(apiBase, "/v1/eval-datasets", {}, apiKey, {
+                    method: "POST",
+                    jsonBody: safeArgs,
+                });
+            }
+            case "run_eval_dataset": {
+                const datasetId = validateAnnotationId(safeArgs["datasetId"]);
+                if (!datasetId) {
+                    return errorResponse("datasetId required (positive integer up to 10 digits)");
+                }
+                if (typeof safeArgs["targetModel"] !== "string") {
+                    return errorResponse("targetModel required (string)");
+                }
+                const { datasetId: _omitDsId, ...body } = safeArgs;
+                return await callApi(apiBase, `/v1/eval-datasets/${encodeURIComponent(datasetId)}/run`, {}, apiKey, { method: "POST", jsonBody: body });
+            }
+            case "delete_eval_dataset": {
+                const datasetId = validateAnnotationId(safeArgs["datasetId"]);
+                if (!datasetId) {
+                    return errorResponse("datasetId required (positive integer up to 10 digits)");
+                }
+                return await callApi(apiBase, `/v1/eval-datasets/${encodeURIComponent(datasetId)}`, {}, apiKey, { method: "DELETE" });
+            }
             case "test_webhook": {
                 if (typeof safeArgs["url"] !== "string") {
                     return errorResponse("url required (https://...)");