@argosvix/mcp-server 0.14.1-alpha.1 → 0.21.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tools.d.ts +2 -0
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +1329 -74
- package/dist/tools.js.map +1 -1
- package/dist/tools.test.js +497 -1
- package/dist/tools.test.js.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +2 -2
package/dist/tools.js
CHANGED
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
* write tools:
|
|
15
15
|
* - silence_alert / unsilence_alert = alert のミュート操作 (= Phase 2)
|
|
16
16
|
* - create_alert = 新規 alert ルール作成 (= Phase 3)
|
|
17
|
+
* - update_alert / delete_alert = alert lifecycle 完結 (= v1.6 #13-4)
|
|
18
|
+
* - create_annotation / update_annotation / delete_annotation = annotation CRUD (= v1.6 #13-3)
|
|
17
19
|
*
|
|
18
20
|
* 残 (= acknowledge_alert / HTTP transport / resources・prompts) は後続 phase。
|
|
19
21
|
*/
|
|
@@ -48,6 +50,24 @@ const TOOL_ARG_ALLOWLIST = {
|
|
|
48
50
|
"enabled",
|
|
49
51
|
"conditions",
|
|
50
52
|
],
|
|
53
|
+
// 2026-06-03 v1.6 #13-4 = update_alert / delete_alert tools (= axis 1 強化)。
|
|
54
|
+
// backend PATCH/DELETE /v1/alerts/:id の wrap。 alertType は immutable (= backend
|
|
55
|
+
// validateUpdate で 400)、 schema 側にも入れない。 alertId は path 直前置換、
|
|
56
|
+
// body field のみ allowlist。
|
|
57
|
+
update_alert: [
|
|
58
|
+
"alertId",
|
|
59
|
+
"name",
|
|
60
|
+
"thresholdValue",
|
|
61
|
+
"windowMinutes",
|
|
62
|
+
"filterProvider",
|
|
63
|
+
"filterModel",
|
|
64
|
+
"channelKinds",
|
|
65
|
+
"channelTargets",
|
|
66
|
+
"sleepMinutes",
|
|
67
|
+
"enabled",
|
|
68
|
+
"conditions",
|
|
69
|
+
],
|
|
70
|
+
delete_alert: ["alertId"],
|
|
51
71
|
// Phase 3 read tools = alertId は path 直前置換 (= GET /v1/alerts/:id)。
|
|
52
72
|
// list_alert_events は query param (limit / alertId) のみ allowlist。
|
|
53
73
|
get_alert: ["alertId"],
|
|
@@ -61,15 +81,46 @@ const TOOL_ARG_ALLOWLIST = {
|
|
|
61
81
|
list_annotations_for_call: ["callId"],
|
|
62
82
|
list_annotations_by_label: ["label", "limit"],
|
|
63
83
|
get_annotation: ["annotationId"],
|
|
84
|
+
// 2026-06-03 v1.6 #13-3 = annotation CRUD write tools (= axis 1 強化、
|
|
85
|
+
// 全社未対応領域での 新規優位)。 backend POST/PATCH/DELETE /v1/annotations を
|
|
86
|
+
// allowlist 経由で wrap。 annotation_text / label / quality_score の値域は
|
|
87
|
+
// backend で 最終 validation (= 2000 char / 50 char + 英数 _ - / 1-5 integer)。
|
|
88
|
+
create_annotation: ["callId", "annotationText", "label", "qualityScore"],
|
|
89
|
+
update_annotation: ["annotationId", "annotationText", "label", "qualityScore"],
|
|
90
|
+
delete_annotation: ["annotationId"],
|
|
64
91
|
// 2026-06-02 v1.5 = eval criteria read tools。 global default + 自 account custom
|
|
65
92
|
// の両方を visible。 criterionId は AUTOINCREMENT integer、 dispatch 側で validate。
|
|
66
93
|
list_eval_criteria: [],
|
|
67
94
|
get_eval_criterion: ["criterionId"],
|
|
95
|
+
// 2026-06-03 v1.6 #13-2 = eval criteria write tools (= axis 1 強化、 Langfuse 未対応領域)。
|
|
96
|
+
// Pro+ 専用。 create + update = full replace (= name + rubric + scaleMin + scaleMax 全
|
|
97
|
+
// 必須)、 delete = 自 account 内 only (= global default は 構造防御で対象外)。
|
|
98
|
+
create_eval_criterion: ["name", "rubric", "scaleMin", "scaleMax"],
|
|
99
|
+
update_eval_criterion: ["criterionId", "name", "rubric", "scaleMin", "scaleMax"],
|
|
100
|
+
delete_eval_criterion: ["criterionId"],
|
|
101
|
+
// 2026-06-03 v1.6 #13-5 = test_webhook tool (= alert webhook 試送 path)。
|
|
102
|
+
// backend POST /v1/alerts/test-webhook を allowlist 経由で wrap。 URL は SSRF
|
|
103
|
+
// 防御 (= validateWebhookTarget)、 secret は HMAC 署名用、 rate limit 5/分。
|
|
104
|
+
test_webhook: ["url", "secret", "alertName"],
|
|
105
|
+
// 2026-06-03 v1.6 #13-7 = LLM feature budget tools (= safety classifier +
|
|
106
|
+
// PII audit + eval baseline runner の 月予算 cap)。 get は Free / Pro+ 共通、
|
|
107
|
+
// raise は Pro+ 専用、 $5-$500 hard cap。 default $5 で 月跨ぎ で 自動 reset。
|
|
108
|
+
get_llm_budget: [],
|
|
109
|
+
raise_llm_budget: ["budgetUsd"],
|
|
68
110
|
// 2026-06-02 v1.5 Round F = prompt registry read tools。 user の保存 prompt
|
|
69
111
|
// template を tap で取得、 AI agent が template + variables + labels を context
|
|
70
112
|
// に取り込む path。 name / label / limit は query param、 promptId は path 直前置換。
|
|
71
113
|
list_prompts: ["label", "name", "limit"],
|
|
72
114
|
get_prompt: ["promptId"],
|
|
115
|
+
// 2026-06-03 v1.6 #13-1 = prompt registry write tools (= axis 1 強化、 Langfuse
|
|
116
|
+
// 比較で同等領域)。 Pro+ 専用。 create = 新 version 作成、 update = template /
|
|
117
|
+
// variables / labels / description の部分更新、 rename = name + version 変更
|
|
118
|
+
// (= typo 修正軸、 UNIQUE 衝突 → 409)、 delete = 削除 (= 204)。 backend が field
|
|
119
|
+
// validation + plan gate + Origin/Referer CSRF 防御を carry する path に乗る。
|
|
120
|
+
create_prompt: ["name", "version", "template", "variables", "labels", "description"],
|
|
121
|
+
update_prompt: ["promptId", "template", "variables", "labels", "description"],
|
|
122
|
+
rename_prompt: ["promptId", "name", "version"],
|
|
123
|
+
delete_prompt: ["promptId"],
|
|
73
124
|
// 2026-06-02 v1.5 closure = safety classifier read tools。 OpenAI Moderation
|
|
74
125
|
// cron が書き込んだ assessment を AI agent から閲覧する path。 callId は
|
|
75
126
|
// /v1/safety-assessments?call_id= query param、 assessmentId は path 直前置換。
|
|
@@ -79,6 +130,19 @@ const TOOL_ARG_ALLOWLIST = {
|
|
|
79
130
|
// は scores 同梱、 run は POST (= Pro+ で startEvalRun に渡す軸)。
|
|
80
131
|
list_eval_runs: ["limit"],
|
|
81
132
|
get_eval_run: ["runId"],
|
|
133
|
+
compare_eval_runs: ["baselineRunId", "candidateRunId"],
|
|
134
|
+
bulk_delete_calls: ["callIds", "dryRun"],
|
|
135
|
+
export_calls: ["startTime", "endTime", "provider", "model", "limit"],
|
|
136
|
+
list_saved_views: [],
|
|
137
|
+
create_saved_view: ["name", "filter"],
|
|
138
|
+
delete_saved_view: ["id"],
|
|
139
|
+
list_audit_log: ["limit", "eventType", "targetKind", "actorUserId", "from", "to", "cursor"],
|
|
140
|
+
aggregate_calls: ["startTime", "endTime", "groupBy", "metric", "provider", "tagKey"],
|
|
141
|
+
get_percentiles: ["startTime", "endTime", "provider", "model", "metric", "groupBy"],
|
|
142
|
+
list_projects: [],
|
|
143
|
+
create_project: ["name", "slug"],
|
|
144
|
+
rename_project: ["projectId", "name", "slug"],
|
|
145
|
+
delete_project: ["projectId"],
|
|
82
146
|
// 2026-06-02 Codex round 2 🔴 fix = idempotencyKey 必須 path (= AI agent が
|
|
83
147
|
// retry した時に backend で dedup)、 client が opaque string 64 char で carry。
|
|
84
148
|
run_eval: ["name", "recentCount", "label", "promptRegistryId", "idempotencyKey"],
|
|
@@ -355,6 +419,126 @@ export const tools = [
|
|
|
355
419
|
},
|
|
356
420
|
},
|
|
357
421
|
},
|
|
422
|
+
{
|
|
423
|
+
name: "update_alert",
|
|
424
|
+
description: "既存 alert の設定を更新する (= PATCH /v1/alerts/:id)。 alertType (= 監視タイプ) は immutable で、 " +
|
|
425
|
+
"変更したい場合は新規 alert を作成してから旧 alert を delete する (= alert lifecycle 完結)。 " +
|
|
426
|
+
"閾値 / 評価窓 / 通知チャンネル / 名前 / 有効化フラグ / 複合条件 を 部分 update 可能 (= 全フィールド optional)。 " +
|
|
427
|
+
"例: 「月予算 alert の threshold を $100 → $50 に下げて」 / 「通知チャンネルを Slack に追加」。",
|
|
428
|
+
inputSchema: {
|
|
429
|
+
type: "object",
|
|
430
|
+
additionalProperties: false,
|
|
431
|
+
required: ["alertId"],
|
|
432
|
+
properties: {
|
|
433
|
+
alertId: {
|
|
434
|
+
type: "string",
|
|
435
|
+
description: "対象 alert の ID (= list_alerts で取得)",
|
|
436
|
+
pattern: "^[A-Za-z0-9-]{1,64}$",
|
|
437
|
+
},
|
|
438
|
+
name: {
|
|
439
|
+
type: "string",
|
|
440
|
+
description: "alert の表示名 (1-100 文字、 改行不可)。 省略で 既存値維持",
|
|
441
|
+
minLength: 1,
|
|
442
|
+
maxLength: 100,
|
|
443
|
+
pattern: "^[^\\r\\n]{1,100}$",
|
|
444
|
+
},
|
|
445
|
+
thresholdValue: {
|
|
446
|
+
type: "number",
|
|
447
|
+
description: "閾値 (= 0 以上)。 省略で 既存値維持",
|
|
448
|
+
minimum: 0,
|
|
449
|
+
},
|
|
450
|
+
windowMinutes: {
|
|
451
|
+
type: "integer",
|
|
452
|
+
description: "集計時間窓 (分、 5-43200)。 省略で 既存値維持",
|
|
453
|
+
minimum: 5,
|
|
454
|
+
maximum: 43200,
|
|
455
|
+
},
|
|
456
|
+
filterProvider: {
|
|
457
|
+
type: "string",
|
|
458
|
+
description: "対象 provider。 省略で 既存値維持、 明示 null で 全 provider に解除",
|
|
459
|
+
enum: ["openai", "anthropic", "gemini", "mistral"],
|
|
460
|
+
},
|
|
461
|
+
filterModel: {
|
|
462
|
+
type: "string",
|
|
463
|
+
description: "対象 model (部分一致)。 省略で 既存値維持、 明示 null で 全 model に解除",
|
|
464
|
+
maxLength: 128,
|
|
465
|
+
},
|
|
466
|
+
channelKinds: {
|
|
467
|
+
type: "array",
|
|
468
|
+
description: "有効化する通知チャンネル種別。 省略で 既存値維持。 channelTargets と同時更新推奨",
|
|
469
|
+
minItems: 1,
|
|
470
|
+
items: { type: "string", enum: ["email", "slack", "webhook", "discord", "teams"] },
|
|
471
|
+
},
|
|
472
|
+
channelTargets: {
|
|
473
|
+
type: "object",
|
|
474
|
+
description: "channelKinds に挙げた各 kind の宛先 object。 省略で 既存値維持",
|
|
475
|
+
additionalProperties: false,
|
|
476
|
+
properties: {
|
|
477
|
+
email: { type: "string", description: "通知先メールアドレス" },
|
|
478
|
+
slack: { type: "string", description: "Slack Incoming Webhook URL" },
|
|
479
|
+
webhook: { type: "string", description: "汎用 webhook URL (https)" },
|
|
480
|
+
discord: { type: "string", description: "Discord webhook URL" },
|
|
481
|
+
teams: { type: "string", description: "Microsoft Teams Incoming Webhook URL" },
|
|
482
|
+
},
|
|
483
|
+
},
|
|
484
|
+
sleepMinutes: {
|
|
485
|
+
type: "integer",
|
|
486
|
+
description: "連続通知の抑制時間 (分、 5-10080)。 省略で 既存値維持",
|
|
487
|
+
minimum: 5,
|
|
488
|
+
maximum: 10080,
|
|
489
|
+
},
|
|
490
|
+
enabled: {
|
|
491
|
+
type: "boolean",
|
|
492
|
+
description: "alert の有効化フラグ。 false で 一時的に評価を止める (= silence と違い再 enable には PATCH 必要)",
|
|
493
|
+
},
|
|
494
|
+
conditions: {
|
|
495
|
+
type: "object",
|
|
496
|
+
description: "v1.5 multi-condition alert の更新。 指定すると 単 metric path を ignore し " +
|
|
497
|
+
"AND/OR 集約 path に switch。 既存の 単 metric / multi-condition の どちらにも上書き可能。",
|
|
498
|
+
required: ["operator", "conditions"],
|
|
499
|
+
additionalProperties: false,
|
|
500
|
+
properties: {
|
|
501
|
+
operator: { type: "string", enum: ["AND", "OR"] },
|
|
502
|
+
conditions: {
|
|
503
|
+
type: "array",
|
|
504
|
+
minItems: 1,
|
|
505
|
+
maxItems: 8,
|
|
506
|
+
items: {
|
|
507
|
+
type: "object",
|
|
508
|
+
additionalProperties: false,
|
|
509
|
+
required: ["metric", "threshold", "windowMinutes", "comparator"],
|
|
510
|
+
properties: {
|
|
511
|
+
metric: { type: "string" },
|
|
512
|
+
threshold: { type: "number" },
|
|
513
|
+
windowMinutes: { type: "integer", minimum: 5, maximum: 43200 },
|
|
514
|
+
comparator: { type: "string", enum: [">", "<", ">=", "<="] },
|
|
515
|
+
},
|
|
516
|
+
},
|
|
517
|
+
},
|
|
518
|
+
},
|
|
519
|
+
},
|
|
520
|
+
},
|
|
521
|
+
},
|
|
522
|
+
},
|
|
523
|
+
{
|
|
524
|
+
name: "delete_alert",
|
|
525
|
+
description: "alert を 削除する (= DELETE /v1/alerts/:id)。 関連する alert_events も CASCADE 削除される。 " +
|
|
526
|
+
"誤削除防御のため、 必要に応じて事前に get_alert で 詳細確認を推奨。 " +
|
|
527
|
+
"alert を一時停止したいだけなら delete でなく silence_alert (= ミュート) または update_alert で " +
|
|
528
|
+
"enabled=false を 推奨 (= 復活可能)。",
|
|
529
|
+
inputSchema: {
|
|
530
|
+
type: "object",
|
|
531
|
+
additionalProperties: false,
|
|
532
|
+
required: ["alertId"],
|
|
533
|
+
properties: {
|
|
534
|
+
alertId: {
|
|
535
|
+
type: "string",
|
|
536
|
+
description: "対象 alert の ID (= list_alerts で取得)",
|
|
537
|
+
pattern: "^[A-Za-z0-9-]{1,64}$",
|
|
538
|
+
},
|
|
539
|
+
},
|
|
540
|
+
},
|
|
541
|
+
},
|
|
358
542
|
{
|
|
359
543
|
name: "get_alert",
|
|
360
544
|
description: "指定 alert の詳細設定と直近の trigger 履歴を返す。 list_alerts で得た alertId を渡す。 " +
|
|
@@ -480,6 +664,95 @@ export const tools = [
|
|
|
480
664
|
},
|
|
481
665
|
},
|
|
482
666
|
},
|
|
667
|
+
{
|
|
668
|
+
name: "create_annotation",
|
|
669
|
+
description: "LLM call に対する新規 annotation (= human review / ラベル付け) を作成する。 " +
|
|
670
|
+
"annotationText / label / qualityScore のうち少なくとも 1 つを指定する (= 「空 annotation」 は backend で 400)。 " +
|
|
671
|
+
"用例: 「Claude、 この call を 『badly-summarized』 ラベルで quality 2 にして」、 " +
|
|
672
|
+
"「eval ループ用に positive / negative の 二極ラベルを 大量付与する」。 " +
|
|
673
|
+
"eval baseline runner (= run_eval) と 組み合わせると、 annotation を ground truth として 評価軸 を 校正できる。",
|
|
674
|
+
inputSchema: {
|
|
675
|
+
type: "object",
|
|
676
|
+
additionalProperties: false,
|
|
677
|
+
required: ["callId"],
|
|
678
|
+
properties: {
|
|
679
|
+
callId: {
|
|
680
|
+
type: "string",
|
|
681
|
+
description: "対象 call の id (= query_calls.records[].id)",
|
|
682
|
+
pattern: "^[A-Za-z0-9_-]{1,128}$",
|
|
683
|
+
},
|
|
684
|
+
annotationText: {
|
|
685
|
+
type: "string",
|
|
686
|
+
description: "自由記述コメント (0-2000 文字)。 backend で長さ validation",
|
|
687
|
+
maxLength: 2000,
|
|
688
|
+
},
|
|
689
|
+
label: {
|
|
690
|
+
type: "string",
|
|
691
|
+
description: "ラベル (0-50 文字、 英数 + _ - のみ)。 dashboard filter で 使える",
|
|
692
|
+
maxLength: 50,
|
|
693
|
+
pattern: "^[A-Za-z0-9_-]{0,50}$",
|
|
694
|
+
},
|
|
695
|
+
qualityScore: {
|
|
696
|
+
type: "integer",
|
|
697
|
+
description: "品質スコア (= 1-5 integer)。 省略で NULL",
|
|
698
|
+
minimum: 1,
|
|
699
|
+
maximum: 5,
|
|
700
|
+
},
|
|
701
|
+
},
|
|
702
|
+
},
|
|
703
|
+
},
|
|
704
|
+
{
|
|
705
|
+
name: "update_annotation",
|
|
706
|
+
description: "既存 annotation の annotationText / label / qualityScore を 部分更新する (= PATCH /v1/annotations/:id)。 " +
|
|
707
|
+
"callId は immutable。 「ラベルを 修正したい」 「qualityScore を 再評価して 4 → 5 に上げる」 用途。 " +
|
|
708
|
+
"annotationId は list_annotations_for_call で取得した annotations[].id を渡す。",
|
|
709
|
+
inputSchema: {
|
|
710
|
+
type: "object",
|
|
711
|
+
additionalProperties: false,
|
|
712
|
+
required: ["annotationId"],
|
|
713
|
+
properties: {
|
|
714
|
+
annotationId: {
|
|
715
|
+
type: "integer",
|
|
716
|
+
description: "対象 annotation の id (= AUTOINCREMENT 数値)",
|
|
717
|
+
minimum: 1,
|
|
718
|
+
},
|
|
719
|
+
annotationText: {
|
|
720
|
+
type: "string",
|
|
721
|
+
description: "新しいコメント (0-2000 文字)。 省略で 既存値維持",
|
|
722
|
+
maxLength: 2000,
|
|
723
|
+
},
|
|
724
|
+
label: {
|
|
725
|
+
type: "string",
|
|
726
|
+
description: "新しいラベル (0-50 文字、 英数 + _ - のみ)。 省略で 既存値維持",
|
|
727
|
+
maxLength: 50,
|
|
728
|
+
pattern: "^[A-Za-z0-9_-]{0,50}$",
|
|
729
|
+
},
|
|
730
|
+
qualityScore: {
|
|
731
|
+
type: "integer",
|
|
732
|
+
description: "新しい品質スコア (= 1-5)。 省略で 既存値維持",
|
|
733
|
+
minimum: 1,
|
|
734
|
+
maximum: 5,
|
|
735
|
+
},
|
|
736
|
+
},
|
|
737
|
+
},
|
|
738
|
+
},
|
|
739
|
+
{
|
|
740
|
+
name: "delete_annotation",
|
|
741
|
+
description: "annotation を 削除する (= DELETE /v1/annotations/:id)。 関連 row は他に存在しないので CASCADE 影響なし。 " +
|
|
742
|
+
"誤削除防御のため、 必要に応じて事前に get_annotation で 詳細確認を推奨。",
|
|
743
|
+
inputSchema: {
|
|
744
|
+
type: "object",
|
|
745
|
+
additionalProperties: false,
|
|
746
|
+
required: ["annotationId"],
|
|
747
|
+
properties: {
|
|
748
|
+
annotationId: {
|
|
749
|
+
type: "integer",
|
|
750
|
+
description: "対象 annotation の id (= AUTOINCREMENT 数値)",
|
|
751
|
+
minimum: 1,
|
|
752
|
+
},
|
|
753
|
+
},
|
|
754
|
+
},
|
|
755
|
+
},
|
|
483
756
|
{
|
|
484
757
|
name: "list_eval_criteria",
|
|
485
758
|
description: "LLM-as-judge の評価軸 (criteria) 一覧を返す。 global default 5 軸 " +
|
|
@@ -513,130 +786,789 @@ export const tools = [
|
|
|
513
786
|
},
|
|
514
787
|
},
|
|
515
788
|
{
|
|
516
|
-
name: "
|
|
517
|
-
description: "
|
|
518
|
-
"
|
|
519
|
-
"
|
|
520
|
-
"
|
|
521
|
-
"登録した prompt を AI agent が 直接読んで 使う 主要 path。",
|
|
789
|
+
name: "create_eval_criterion",
|
|
790
|
+
description: "自 account custom eval criterion を 1 件作成する (= Pro+ 専用、 v1.6 #13-2)。 " +
|
|
791
|
+
"name + rubric + scaleMin + scaleMax の 4 field 全部必須。 同 account 内で 同 name 既存 = 409。 " +
|
|
792
|
+
"global default と同 name は 構造的に重複可 (= UNIQUE (account_id, name) で account_id IS NULL と分離)。 " +
|
|
793
|
+
"AI agent が dogfood eval で 「この基準を追加」 と判断した時の axis 1 path。",
|
|
522
794
|
inputSchema: {
|
|
523
795
|
type: "object",
|
|
524
796
|
additionalProperties: false,
|
|
797
|
+
required: ["name", "rubric", "scaleMin", "scaleMax"],
|
|
525
798
|
properties: {
|
|
526
|
-
|
|
799
|
+
name: {
|
|
527
800
|
type: "string",
|
|
528
|
-
description: "
|
|
529
|
-
pattern: "^[A-Za-z0-9][A-Za-z0-
|
|
801
|
+
description: "criterion 名 (= 1-50 文字、 英数始まり、 [A-Za-z0-9 _\\-.] のみ)。 例 'helpfulness' / 'concise'",
|
|
802
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9 _\\-.]{0,49}$",
|
|
803
|
+
minLength: 1,
|
|
804
|
+
maxLength: 50,
|
|
530
805
|
},
|
|
531
|
-
|
|
806
|
+
rubric: {
|
|
532
807
|
type: "string",
|
|
533
|
-
description: "
|
|
534
|
-
|
|
808
|
+
description: "scoring rubric 本文 (= 10-2000 文字、 judge LLM が score の根拠とする narrative)",
|
|
809
|
+
minLength: 10,
|
|
810
|
+
maxLength: 2000,
|
|
535
811
|
},
|
|
536
|
-
|
|
812
|
+
scaleMin: {
|
|
537
813
|
type: "integer",
|
|
538
|
-
description: "
|
|
814
|
+
description: "score 下限 (= 1-100 範囲、 scaleMax より小さい)",
|
|
539
815
|
minimum: 1,
|
|
540
|
-
maximum:
|
|
541
|
-
default: 200,
|
|
816
|
+
maximum: 100,
|
|
542
817
|
},
|
|
543
|
-
|
|
544
|
-
},
|
|
545
|
-
},
|
|
546
|
-
{
|
|
547
|
-
name: "get_prompt",
|
|
548
|
-
description: "指定 prompt id の詳細を 1 件取得する。 id は list_prompts の prompts[].id を そのまま使う。 " +
|
|
549
|
-
"template + variables + labels + description を含む、 自 account scope (= backend WHERE 句で 構造防御、 他 account の id は 404)。 " +
|
|
550
|
-
"argosvix://prompts/{id} resource template と 同 endpoint。",
|
|
551
|
-
inputSchema: {
|
|
552
|
-
type: "object",
|
|
553
|
-
additionalProperties: false,
|
|
554
|
-
required: ["promptId"],
|
|
555
|
-
properties: {
|
|
556
|
-
promptId: {
|
|
818
|
+
scaleMax: {
|
|
557
819
|
type: "integer",
|
|
558
|
-
description: "
|
|
820
|
+
description: "score 上限 (= 1-100 範囲、 scaleMin より大きい)",
|
|
559
821
|
minimum: 1,
|
|
822
|
+
maximum: 100,
|
|
560
823
|
},
|
|
561
824
|
},
|
|
562
825
|
},
|
|
563
826
|
},
|
|
564
827
|
{
|
|
565
|
-
name: "
|
|
566
|
-
description: "
|
|
567
|
-
"
|
|
568
|
-
"
|
|
569
|
-
"
|
|
828
|
+
name: "update_eval_criterion",
|
|
829
|
+
description: "自 account custom criterion を full replace で更新する (= Pro+ 専用、 PATCH /v1/eval-criteria/:id)。 " +
|
|
830
|
+
"name + rubric + scaleMin + scaleMax の 4 field 全部必須 (= 部分更新ではない、 全 field 上書き)。 " +
|
|
831
|
+
"global default (= account_id IS NULL) は 構造的に対象外 (= 404)、 他 account の custom も 404。 " +
|
|
832
|
+
"同 account 内 で 同 name 衝突 = 409。",
|
|
570
833
|
inputSchema: {
|
|
571
834
|
type: "object",
|
|
572
835
|
additionalProperties: false,
|
|
836
|
+
required: ["criterionId", "name", "rubric", "scaleMin", "scaleMax"],
|
|
573
837
|
properties: {
|
|
574
|
-
|
|
838
|
+
criterionId: {
|
|
839
|
+
type: "integer",
|
|
840
|
+
description: "対象 criterion の id (= list_eval_criteria.criteria[].id)",
|
|
841
|
+
minimum: 1,
|
|
842
|
+
},
|
|
843
|
+
name: {
|
|
575
844
|
type: "string",
|
|
576
|
-
description: "
|
|
577
|
-
pattern: "^[A-Za-z0-
|
|
845
|
+
description: "新 name (= 1-50 文字、 英数始まり、 [A-Za-z0-9 _\\-.] のみ)",
|
|
846
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9 _\\-.]{0,49}$",
|
|
847
|
+
minLength: 1,
|
|
848
|
+
maxLength: 50,
|
|
578
849
|
},
|
|
579
|
-
|
|
850
|
+
rubric: {
|
|
851
|
+
type: "string",
|
|
852
|
+
description: "新 rubric (= 10-2000 文字)",
|
|
853
|
+
minLength: 10,
|
|
854
|
+
maxLength: 2000,
|
|
855
|
+
},
|
|
856
|
+
scaleMin: {
|
|
580
857
|
type: "integer",
|
|
581
|
-
description: "
|
|
858
|
+
description: "新 scaleMin (= 1-100、 scaleMax より小さい)",
|
|
582
859
|
minimum: 1,
|
|
583
|
-
maximum:
|
|
584
|
-
|
|
860
|
+
maximum: 100,
|
|
861
|
+
},
|
|
862
|
+
scaleMax: {
|
|
863
|
+
type: "integer",
|
|
864
|
+
description: "新 scaleMax (= 1-100、 scaleMin より大きい)",
|
|
865
|
+
minimum: 1,
|
|
866
|
+
maximum: 100,
|
|
585
867
|
},
|
|
586
868
|
},
|
|
587
869
|
},
|
|
588
870
|
},
|
|
589
871
|
{
|
|
590
|
-
name: "
|
|
591
|
-
description: "
|
|
592
|
-
"
|
|
593
|
-
"
|
|
872
|
+
name: "get_llm_budget",
|
|
873
|
+
description: "現在の LLM feature 月予算 (= safety classifier + PII 二次 audit + eval baseline runner の 3 軸 LLM cost cap) を取得する (= v1.6 #13-7)。 " +
|
|
874
|
+
"response = { budgetUsd, spentUsd, remainingUsd, periodStart, defaultBudgetUsd, minBudgetUsd, maxBudgetUsd }。 " +
|
|
875
|
+
"Free / Pro+ 共通で読み取り可能、 「予算 80% 到達したか?」 「raise すべきか?」 を AI agent が 判断する path で使う。 " +
|
|
876
|
+
"default = $5/月、 月跨ぎ (= YYYY-MM 単位) で 自動 reset。",
|
|
594
877
|
inputSchema: {
|
|
595
878
|
type: "object",
|
|
596
879
|
additionalProperties: false,
|
|
597
|
-
|
|
598
|
-
properties: {
|
|
599
|
-
assessmentId: {
|
|
600
|
-
type: "integer",
|
|
601
|
-
description: "対象 assessment の id (= AUTOINCREMENT 数値)",
|
|
602
|
-
minimum: 1,
|
|
603
|
-
},
|
|
604
|
-
},
|
|
880
|
+
properties: {},
|
|
605
881
|
},
|
|
606
882
|
},
|
|
607
883
|
{
|
|
608
|
-
name: "
|
|
609
|
-
description: "
|
|
610
|
-
"
|
|
611
|
-
"
|
|
884
|
+
name: "raise_llm_budget",
|
|
885
|
+
description: "LLM feature 月予算 を 引き上げる / 引き下げる (= Pro+ 専用、 v1.6 #13-7)。 " +
|
|
886
|
+
"range = $5 - $500 (= hard cap で runaway 防御)、 0.01 USD 単位。 既存 spent は そのまま carry、 月跨ぎ で 自動 reset。 " +
|
|
887
|
+
"用例: 「予算が 80% 到達した、 今月だけ $30 に上げて」 / 「使いすぎたから来月は $10 に下げて」。 " +
|
|
888
|
+
"新規 値 < 現 spent でも accept (= remaining が 0 になるだけ、 月跨ぎで 0 から計上 carry)。",
|
|
612
889
|
inputSchema: {
|
|
613
890
|
type: "object",
|
|
614
891
|
additionalProperties: false,
|
|
892
|
+
required: ["budgetUsd"],
|
|
615
893
|
properties: {
|
|
616
|
-
|
|
617
|
-
type: "
|
|
618
|
-
description: "
|
|
619
|
-
minimum:
|
|
620
|
-
maximum:
|
|
621
|
-
default: 20,
|
|
894
|
+
budgetUsd: {
|
|
895
|
+
type: "number",
|
|
896
|
+
description: "新 月予算 USD (= 5-500、 0.01 単位)。 例 30 / 50.5 / 100",
|
|
897
|
+
minimum: 5,
|
|
898
|
+
maximum: 500,
|
|
622
899
|
},
|
|
623
900
|
},
|
|
624
901
|
},
|
|
625
902
|
},
|
|
626
903
|
{
|
|
627
|
-
name: "
|
|
628
|
-
description: "指定
|
|
629
|
-
"
|
|
630
|
-
"
|
|
904
|
+
name: "test_webhook",
|
|
905
|
+
description: "指定 URL に 1 件 fabricated alert を 試送する (= Pro+ 専用、 v1.6 #13-5)。 " +
|
|
906
|
+
"user が webhook URL を 登録する 前 に 「届くか」 を 確認する 主用途。 " +
|
|
907
|
+
"SSRF 防御で https 必須 + private / loopback / cloud metadata IP は reject。 " +
|
|
908
|
+
"secret 指定時は HMAC-SHA256 署名 (= X-Argosvix-Signature) を 添付。 " +
|
|
909
|
+
"rate limit = account 単位 5/分 (= 60s sliding window、 worker instance 越境で 超過余地あり)。 " +
|
|
910
|
+
"response.delivered = receiver が 5s 以内に 2xx 返した か。 false の場合は 「URL 不正 / timeout / 5xx / network error」 のいずれか。",
|
|
631
911
|
inputSchema: {
|
|
632
912
|
type: "object",
|
|
633
913
|
additionalProperties: false,
|
|
634
|
-
required: ["
|
|
914
|
+
required: ["url"],
|
|
635
915
|
properties: {
|
|
636
|
-
|
|
637
|
-
type: "
|
|
638
|
-
description: "
|
|
639
|
-
|
|
916
|
+
url: {
|
|
917
|
+
type: "string",
|
|
918
|
+
description: "送信先 webhook URL (= https、 SSRF 防御済、 1-500 chars)",
|
|
919
|
+
minLength: 1,
|
|
920
|
+
maxLength: 500,
|
|
921
|
+
},
|
|
922
|
+
secret: {
|
|
923
|
+
type: "string",
|
|
924
|
+
description: "HMAC-SHA256 署名用 secret (= 任意、 1-256 chars、 receiver 側で X-Argosvix-Signature 検証)",
|
|
925
|
+
minLength: 1,
|
|
926
|
+
maxLength: 256,
|
|
927
|
+
},
|
|
928
|
+
alertName: {
|
|
929
|
+
type: "string",
|
|
930
|
+
description: "fabricated alert の name (= 任意、 1-64 chars、 [A-Za-z0-9 _\\-.] のみ)。 省略時は 'argosvix test alert'",
|
|
931
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9 _\\-.]{0,63}$",
|
|
932
|
+
minLength: 1,
|
|
933
|
+
maxLength: 64,
|
|
934
|
+
},
|
|
935
|
+
},
|
|
936
|
+
},
|
|
937
|
+
},
|
|
938
|
+
{
|
|
939
|
+
name: "delete_eval_criterion",
|
|
940
|
+
description: "自 account custom criterion を 削除する (= Pro+ 専用、 DELETE /v1/eval-criteria/:id、 204)。 " +
|
|
941
|
+
"global default (= account_id IS NULL) は 構造防御で対象外 = 404、 他 account も 404。 " +
|
|
942
|
+
"⚠ 過去全 eval_run の 該当 criterion score 行 (= eval_scores) も ON DELETE CASCADE で 同時に物理削除される、 " +
|
|
943
|
+
"履歴比較や score 推移分析が 永久に不可になる。 " +
|
|
944
|
+
"AI agent が 「criterion 整理」 で 軽い気持ちで 呼ぶ tool ではない、 過去 run の 該当 score が要らないと user が明示確認した時のみ carry。 " +
|
|
945
|
+
"rename したい だけ なら update_eval_criterion (= full replace) で name + rubric + scaleMin + scaleMax を carry する 方が 履歴を 失わずに 済む。",
|
|
946
|
+
inputSchema: {
|
|
947
|
+
type: "object",
|
|
948
|
+
additionalProperties: false,
|
|
949
|
+
required: ["criterionId"],
|
|
950
|
+
properties: {
|
|
951
|
+
criterionId: {
|
|
952
|
+
type: "integer",
|
|
953
|
+
description: "対象 criterion の id (= list_eval_criteria.criteria[].id)",
|
|
954
|
+
minimum: 1,
|
|
955
|
+
},
|
|
956
|
+
},
|
|
957
|
+
},
|
|
958
|
+
},
|
|
959
|
+
{
|
|
960
|
+
name: "list_prompts",
|
|
961
|
+
description: "user が登録した prompt template の一覧を返す (= migration 0038 prompt_registry、 v1.5 Round F)。 " +
|
|
962
|
+
"各 prompt は id / name / version / template / variables / labels / description / createdAt を含む。 " +
|
|
963
|
+
"「production」 等の label で filter (= ?label=xxx) または 同 name の全 version 取得 " +
|
|
964
|
+
"(= ?name=xxx) が可能。 上限 200 件、 sort = name ASC + created_at DESC。 user が dashboard で " +
|
|
965
|
+
"登録した prompt を AI agent が 直接読んで 使う 主要 path。",
|
|
966
|
+
inputSchema: {
|
|
967
|
+
type: "object",
|
|
968
|
+
additionalProperties: false,
|
|
969
|
+
properties: {
|
|
970
|
+
label: {
|
|
971
|
+
type: "string",
|
|
972
|
+
description: "label filter (= 例 'production' / 'staging' / 'experiment')。 完全一致。",
|
|
973
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,31}$",
|
|
974
|
+
},
|
|
975
|
+
name: {
|
|
976
|
+
type: "string",
|
|
977
|
+
description: "name filter (= 同 name の全 version を 取得)。 完全一致。",
|
|
978
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$",
|
|
979
|
+
},
|
|
980
|
+
limit: {
|
|
981
|
+
type: "integer",
|
|
982
|
+
description: "返却 prompt 数 (1-200、 デフォルト 200)",
|
|
983
|
+
minimum: 1,
|
|
984
|
+
maximum: 200,
|
|
985
|
+
default: 200,
|
|
986
|
+
},
|
|
987
|
+
},
|
|
988
|
+
},
|
|
989
|
+
},
|
|
990
|
+
{
|
|
991
|
+
name: "get_prompt",
|
|
992
|
+
description: "指定 prompt id の詳細を 1 件取得する。 id は list_prompts の prompts[].id を そのまま使う。 " +
|
|
993
|
+
"template + variables + labels + description を含む、 自 account scope (= backend WHERE 句で 構造防御、 他 account の id は 404)。 " +
|
|
994
|
+
"argosvix://prompts/{id} resource template と 同 endpoint。",
|
|
995
|
+
inputSchema: {
|
|
996
|
+
type: "object",
|
|
997
|
+
additionalProperties: false,
|
|
998
|
+
required: ["promptId"],
|
|
999
|
+
properties: {
|
|
1000
|
+
promptId: {
|
|
1001
|
+
type: "integer",
|
|
1002
|
+
description: "対象 prompt の id (= AUTOINCREMENT 数値)",
|
|
1003
|
+
minimum: 1,
|
|
1004
|
+
},
|
|
1005
|
+
},
|
|
1006
|
+
},
|
|
1007
|
+
},
|
|
1008
|
+
{
|
|
1009
|
+
name: "create_prompt",
|
|
1010
|
+
description: "新 prompt template を 1 件登録する (= Pro+ 専用、 v1.6 #13-1)。 name + version + template が 必須、 " +
|
|
1011
|
+
"variables / labels / description は 任意。 同 (name, version) が 既存 = 409 を 返す (= UNIQUE 制約)。 " +
|
|
1012
|
+
"AI agent が dogfood eval / experiment 用に template を 自動登録する path で 使う。",
|
|
1013
|
+
inputSchema: {
|
|
1014
|
+
type: "object",
|
|
1015
|
+
additionalProperties: false,
|
|
1016
|
+
required: ["name", "version", "template"],
|
|
1017
|
+
properties: {
|
|
1018
|
+
name: {
|
|
1019
|
+
type: "string",
|
|
1020
|
+
description: "prompt 名 (= 同 series 識別子、 [A-Za-z0-9][A-Za-z0-9_-]{0,63})。 例 'customer_support'",
|
|
1021
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$",
|
|
1022
|
+
},
|
|
1023
|
+
version: {
|
|
1024
|
+
type: "string",
|
|
1025
|
+
description: "version 識別子 (= [A-Za-z0-9][A-Za-z0-9._-]{0,63})。 例 'v1' / '1.0.2' / '2026-06-03'",
|
|
1026
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$",
|
|
1027
|
+
},
|
|
1028
|
+
template: {
|
|
1029
|
+
type: "string",
|
|
1030
|
+
description: "prompt body 本文 (= 非空、 最大 50000 文字)。 {{var}} で variables 補完。",
|
|
1031
|
+
minLength: 1,
|
|
1032
|
+
maxLength: 50000,
|
|
1033
|
+
},
|
|
1034
|
+
variables: {
|
|
1035
|
+
type: "object",
|
|
1036
|
+
description: "template 内 {{var}} の default 値 (= plain object、 JSON 化後 4096 bytes 上限)。 任意。",
|
|
1037
|
+
additionalProperties: true,
|
|
1038
|
+
},
|
|
1039
|
+
labels: {
|
|
1040
|
+
type: "array",
|
|
1041
|
+
description: "label 配列 (= 最大 8 件、 各 [A-Za-z0-9][A-Za-z0-9_-]{0,31})。 例 ['production', 'staging']。",
|
|
1042
|
+
items: { type: "string", pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,31}$" },
|
|
1043
|
+
maxItems: 8,
|
|
1044
|
+
},
|
|
1045
|
+
description: {
|
|
1046
|
+
type: "string",
|
|
1047
|
+
description: "説明文 (= 最大 500 文字)。 任意。",
|
|
1048
|
+
maxLength: 500,
|
|
1049
|
+
},
|
|
1050
|
+
},
|
|
1051
|
+
},
|
|
1052
|
+
},
|
|
1053
|
+
{
|
|
1054
|
+
name: "update_prompt",
|
|
1055
|
+
description: "既存 prompt の template / variables / labels / description を 部分更新する (= Pro+ 専用、 PATCH /v1/prompts/:id)。 " +
|
|
1056
|
+
"name + version は immutable (= 変更は rename_prompt 経由)。 promptId 必須、 残 field は 指定 した もの のみ 更新。 " +
|
|
1057
|
+
"AI agent が label の付け替え (= 'staging' → 'production' 昇格) や 微修正 patch path で 使う。",
|
|
1058
|
+
inputSchema: {
|
|
1059
|
+
type: "object",
|
|
1060
|
+
additionalProperties: false,
|
|
1061
|
+
required: ["promptId"],
|
|
1062
|
+
properties: {
|
|
1063
|
+
promptId: {
|
|
1064
|
+
type: "integer",
|
|
1065
|
+
description: "対象 prompt の id (= list_prompts.prompts[].id)",
|
|
1066
|
+
minimum: 1,
|
|
1067
|
+
},
|
|
1068
|
+
template: {
|
|
1069
|
+
type: "string",
|
|
1070
|
+
description: "新 template 本文 (= 非空、 最大 50000 文字)。",
|
|
1071
|
+
minLength: 1,
|
|
1072
|
+
maxLength: 50000,
|
|
1073
|
+
},
|
|
1074
|
+
variables: {
|
|
1075
|
+
type: "object",
|
|
1076
|
+
description: "新 variables (= plain object、 null で 全消し)。",
|
|
1077
|
+
additionalProperties: true,
|
|
1078
|
+
},
|
|
1079
|
+
labels: {
|
|
1080
|
+
type: "array",
|
|
1081
|
+
description: "新 labels (= 完全置換、 最大 8 件、 各 [A-Za-z0-9][A-Za-z0-9_-]{0,31})。",
|
|
1082
|
+
items: { type: "string", pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,31}$" },
|
|
1083
|
+
maxItems: 8,
|
|
1084
|
+
},
|
|
1085
|
+
description: {
|
|
1086
|
+
type: "string",
|
|
1087
|
+
description: "新 description (= 1-500 文字)。 既存 description を 明示的に clear したい場合は この field を 渡さずに 他 field のみ で PATCH する (= 空文字 '' は schema 拒否、 LLM の hallucination で 既存 description が 消える 事故防止)。",
|
|
1088
|
+
minLength: 1,
|
|
1089
|
+
maxLength: 500,
|
|
1090
|
+
},
|
|
1091
|
+
},
|
|
1092
|
+
},
|
|
1093
|
+
},
|
|
1094
|
+
{
|
|
1095
|
+
name: "rename_prompt",
|
|
1096
|
+
description: "既存 prompt の name + version を 変更する (= Pro+ 専用、 POST /v1/prompts/:id/rename)。 " +
|
|
1097
|
+
"typo 修正軸 (= 'customer_supprt' → 'customer_support') が 主用途。 同 account 内で 既存 (name, version) と 衝突 = 409。 " +
|
|
1098
|
+
"update_prompt が name/version を 変えない 規約な ので、 rename は 別 tool で 意味的分離。",
|
|
1099
|
+
inputSchema: {
|
|
1100
|
+
type: "object",
|
|
1101
|
+
additionalProperties: false,
|
|
1102
|
+
required: ["promptId", "name", "version"],
|
|
1103
|
+
properties: {
|
|
1104
|
+
promptId: {
|
|
1105
|
+
type: "integer",
|
|
1106
|
+
description: "対象 prompt の id (= list_prompts.prompts[].id)",
|
|
1107
|
+
minimum: 1,
|
|
1108
|
+
},
|
|
1109
|
+
name: {
|
|
1110
|
+
type: "string",
|
|
1111
|
+
description: "新 name (= [A-Za-z0-9][A-Za-z0-9_-]{0,63})",
|
|
1112
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$",
|
|
1113
|
+
},
|
|
1114
|
+
version: {
|
|
1115
|
+
type: "string",
|
|
1116
|
+
description: "新 version (= [A-Za-z0-9][A-Za-z0-9._-]{0,63})",
|
|
1117
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9._-]{0,63}$",
|
|
1118
|
+
},
|
|
1119
|
+
},
|
|
1120
|
+
},
|
|
1121
|
+
},
|
|
1122
|
+
{
|
|
1123
|
+
name: "delete_prompt",
|
|
1124
|
+
description: "既存 prompt を 削除する (= Pro+ 専用、 DELETE /v1/prompts/:id、 204 No Content)。 " +
|
|
1125
|
+
"自 account scope (= 他 account の id は 404)。 ⚠ 物理削除な ので 復元不可、 " +
|
|
1126
|
+
"過去の eval_runs の prompt_registry_id は SET NULL になり 「どの prompt template で run したか」 の trace が失われる (= 履歴比較で 紐付き 不可)。 " +
|
|
1127
|
+
"AI agent は 「rotation で 旧 version を sunset」 で 使う 場合 でも、 過去 run trace が 残っている うちは update_prompt で labels を 'sunset' 等に carry して論理 sunset する 方が安全。",
|
|
1128
|
+
inputSchema: {
|
|
1129
|
+
type: "object",
|
|
1130
|
+
additionalProperties: false,
|
|
1131
|
+
required: ["promptId"],
|
|
1132
|
+
properties: {
|
|
1133
|
+
promptId: {
|
|
1134
|
+
type: "integer",
|
|
1135
|
+
description: "対象 prompt の id (= list_prompts.prompts[].id)",
|
|
1136
|
+
minimum: 1,
|
|
1137
|
+
},
|
|
1138
|
+
},
|
|
1139
|
+
},
|
|
1140
|
+
},
|
|
1141
|
+
{
|
|
1142
|
+
name: "list_safety_assessments",
|
|
1143
|
+
description: "safety classifier (= OpenAI Moderation cron) が 書き込んだ assessment を 一覧取得する。 " +
|
|
1144
|
+
"callId 指定 = 同 call の 全 classifier assessment、 callId 省略 = 自 account 全体で flagged 優先 + 直近 ORDER。 " +
|
|
1145
|
+
"OPENAI_API_KEY 未 provision の environment では cron が走らず空配列。 " +
|
|
1146
|
+
"AI agent は 「最近 flagged な call の review」 や 「特定 call の policy 違反候補確認」 path で使う。",
|
|
1147
|
+
inputSchema: {
|
|
1148
|
+
type: "object",
|
|
1149
|
+
additionalProperties: false,
|
|
1150
|
+
properties: {
|
|
1151
|
+
callId: {
|
|
1152
|
+
type: "string",
|
|
1153
|
+
description: "対象 call id (= llm_calls.id、 [A-Za-z0-9_-]{1,128})。 省略で account 全体。",
|
|
1154
|
+
pattern: "^[A-Za-z0-9_-]{1,128}$",
|
|
1155
|
+
},
|
|
1156
|
+
limit: {
|
|
1157
|
+
type: "integer",
|
|
1158
|
+
description: "返却件数 (1-200、 デフォルト 50)",
|
|
1159
|
+
minimum: 1,
|
|
1160
|
+
maximum: 200,
|
|
1161
|
+
default: 50,
|
|
1162
|
+
},
|
|
1163
|
+
},
|
|
1164
|
+
},
|
|
1165
|
+
},
|
|
1166
|
+
{
|
|
1167
|
+
name: "get_safety_assessment",
|
|
1168
|
+
description: "指定 assessment id の 1 件 detail を 取得する。 id は list_safety_assessments の assessments[].id を そのまま使う。 " +
|
|
1169
|
+
"labels (= flagged category 配列) + score (= max category score 0-1) + reasoning + classifier_id + source を含む。 " +
|
|
1170
|
+
"argosvix://safety-assessments/{id} resource template と 同 endpoint。",
|
|
1171
|
+
inputSchema: {
|
|
1172
|
+
type: "object",
|
|
1173
|
+
additionalProperties: false,
|
|
1174
|
+
required: ["assessmentId"],
|
|
1175
|
+
properties: {
|
|
1176
|
+
assessmentId: {
|
|
1177
|
+
type: "integer",
|
|
1178
|
+
description: "対象 assessment の id (= AUTOINCREMENT 数値)",
|
|
1179
|
+
minimum: 1,
|
|
1180
|
+
},
|
|
1181
|
+
},
|
|
1182
|
+
},
|
|
1183
|
+
},
|
|
1184
|
+
{
|
|
1185
|
+
name: "list_eval_runs",
|
|
1186
|
+
description: "eval baseline runner の run 履歴を 一覧取得する。 自 account 限定、 直近 ORDER。 " +
|
|
1187
|
+
"summary.scoredCount / failedCount / meanScoreByCriterion を含むので、 AI agent は 「直近の eval 結果サマリ」 や 「criterion 別の score 推移」 を 1 件で把握できる。 " +
|
|
1188
|
+
"Free user でも 過去 run が あれば 読み取り可能。",
|
|
1189
|
+
inputSchema: {
|
|
1190
|
+
type: "object",
|
|
1191
|
+
additionalProperties: false,
|
|
1192
|
+
properties: {
|
|
1193
|
+
limit: {
|
|
1194
|
+
type: "integer",
|
|
1195
|
+
description: "返却件数 (1-50、 デフォルト 20)",
|
|
1196
|
+
minimum: 1,
|
|
1197
|
+
maximum: 50,
|
|
1198
|
+
default: 20,
|
|
1199
|
+
},
|
|
1200
|
+
},
|
|
1201
|
+
},
|
|
1202
|
+
},
|
|
1203
|
+
{
|
|
1204
|
+
name: "get_eval_run",
|
|
1205
|
+
description: "指定 eval run の detail + 各 (criterion × call) score 一覧 を 取得する。 id は list_eval_runs の runs[].id を そのまま使う。 " +
|
|
1206
|
+
"scores 配列に score (= criterion scale 内 integer) + reasoning (= judge の理由 narrative) を含む。 " +
|
|
1207
|
+
"argosvix://eval-runs/{id} resource template と 同 endpoint。",
|
|
1208
|
+
inputSchema: {
|
|
1209
|
+
type: "object",
|
|
1210
|
+
additionalProperties: false,
|
|
1211
|
+
required: ["runId"],
|
|
1212
|
+
properties: {
|
|
1213
|
+
runId: {
|
|
1214
|
+
type: "integer",
|
|
1215
|
+
description: "対象 eval run の id (= AUTOINCREMENT 数値)",
|
|
1216
|
+
minimum: 1,
|
|
1217
|
+
},
|
|
1218
|
+
},
|
|
1219
|
+
},
|
|
1220
|
+
},
|
|
1221
|
+
{
|
|
1222
|
+
name: "get_percentiles",
|
|
1223
|
+
description: "calls の percentile metrics を 取得 (= POST /v1/query/percentiles)。 metric = 'latency' (= レイテンシ ms) or 'cost' (= USD)、 全期間 1 数値 or groupBy='day'/'hour'/'minute' で 時系列 series。 " +
|
|
1224
|
+
"AI agent が 「先週の p95 latency 推移を 日次で」 narrative で carry。 D1 SQLite (= percentile_cont 不在) で window function 経由 nearest-rank 法 計算。",
|
|
1225
|
+
inputSchema: {
|
|
1226
|
+
type: "object",
|
|
1227
|
+
additionalProperties: false,
|
|
1228
|
+
properties: {
|
|
1229
|
+
startTime: {
|
|
1230
|
+
type: "string",
|
|
1231
|
+
description: "範囲開始 ISO timestamp (= UTC、 省略 = 全期間)",
|
|
1232
|
+
},
|
|
1233
|
+
endTime: {
|
|
1234
|
+
type: "string",
|
|
1235
|
+
description: "範囲終了 ISO timestamp",
|
|
1236
|
+
},
|
|
1237
|
+
provider: { type: "string", description: "provider filter" },
|
|
1238
|
+
model: { type: "string", description: "model filter" },
|
|
1239
|
+
metric: {
|
|
1240
|
+
type: "string",
|
|
1241
|
+
description: "metric 種別、 default = 'latency'",
|
|
1242
|
+
enum: ["latency", "cost"],
|
|
1243
|
+
default: "latency",
|
|
1244
|
+
},
|
|
1245
|
+
groupBy: {
|
|
1246
|
+
type: "string",
|
|
1247
|
+
description: "時系列 分割 軸 (省略 = 全期間 1 数値、 'day' = 日次、 'hour' = 時間別、 'minute' = 分別)",
|
|
1248
|
+
enum: ["day", "hour", "minute"],
|
|
1249
|
+
},
|
|
1250
|
+
},
|
|
1251
|
+
},
|
|
1252
|
+
},
|
|
1253
|
+
{
|
|
1254
|
+
name: "list_projects",
|
|
1255
|
+
description: "自 account の active projects を 一覧取得 (= GET /v1/projects、 archived 除外)。 " +
|
|
1256
|
+
"v1.5 project switcher narrative で、 AI agent が 「dev / staging / prod 環境別 観測」 carry。 Pro 5 件 / Team unlimited、 Free は default のみ。",
|
|
1257
|
+
inputSchema: {
|
|
1258
|
+
type: "object",
|
|
1259
|
+
additionalProperties: false,
|
|
1260
|
+
properties: {},
|
|
1261
|
+
},
|
|
1262
|
+
},
|
|
1263
|
+
{
|
|
1264
|
+
name: "create_project",
|
|
1265
|
+
description: "新規 project を 作成 (= POST /v1/projects)。 name = 表示名、 slug = URL-safe 短い識別子 (= /^[a-z][a-z0-9-]{0,31}$/)。 " +
|
|
1266
|
+
"Pro 5 件上限、 Team unlimited、 Free は 不可 (= 403)。 mutation = session 認証時 Origin/Referer 強制 (= dashboard 経由前提)。",
|
|
1267
|
+
inputSchema: {
|
|
1268
|
+
type: "object",
|
|
1269
|
+
additionalProperties: false,
|
|
1270
|
+
required: ["name", "slug"],
|
|
1271
|
+
properties: {
|
|
1272
|
+
name: {
|
|
1273
|
+
type: "string",
|
|
1274
|
+
description: "project 表示名 (1-64 文字)",
|
|
1275
|
+
minLength: 1,
|
|
1276
|
+
maxLength: 64,
|
|
1277
|
+
},
|
|
1278
|
+
slug: {
|
|
1279
|
+
type: "string",
|
|
1280
|
+
description: "URL-safe 短い識別子 (= /^[a-z][a-z0-9-]{0,31}$/、 32 字以内、 先頭小文字、 hyphens 可)",
|
|
1281
|
+
pattern: "^[a-z][a-z0-9-]{0,31}$",
|
|
1282
|
+
},
|
|
1283
|
+
},
|
|
1284
|
+
},
|
|
1285
|
+
},
|
|
1286
|
+
{
|
|
1287
|
+
name: "rename_project",
|
|
1288
|
+
description: "既存 project の name / slug を 更新 (= PATCH /v1/projects/:id)。 name と slug は どちらか一方 / 両方指定可。 slug は URL-safe 制約 (= /^[a-z][a-z0-9-]{0,31}$/)。 default project の rename は 許可。",
|
|
1289
|
+
inputSchema: {
|
|
1290
|
+
type: "object",
|
|
1291
|
+
additionalProperties: false,
|
|
1292
|
+
required: ["projectId"],
|
|
1293
|
+
properties: {
|
|
1294
|
+
projectId: {
|
|
1295
|
+
type: "string",
|
|
1296
|
+
description: "対象 project の id (= list_projects で 取得した UUID)",
|
|
1297
|
+
minLength: 1,
|
|
1298
|
+
maxLength: 64,
|
|
1299
|
+
},
|
|
1300
|
+
name: {
|
|
1301
|
+
type: "string",
|
|
1302
|
+
description: "新しい表示名 (省略時 不変)",
|
|
1303
|
+
minLength: 1,
|
|
1304
|
+
maxLength: 64,
|
|
1305
|
+
},
|
|
1306
|
+
slug: {
|
|
1307
|
+
type: "string",
|
|
1308
|
+
description: "新しい slug (省略時 不変、 /^[a-z][a-z0-9-]{0,31}$/)",
|
|
1309
|
+
pattern: "^[a-z][a-z0-9-]{0,31}$",
|
|
1310
|
+
},
|
|
1311
|
+
},
|
|
1312
|
+
},
|
|
1313
|
+
},
|
|
1314
|
+
{
|
|
1315
|
+
name: "delete_project",
|
|
1316
|
+
description: "project を soft delete (= DELETE /v1/projects/:id、 archived_at 設定で 論理削除)。 default project は 削除不可 (= accounts.default_project_id 参照 整合 のため 400)。 " +
|
|
1317
|
+
"archived 後 calls / alerts は そのまま (= 過去観測は keep)、 新規 record は 別 project に carry する narrative。",
|
|
1318
|
+
inputSchema: {
|
|
1319
|
+
type: "object",
|
|
1320
|
+
additionalProperties: false,
|
|
1321
|
+
required: ["projectId"],
|
|
1322
|
+
properties: {
|
|
1323
|
+
projectId: {
|
|
1324
|
+
type: "string",
|
|
1325
|
+
description: "削除対象 project の id (= list_projects 経由)",
|
|
1326
|
+
minLength: 1,
|
|
1327
|
+
maxLength: 64,
|
|
1328
|
+
},
|
|
1329
|
+
},
|
|
1330
|
+
},
|
|
1331
|
+
},
|
|
1332
|
+
{
|
|
1333
|
+
name: "aggregate_calls",
|
|
1334
|
+
description: "calls の 集計 cube を 取得 (= POST /v1/query/aggregate)。 groupBy (= provider / model / day / hour / minute / tag) × metric (= cost / latency / tokens / count / error_rate) で 1 call で AI agent が 「今月の cost を model 別 に集計」 narrative carry。 " +
|
|
1335
|
+
"tag mode は tagKey 必須 (= alphanumeric + _ - のみ、 例: 'env' / 'feature')。 hour mode は 168h / minute mode は 60min まで (= 超過 400)。 cost = SUM(cost_usd) / latency = AVG(latency_ms) / tokens = SUM(total_tokens) / count = COUNT(*) / error_rate = error ÷ total。 " +
|
|
1336
|
+
"返却 = { groups: [{key, value, count}], total: {value, count} } 形式。 軸 1 操作系 + 自律 AI ops の 分析 narrative の coverage 拡張。",
|
|
1337
|
+
inputSchema: {
|
|
1338
|
+
type: "object",
|
|
1339
|
+
additionalProperties: false,
|
|
1340
|
+
properties: {
|
|
1341
|
+
startTime: {
|
|
1342
|
+
type: "string",
|
|
1343
|
+
description: "範囲開始 ISO timestamp (= UTC、 省略 = 全期間)",
|
|
1344
|
+
},
|
|
1345
|
+
endTime: {
|
|
1346
|
+
type: "string",
|
|
1347
|
+
description: "範囲終了 ISO timestamp (= UTC、 省略 = 現在)",
|
|
1348
|
+
},
|
|
1349
|
+
groupBy: {
|
|
1350
|
+
type: "string",
|
|
1351
|
+
description: "集約軸 (= 'provider' / 'model' / 'day' / 'hour' / 'minute' / 'tag')、 default = 'provider'。 hour は 168h / minute は 60min まで",
|
|
1352
|
+
enum: ["provider", "model", "day", "hour", "minute", "tag"],
|
|
1353
|
+
default: "provider",
|
|
1354
|
+
},
|
|
1355
|
+
metric: {
|
|
1356
|
+
type: "string",
|
|
1357
|
+
description: "metric 種別 (= 'cost' / 'latency' / 'tokens' / 'count' / 'error_rate')、 default = 'cost'",
|
|
1358
|
+
enum: ["cost", "latency", "tokens", "count", "error_rate"],
|
|
1359
|
+
default: "cost",
|
|
1360
|
+
},
|
|
1361
|
+
provider: {
|
|
1362
|
+
type: "string",
|
|
1363
|
+
description: "provider filter (= 'openai' / 'anthropic' 等)、 省略 = 全 provider",
|
|
1364
|
+
},
|
|
1365
|
+
tagKey: {
|
|
1366
|
+
type: "string",
|
|
1367
|
+
description: "groupBy='tag' の時必須。 tags JSON 内 key 名 (alphanumeric + _- のみ、 1-64 文字)",
|
|
1368
|
+
pattern: "^[A-Za-z0-9][A-Za-z0-9_-]{0,63}$",
|
|
1369
|
+
},
|
|
1370
|
+
},
|
|
1371
|
+
},
|
|
1372
|
+
},
|
|
1373
|
+
{
|
|
1374
|
+
name: "list_audit_log",
|
|
1375
|
+
description: "Phase B audit log を 一覧 取得 (= GET /v1/audit-log)。 自 account 限定、 admin role のみ許可 (= viewer/member は 403)。 " +
|
|
1376
|
+
"AI agent が 「最近の招待 / API key revoke / プロジェクト変更」 等の 操作履歴 を 自律参照する narrative (= axis 4 自律 AI ops)。 " +
|
|
1377
|
+
"filter = eventType (= 'invitation.created' / 'api_key.revoked' 等) / targetKind / actorUserId / from / to。 " +
|
|
1378
|
+
"cursor pagination 対応 (= nextCursor 形式 = 'created_at|id')、 max limit 200。",
|
|
1379
|
+
inputSchema: {
|
|
1380
|
+
type: "object",
|
|
1381
|
+
additionalProperties: false,
|
|
1382
|
+
properties: {
|
|
1383
|
+
limit: {
|
|
1384
|
+
type: "integer",
|
|
1385
|
+
description: "返却件数 (1-200、 デフォルト 50)",
|
|
1386
|
+
minimum: 1,
|
|
1387
|
+
maximum: 200,
|
|
1388
|
+
default: 50,
|
|
1389
|
+
},
|
|
1390
|
+
eventType: {
|
|
1391
|
+
type: "string",
|
|
1392
|
+
description: "event_type 完全一致 filter (= 'invitation.created' / 'api_key.revoked' / 'membership.removed' 等)",
|
|
1393
|
+
},
|
|
1394
|
+
targetKind: {
|
|
1395
|
+
type: "string",
|
|
1396
|
+
description: "target_kind filter (= 'invitation' / 'api_key' / 'membership' 等)",
|
|
1397
|
+
},
|
|
1398
|
+
actorUserId: {
|
|
1399
|
+
type: "string",
|
|
1400
|
+
description: "actor_user_id filter (= 特定 user の操作のみ抽出)",
|
|
1401
|
+
},
|
|
1402
|
+
from: {
|
|
1403
|
+
type: "string",
|
|
1404
|
+
description: "範囲開始 ISO timestamp (= UTC)",
|
|
1405
|
+
},
|
|
1406
|
+
to: {
|
|
1407
|
+
type: "string",
|
|
1408
|
+
description: "範囲終了 ISO timestamp (= UTC)",
|
|
1409
|
+
},
|
|
1410
|
+
cursor: {
|
|
1411
|
+
type: "string",
|
|
1412
|
+
description: "ページ送り cursor (= 前 response の nextCursor を そのまま渡す、 'created_at|id' 形式)",
|
|
1413
|
+
},
|
|
1414
|
+
},
|
|
1415
|
+
},
|
|
1416
|
+
},
|
|
1417
|
+
{
|
|
1418
|
+
name: "list_saved_views",
|
|
1419
|
+
description: "保存済 saved views 一覧を 取得 (= GET /v1/saved-views)。 saved view = /calls page で よく使う filter (startDate/endDate/provider/model/limit) の組み合わせを 名前付きで 保存したもの。 " +
|
|
1420
|
+
"AI agent は 「いつもの先週の OpenAI filter で 呼び出し見せて」 narrative で carry できる。 account 単位、 max 20 件。",
|
|
1421
|
+
inputSchema: {
|
|
1422
|
+
type: "object",
|
|
1423
|
+
additionalProperties: false,
|
|
1424
|
+
properties: {},
|
|
1425
|
+
},
|
|
1426
|
+
},
|
|
1427
|
+
{
|
|
1428
|
+
name: "create_saved_view",
|
|
1429
|
+
description: "新規 saved view を 作成 / 同名なら上書き (= POST /v1/saved-views)。 name は account 内一意。 filter は SavedViewFilter shape (= startDate / endDate / provider / model / limit / preset / sortBy? / sortOrder?)。 " +
|
|
1430
|
+
"AI agent が 自動で よく使う filter を 名前付き保存 narrative。 例: 「直近 7 日 GPT-4 のみ」 view を 作って 後で呼ぶ。",
|
|
1431
|
+
inputSchema: {
|
|
1432
|
+
type: "object",
|
|
1433
|
+
additionalProperties: false,
|
|
1434
|
+
required: ["name", "filter"],
|
|
1435
|
+
properties: {
|
|
1436
|
+
name: {
|
|
1437
|
+
type: "string",
|
|
1438
|
+
description: "saved view の名前 (1-80 文字、 改行不可)。 同名で 既存なら 上書き",
|
|
1439
|
+
minLength: 1,
|
|
1440
|
+
maxLength: 80,
|
|
1441
|
+
},
|
|
1442
|
+
filter: {
|
|
1443
|
+
type: "object",
|
|
1444
|
+
description: "filter shape = startDate (ISO) + endDate (ISO) + provider (空可) + model (空可) + limit (number) + preset (string|null) + sortBy? + sortOrder?",
|
|
1445
|
+
required: ["startDate", "endDate", "provider", "model", "limit", "preset"],
|
|
1446
|
+
properties: {
|
|
1447
|
+
startDate: { type: "string", description: "ISO timestamp (= 範囲開始)" },
|
|
1448
|
+
endDate: { type: "string", description: "ISO timestamp (= 範囲終了)" },
|
|
1449
|
+
provider: {
|
|
1450
|
+
type: "string",
|
|
1451
|
+
description: "プロバイダー (= 'openai' / 'anthropic' / 'google' 等)、 空 = 全 provider",
|
|
1452
|
+
},
|
|
1453
|
+
model: {
|
|
1454
|
+
type: "string",
|
|
1455
|
+
description: "モデル名、 空 = 全 model",
|
|
1456
|
+
},
|
|
1457
|
+
limit: {
|
|
1458
|
+
type: "integer",
|
|
1459
|
+
description: "返却件数 cap",
|
|
1460
|
+
minimum: 1,
|
|
1461
|
+
},
|
|
1462
|
+
preset: {
|
|
1463
|
+
type: ["string", "null"],
|
|
1464
|
+
description: "preset 識別子 (= dashboard 既定 filter、 null 可)",
|
|
1465
|
+
},
|
|
1466
|
+
sortBy: { type: "string", description: "ソート対象 column" },
|
|
1467
|
+
sortOrder: {
|
|
1468
|
+
type: "string",
|
|
1469
|
+
description: "ソート方向 ('asc' / 'desc')",
|
|
1470
|
+
enum: ["asc", "desc"],
|
|
1471
|
+
},
|
|
1472
|
+
},
|
|
1473
|
+
},
|
|
1474
|
+
},
|
|
1475
|
+
},
|
|
1476
|
+
},
|
|
1477
|
+
{
|
|
1478
|
+
name: "delete_saved_view",
|
|
1479
|
+
description: "指定 id の saved view を 削除 (= DELETE /v1/saved-views/:id)。 自 account 限定。",
|
|
1480
|
+
inputSchema: {
|
|
1481
|
+
type: "object",
|
|
1482
|
+
additionalProperties: false,
|
|
1483
|
+
required: ["id"],
|
|
1484
|
+
properties: {
|
|
1485
|
+
id: {
|
|
1486
|
+
type: "string",
|
|
1487
|
+
description: "削除対象の saved view id (= UUID)",
|
|
1488
|
+
minLength: 1,
|
|
1489
|
+
maxLength: 64,
|
|
1490
|
+
},
|
|
1491
|
+
},
|
|
1492
|
+
},
|
|
1493
|
+
},
|
|
1494
|
+
{
|
|
1495
|
+
name: "export_calls",
|
|
1496
|
+
description: "calls の large batch export (= POST /v1/query/export)。 query_calls より 高 limit (= plan 別 max records: Free 1000 / Pro 50000、 config/plans.ts)、 全 plan で利用可。 " +
|
|
1497
|
+
"filter 軸 = startTime / endTime / provider / model + limit。 AI agent が 「先月分の全 GPT-4 呼び出しを取り出して傾向分析して」 narrative で 1 call carry。 " +
|
|
1498
|
+
"結果 format は query_calls と 同 JSON (= AI が そのまま CSV / 統計に carry 可能)。",
|
|
1499
|
+
inputSchema: {
|
|
1500
|
+
type: "object",
|
|
1501
|
+
additionalProperties: false,
|
|
1502
|
+
properties: {
|
|
1503
|
+
startTime: {
|
|
1504
|
+
type: "string",
|
|
1505
|
+
description: "範囲 開始 ISO timestamp (= UTC、 省略 = 全期間)",
|
|
1506
|
+
},
|
|
1507
|
+
endTime: {
|
|
1508
|
+
type: "string",
|
|
1509
|
+
description: "範囲 終了 ISO timestamp (= UTC、 省略 = 現在)",
|
|
1510
|
+
},
|
|
1511
|
+
provider: {
|
|
1512
|
+
type: "string",
|
|
1513
|
+
description: "プロバイダー fix (= openai / anthropic / google / azure / cohere)",
|
|
1514
|
+
},
|
|
1515
|
+
model: {
|
|
1516
|
+
type: "string",
|
|
1517
|
+
description: "model 名 fix (= 部分一致なし、 完全一致 例: 'gpt-4o-mini')",
|
|
1518
|
+
},
|
|
1519
|
+
limit: {
|
|
1520
|
+
type: "integer",
|
|
1521
|
+
description: "返却件数 cap。 plan 別 max 内なら そのまま、 超過は plan max に clamp",
|
|
1522
|
+
minimum: 1,
|
|
1523
|
+
},
|
|
1524
|
+
},
|
|
1525
|
+
},
|
|
1526
|
+
},
|
|
1527
|
+
{
|
|
1528
|
+
name: "bulk_delete_calls",
|
|
1529
|
+
description: "指定 call id 一覧 (= max 100) を 自 account 限定で 一括削除する (= POST /v1/calls/bulk-delete)。 " +
|
|
1530
|
+
"AI agent が dogfood / dev test で 蓄積した garbage call の cleanup narrative に carry (= 軸 1 操作系)。 " +
|
|
1531
|
+
"dryRun=true で 削除前に matched 件数を 事前確認可能。 削除は 1 SQL atomic、 audit log に bulk_deleted event を 記録。 " +
|
|
1532
|
+
"FK 制約上 関連 traces / annotations / scores は ON DELETE 経由で 連鎖削除 (= 既存 schema narrative)。",
|
|
1533
|
+
inputSchema: {
|
|
1534
|
+
type: "object",
|
|
1535
|
+
additionalProperties: false,
|
|
1536
|
+
required: ["callIds"],
|
|
1537
|
+
properties: {
|
|
1538
|
+
callIds: {
|
|
1539
|
+
type: "array",
|
|
1540
|
+
description: "削除対象の call id 配列 (1-100 件、 各 1-128 文字)",
|
|
1541
|
+
items: { type: "string", minLength: 1, maxLength: 128 },
|
|
1542
|
+
minItems: 1,
|
|
1543
|
+
maxItems: 100,
|
|
1544
|
+
},
|
|
1545
|
+
dryRun: {
|
|
1546
|
+
type: "boolean",
|
|
1547
|
+
description: "true で 削除せず matched 件数のみ 返却 (= 確認 UX)",
|
|
1548
|
+
default: false,
|
|
1549
|
+
},
|
|
1550
|
+
},
|
|
1551
|
+
},
|
|
1552
|
+
},
|
|
1553
|
+
{
|
|
1554
|
+
name: "compare_eval_runs",
|
|
1555
|
+
description: "2 つの eval run (baseline / candidate) を 比較して per-criterion mean score delta + failed count delta + verdict を 返す (= GET /v1/eval-runs/compare)。 " +
|
|
1556
|
+
"AI agent は 「baseline と 比べて candidate は どう 変わったか」 を 1 call で 把握でき、 prompt 改善 効果や regress 検出 narrative に carry できる (= axis 1 操作系 + axis 4 自律 AI ops 寄与)。 " +
|
|
1557
|
+
"verdict = improved / regressed / mixed / unchanged。 failed count は score <= 2 を 「failed」 で 算出。 同 account 限定。",
|
|
1558
|
+
inputSchema: {
|
|
1559
|
+
type: "object",
|
|
1560
|
+
additionalProperties: false,
|
|
1561
|
+
required: ["baselineRunId", "candidateRunId"],
|
|
1562
|
+
properties: {
|
|
1563
|
+
baselineRunId: {
|
|
1564
|
+
type: "integer",
|
|
1565
|
+
description: "比較元 run の id (= list_eval_runs.runs[].id)",
|
|
1566
|
+
minimum: 1,
|
|
1567
|
+
},
|
|
1568
|
+
candidateRunId: {
|
|
1569
|
+
type: "integer",
|
|
1570
|
+
description: "比較先 run の id (= 同上)、 baseline と 異なる必要あり",
|
|
1571
|
+
minimum: 1,
|
|
640
1572
|
},
|
|
641
1573
|
},
|
|
642
1574
|
},
|
|
@@ -782,6 +1714,29 @@ export async function dispatchTool(input) {
|
|
|
782
1714
|
jsonBody: safeArgs,
|
|
783
1715
|
});
|
|
784
1716
|
}
|
|
1717
|
+
case "update_alert": {
|
|
1718
|
+
// 2026-06-03 v1.6 #13-4 = PATCH /v1/alerts/:id 経由で 既存 alert を partial update。
|
|
1719
|
+
// alertId は path 直前置換、 body は残りの allowlist 済 field (= name /
|
|
1720
|
+
// thresholdValue / windowMinutes / filterProvider / filterModel /
|
|
1721
|
+
// channelKinds / channelTargets / sleepMinutes / enabled / conditions)。
|
|
1722
|
+
// alertType は schema にも入れていない (= immutable)。 backend validateUpdate が
|
|
1723
|
+
// 二重防御で最終 validation。
|
|
1724
|
+
const alertId = validateAlertId(safeArgs["alertId"]);
|
|
1725
|
+
if (!alertId) {
|
|
1726
|
+
return errorResponse("alertId required (pattern: [A-Za-z0-9-]{1,64})");
|
|
1727
|
+
}
|
|
1728
|
+
const { alertId: _ignore, ...body } = safeArgs;
|
|
1729
|
+
return await callApi(apiBase, `/v1/alerts/${encodeURIComponent(alertId)}`, {}, apiKey, { method: "PATCH", jsonBody: body });
|
|
1730
|
+
}
|
|
1731
|
+
case "delete_alert": {
|
|
1732
|
+
// 2026-06-03 v1.6 #13-4 = DELETE /v1/alerts/:id。 関連 alert_events も backend
|
|
1733
|
+
// 側で CASCADE 削除される。 body なし、 alertId のみ path 直前置換。
|
|
1734
|
+
const alertId = validateAlertId(safeArgs["alertId"]);
|
|
1735
|
+
if (!alertId) {
|
|
1736
|
+
return errorResponse("alertId required (pattern: [A-Za-z0-9-]{1,64})");
|
|
1737
|
+
}
|
|
1738
|
+
return await callApi(apiBase, `/v1/alerts/${encodeURIComponent(alertId)}`, {}, apiKey, { method: "DELETE" });
|
|
1739
|
+
}
|
|
785
1740
|
case "get_alert": {
|
|
786
1741
|
const alertId = validateAlertId(safeArgs["alertId"]);
|
|
787
1742
|
if (!alertId) {
|
|
@@ -830,6 +1785,38 @@ export async function dispatchTool(input) {
|
|
|
830
1785
|
}
|
|
831
1786
|
return await callApi(apiBase, `/v1/annotations/${encodeURIComponent(annotationId)}`, {}, apiKey);
|
|
832
1787
|
}
|
|
1788
|
+
case "create_annotation": {
|
|
1789
|
+
// 2026-06-03 v1.6 #13-3 = POST /v1/annotations。 callId は body 必須、
|
|
1790
|
+
// annotationText / label / qualityScore のうち少なくとも 1 つは backend で
|
|
1791
|
+
// validate される (= 「空 annotation」 を 400)。 safeArgs は allowlist 済。
|
|
1792
|
+
const callId = validateCallId(safeArgs["callId"]);
|
|
1793
|
+
if (!callId) {
|
|
1794
|
+
return errorResponse("callId required (pattern: [A-Za-z0-9_-]{1,128})");
|
|
1795
|
+
}
|
|
1796
|
+
return await callApi(apiBase, "/v1/annotations", {}, apiKey, {
|
|
1797
|
+
method: "POST",
|
|
1798
|
+
jsonBody: safeArgs,
|
|
1799
|
+
});
|
|
1800
|
+
}
|
|
1801
|
+
case "update_annotation": {
|
|
1802
|
+
// 2026-06-03 v1.6 #13-3 = PATCH /v1/annotations/:id with allowlisted body。
|
|
1803
|
+
// annotationId は path 直前置換、 body は残りの 3 field (= annotationText /
|
|
1804
|
+
// label / qualityScore)。 callId は immutable (= schema 不在で 二重防御)。
|
|
1805
|
+
const annotationId = validateAnnotationId(safeArgs["annotationId"]);
|
|
1806
|
+
if (!annotationId) {
|
|
1807
|
+
return errorResponse("annotationId required (positive integer up to 10 digits)");
|
|
1808
|
+
}
|
|
1809
|
+
const { annotationId: _ignore, ...body } = safeArgs;
|
|
1810
|
+
return await callApi(apiBase, `/v1/annotations/${encodeURIComponent(annotationId)}`, {}, apiKey, { method: "PATCH", jsonBody: body });
|
|
1811
|
+
}
|
|
1812
|
+
case "delete_annotation": {
|
|
1813
|
+
// 2026-06-03 v1.6 #13-3 = DELETE /v1/annotations/:id。 body なし。
|
|
1814
|
+
const annotationId = validateAnnotationId(safeArgs["annotationId"]);
|
|
1815
|
+
if (!annotationId) {
|
|
1816
|
+
return errorResponse("annotationId required (positive integer up to 10 digits)");
|
|
1817
|
+
}
|
|
1818
|
+
return await callApi(apiBase, `/v1/annotations/${encodeURIComponent(annotationId)}`, {}, apiKey, { method: "DELETE" });
|
|
1819
|
+
}
|
|
833
1820
|
case "list_eval_criteria": {
|
|
834
1821
|
return await callApi(apiBase, "/v1/eval-criteria", {}, apiKey);
|
|
835
1822
|
}
|
|
@@ -840,6 +1827,63 @@ export async function dispatchTool(input) {
|
|
|
840
1827
|
}
|
|
841
1828
|
return await callApi(apiBase, `/v1/eval-criteria/${encodeURIComponent(criterionId)}`, {}, apiKey);
|
|
842
1829
|
}
|
|
1830
|
+
case "create_eval_criterion": {
|
|
1831
|
+
if (typeof safeArgs["name"] !== "string" ||
|
|
1832
|
+
typeof safeArgs["rubric"] !== "string" ||
|
|
1833
|
+
typeof safeArgs["scaleMin"] !== "number" ||
|
|
1834
|
+
typeof safeArgs["scaleMax"] !== "number") {
|
|
1835
|
+
return errorResponse("name + rubric + scaleMin + scaleMax required");
|
|
1836
|
+
}
|
|
1837
|
+
return await callApi(apiBase, "/v1/eval-criteria", {}, apiKey, {
|
|
1838
|
+
method: "POST",
|
|
1839
|
+
jsonBody: safeArgs,
|
|
1840
|
+
});
|
|
1841
|
+
}
|
|
1842
|
+
case "update_eval_criterion": {
|
|
1843
|
+
const criterionId = validateAnnotationId(safeArgs["criterionId"]);
|
|
1844
|
+
if (!criterionId) {
|
|
1845
|
+
return errorResponse("criterionId required (positive integer up to 10 digits)");
|
|
1846
|
+
}
|
|
1847
|
+
if (typeof safeArgs["name"] !== "string" ||
|
|
1848
|
+
typeof safeArgs["rubric"] !== "string" ||
|
|
1849
|
+
typeof safeArgs["scaleMin"] !== "number" ||
|
|
1850
|
+
typeof safeArgs["scaleMax"] !== "number") {
|
|
1851
|
+
return errorResponse("name + rubric + scaleMin + scaleMax required");
|
|
1852
|
+
}
|
|
1853
|
+
const { criterionId: _ignore, ...body } = safeArgs;
|
|
1854
|
+
return await callApi(apiBase, `/v1/eval-criteria/${encodeURIComponent(criterionId)}`, {}, apiKey, {
|
|
1855
|
+
method: "PATCH",
|
|
1856
|
+
jsonBody: body,
|
|
1857
|
+
});
|
|
1858
|
+
}
|
|
1859
|
+
case "delete_eval_criterion": {
|
|
1860
|
+
const criterionId = validateAnnotationId(safeArgs["criterionId"]);
|
|
1861
|
+
if (!criterionId) {
|
|
1862
|
+
return errorResponse("criterionId required (positive integer up to 10 digits)");
|
|
1863
|
+
}
|
|
1864
|
+
return await callApi(apiBase, `/v1/eval-criteria/${encodeURIComponent(criterionId)}`, {}, apiKey, { method: "DELETE" });
|
|
1865
|
+
}
|
|
1866
|
+
case "test_webhook": {
|
|
1867
|
+
if (typeof safeArgs["url"] !== "string") {
|
|
1868
|
+
return errorResponse("url required (https://...)");
|
|
1869
|
+
}
|
|
1870
|
+
return await callApi(apiBase, "/v1/alerts/test-webhook", {}, apiKey, {
|
|
1871
|
+
method: "POST",
|
|
1872
|
+
jsonBody: safeArgs,
|
|
1873
|
+
});
|
|
1874
|
+
}
|
|
1875
|
+
case "get_llm_budget": {
|
|
1876
|
+
return await callApi(apiBase, "/v1/account/llm-feature-budget", {}, apiKey);
|
|
1877
|
+
}
|
|
1878
|
+
case "raise_llm_budget": {
|
|
1879
|
+
if (typeof safeArgs["budgetUsd"] !== "number") {
|
|
1880
|
+
return errorResponse("budgetUsd required (number, 5-500)");
|
|
1881
|
+
}
|
|
1882
|
+
return await callApi(apiBase, "/v1/account/llm-feature-budget", {}, apiKey, {
|
|
1883
|
+
method: "PATCH",
|
|
1884
|
+
jsonBody: safeArgs,
|
|
1885
|
+
});
|
|
1886
|
+
}
|
|
843
1887
|
case "list_prompts": {
|
|
844
1888
|
const q = {};
|
|
845
1889
|
if (typeof safeArgs["label"] === "string")
|
|
@@ -857,6 +1901,46 @@ export async function dispatchTool(input) {
|
|
|
857
1901
|
}
|
|
858
1902
|
return await callApi(apiBase, `/v1/prompts/${encodeURIComponent(promptId)}`, {}, apiKey);
|
|
859
1903
|
}
|
|
1904
|
+
case "create_prompt": {
|
|
1905
|
+
if (typeof safeArgs["name"] !== "string" || typeof safeArgs["version"] !== "string" || typeof safeArgs["template"] !== "string") {
|
|
1906
|
+
return errorResponse("name + version + template required");
|
|
1907
|
+
}
|
|
1908
|
+
return await callApi(apiBase, "/v1/prompts", {}, apiKey, {
|
|
1909
|
+
method: "POST",
|
|
1910
|
+
jsonBody: safeArgs,
|
|
1911
|
+
});
|
|
1912
|
+
}
|
|
1913
|
+
case "update_prompt": {
|
|
1914
|
+
const promptId = validateAnnotationId(safeArgs["promptId"]);
|
|
1915
|
+
if (!promptId) {
|
|
1916
|
+
return errorResponse("promptId required (positive integer up to 10 digits)");
|
|
1917
|
+
}
|
|
1918
|
+
const { promptId: _ignore, ...body } = safeArgs;
|
|
1919
|
+
return await callApi(apiBase, `/v1/prompts/${encodeURIComponent(promptId)}`, {}, apiKey, {
|
|
1920
|
+
method: "PATCH",
|
|
1921
|
+
jsonBody: body,
|
|
1922
|
+
});
|
|
1923
|
+
}
|
|
1924
|
+
case "rename_prompt": {
|
|
1925
|
+
const promptId = validateAnnotationId(safeArgs["promptId"]);
|
|
1926
|
+
if (!promptId) {
|
|
1927
|
+
return errorResponse("promptId required (positive integer up to 10 digits)");
|
|
1928
|
+
}
|
|
1929
|
+
if (typeof safeArgs["name"] !== "string" || typeof safeArgs["version"] !== "string") {
|
|
1930
|
+
return errorResponse("name + version required");
|
|
1931
|
+
}
|
|
1932
|
+
return await callApi(apiBase, `/v1/prompts/${encodeURIComponent(promptId)}/rename`, {}, apiKey, {
|
|
1933
|
+
method: "POST",
|
|
1934
|
+
jsonBody: { name: safeArgs["name"], version: safeArgs["version"] },
|
|
1935
|
+
});
|
|
1936
|
+
}
|
|
1937
|
+
case "delete_prompt": {
|
|
1938
|
+
const promptId = validateAnnotationId(safeArgs["promptId"]);
|
|
1939
|
+
if (!promptId) {
|
|
1940
|
+
return errorResponse("promptId required (positive integer up to 10 digits)");
|
|
1941
|
+
}
|
|
1942
|
+
return await callApi(apiBase, `/v1/prompts/${encodeURIComponent(promptId)}`, {}, apiKey, { method: "DELETE" });
|
|
1943
|
+
}
|
|
860
1944
|
case "list_safety_assessments": {
|
|
861
1945
|
const q = {};
|
|
862
1946
|
if (typeof safeArgs["callId"] === "string") {
|
|
@@ -889,6 +1973,177 @@ export async function dispatchTool(input) {
|
|
|
889
1973
|
}
|
|
890
1974
|
return await callApi(apiBase, `/v1/eval-runs/${encodeURIComponent(runId)}`, {}, apiKey);
|
|
891
1975
|
}
|
|
1976
|
+
case "get_percentiles": {
|
|
1977
|
+
const body = {};
|
|
1978
|
+
if (typeof safeArgs["startTime"] === "string")
|
|
1979
|
+
body["startTime"] = safeArgs["startTime"];
|
|
1980
|
+
if (typeof safeArgs["endTime"] === "string")
|
|
1981
|
+
body["endTime"] = safeArgs["endTime"];
|
|
1982
|
+
if (typeof safeArgs["provider"] === "string")
|
|
1983
|
+
body["provider"] = safeArgs["provider"];
|
|
1984
|
+
if (typeof safeArgs["model"] === "string")
|
|
1985
|
+
body["model"] = safeArgs["model"];
|
|
1986
|
+
if (typeof safeArgs["metric"] === "string")
|
|
1987
|
+
body["metric"] = safeArgs["metric"];
|
|
1988
|
+
if (typeof safeArgs["groupBy"] === "string")
|
|
1989
|
+
body["groupBy"] = safeArgs["groupBy"];
|
|
1990
|
+
return await callApi(apiBase, "/v1/query/percentiles", {}, apiKey, {
|
|
1991
|
+
method: "POST",
|
|
1992
|
+
jsonBody: body,
|
|
1993
|
+
});
|
|
1994
|
+
}
|
|
1995
|
+
case "list_projects": {
|
|
1996
|
+
return await callApi(apiBase, "/v1/projects", {}, apiKey);
|
|
1997
|
+
}
|
|
1998
|
+
case "create_project": {
|
|
1999
|
+
const name = safeArgs["name"];
|
|
2000
|
+
const slug = safeArgs["slug"];
|
|
2001
|
+
if (typeof name !== "string" || name.length === 0 || name.length > 64) {
|
|
2002
|
+
return errorResponse("name required (1-64 chars)");
|
|
2003
|
+
}
|
|
2004
|
+
if (typeof slug !== "string" || !/^[a-z][a-z0-9-]{0,31}$/.test(slug)) {
|
|
2005
|
+
return errorResponse("slug required (lowercase alphanumeric + hyphens, max 32 chars, starts with letter)");
|
|
2006
|
+
}
|
|
2007
|
+
return await callApi(apiBase, "/v1/projects", {}, apiKey, {
|
|
2008
|
+
method: "POST",
|
|
2009
|
+
jsonBody: { name, slug },
|
|
2010
|
+
});
|
|
2011
|
+
}
|
|
2012
|
+
case "rename_project": {
|
|
2013
|
+
const projectId = safeArgs["projectId"];
|
|
2014
|
+
if (typeof projectId !== "string" || projectId.length === 0 || projectId.length > 64) {
|
|
2015
|
+
return errorResponse("projectId required (1-64 chars)");
|
|
2016
|
+
}
|
|
2017
|
+
const body = {};
|
|
2018
|
+
if (typeof safeArgs["name"] === "string")
|
|
2019
|
+
body["name"] = safeArgs["name"];
|
|
2020
|
+
if (typeof safeArgs["slug"] === "string") {
|
|
2021
|
+
if (!/^[a-z][a-z0-9-]{0,31}$/.test(safeArgs["slug"])) {
|
|
2022
|
+
return errorResponse("slug must match /^[a-z][a-z0-9-]{0,31}$/");
|
|
2023
|
+
}
|
|
2024
|
+
body["slug"] = safeArgs["slug"];
|
|
2025
|
+
}
|
|
2026
|
+
if (Object.keys(body).length === 0) {
|
|
2027
|
+
return errorResponse("at least one of name / slug required");
|
|
2028
|
+
}
|
|
2029
|
+
return await callApi(apiBase, `/v1/projects/${encodeURIComponent(projectId)}`, {}, apiKey, { method: "PATCH", jsonBody: body });
|
|
2030
|
+
}
|
|
2031
|
+
case "delete_project": {
|
|
2032
|
+
const projectId = safeArgs["projectId"];
|
|
2033
|
+
if (typeof projectId !== "string" || projectId.length === 0 || projectId.length > 64) {
|
|
2034
|
+
return errorResponse("projectId required (1-64 chars)");
|
|
2035
|
+
}
|
|
2036
|
+
return await callApi(apiBase, `/v1/projects/${encodeURIComponent(projectId)}`, {}, apiKey, { method: "DELETE" });
|
|
2037
|
+
}
|
|
2038
|
+
case "aggregate_calls": {
|
|
2039
|
+
const body = {};
|
|
2040
|
+
if (typeof safeArgs["startTime"] === "string")
|
|
2041
|
+
body["startTime"] = safeArgs["startTime"];
|
|
2042
|
+
if (typeof safeArgs["endTime"] === "string")
|
|
2043
|
+
body["endTime"] = safeArgs["endTime"];
|
|
2044
|
+
if (typeof safeArgs["groupBy"] === "string")
|
|
2045
|
+
body["groupBy"] = safeArgs["groupBy"];
|
|
2046
|
+
if (typeof safeArgs["metric"] === "string")
|
|
2047
|
+
body["metric"] = safeArgs["metric"];
|
|
2048
|
+
if (typeof safeArgs["provider"] === "string")
|
|
2049
|
+
body["provider"] = safeArgs["provider"];
|
|
2050
|
+
if (typeof safeArgs["tagKey"] === "string")
|
|
2051
|
+
body["tagKey"] = safeArgs["tagKey"];
|
|
2052
|
+
return await callApi(apiBase, "/v1/query/aggregate", {}, apiKey, {
|
|
2053
|
+
method: "POST",
|
|
2054
|
+
jsonBody: body,
|
|
2055
|
+
});
|
|
2056
|
+
}
|
|
2057
|
+
case "list_audit_log": {
|
|
2058
|
+
const q = {};
|
|
2059
|
+
if (typeof safeArgs["limit"] === "number")
|
|
2060
|
+
q["limit"] = safeArgs["limit"];
|
|
2061
|
+
if (typeof safeArgs["eventType"] === "string")
|
|
2062
|
+
q["eventType"] = safeArgs["eventType"];
|
|
2063
|
+
if (typeof safeArgs["targetKind"] === "string")
|
|
2064
|
+
q["targetKind"] = safeArgs["targetKind"];
|
|
2065
|
+
if (typeof safeArgs["actorUserId"] === "string")
|
|
2066
|
+
q["actorUserId"] = safeArgs["actorUserId"];
|
|
2067
|
+
if (typeof safeArgs["from"] === "string")
|
|
2068
|
+
q["from"] = safeArgs["from"];
|
|
2069
|
+
if (typeof safeArgs["to"] === "string")
|
|
2070
|
+
q["to"] = safeArgs["to"];
|
|
2071
|
+
if (typeof safeArgs["cursor"] === "string")
|
|
2072
|
+
q["cursor"] = safeArgs["cursor"];
|
|
2073
|
+
return await callApi(apiBase, "/v1/audit-log", q, apiKey);
|
|
2074
|
+
}
|
|
2075
|
+
case "list_saved_views": {
|
|
2076
|
+
return await callApi(apiBase, "/v1/saved-views", {}, apiKey);
|
|
2077
|
+
}
|
|
2078
|
+
case "create_saved_view": {
|
|
2079
|
+
const name = safeArgs["name"];
|
|
2080
|
+
const filter = safeArgs["filter"];
|
|
2081
|
+
if (typeof name !== "string" || name.length === 0 || name.length > 80) {
|
|
2082
|
+
return errorResponse("name required (1-80 chars)");
|
|
2083
|
+
}
|
|
2084
|
+
if (!filter || typeof filter !== "object") {
|
|
2085
|
+
return errorResponse("filter required (object)");
|
|
2086
|
+
}
|
|
2087
|
+
return await callApi(apiBase, "/v1/saved-views", {}, apiKey, {
|
|
2088
|
+
method: "POST",
|
|
2089
|
+
jsonBody: { name, filter },
|
|
2090
|
+
});
|
|
2091
|
+
}
|
|
2092
|
+
case "delete_saved_view": {
|
|
2093
|
+
const id = safeArgs["id"];
|
|
2094
|
+
if (typeof id !== "string" || id.length === 0 || id.length > 64) {
|
|
2095
|
+
return errorResponse("id required (1-64 chars)");
|
|
2096
|
+
}
|
|
2097
|
+
return await callApi(apiBase, `/v1/saved-views/${encodeURIComponent(id)}`, {}, apiKey, { method: "DELETE" });
|
|
2098
|
+
}
|
|
2099
|
+
case "export_calls": {
|
|
2100
|
+
const body = {};
|
|
2101
|
+
if (typeof safeArgs["startTime"] === "string")
|
|
2102
|
+
body["startTime"] = safeArgs["startTime"];
|
|
2103
|
+
if (typeof safeArgs["endTime"] === "string")
|
|
2104
|
+
body["endTime"] = safeArgs["endTime"];
|
|
2105
|
+
if (typeof safeArgs["provider"] === "string")
|
|
2106
|
+
body["provider"] = safeArgs["provider"];
|
|
2107
|
+
if (typeof safeArgs["model"] === "string")
|
|
2108
|
+
body["model"] = safeArgs["model"];
|
|
2109
|
+
if (typeof safeArgs["limit"] === "number")
|
|
2110
|
+
body["limit"] = safeArgs["limit"];
|
|
2111
|
+
return await callApi(apiBase, "/v1/query/export", {}, apiKey, {
|
|
2112
|
+
method: "POST",
|
|
2113
|
+
jsonBody: body,
|
|
2114
|
+
});
|
|
2115
|
+
}
|
|
2116
|
+
case "bulk_delete_calls": {
|
|
2117
|
+
const ids = safeArgs["callIds"];
|
|
2118
|
+
if (!Array.isArray(ids) || ids.length === 0 || ids.length > 100) {
|
|
2119
|
+
return errorResponse("callIds required (non-empty string array, max 100)");
|
|
2120
|
+
}
|
|
2121
|
+
const strIds = [];
|
|
2122
|
+
for (const id of ids) {
|
|
2123
|
+
if (typeof id !== "string" || id.length === 0 || id.length > 128) {
|
|
2124
|
+
return errorResponse("each callId must be non-empty string up to 128 chars");
|
|
2125
|
+
}
|
|
2126
|
+
strIds.push(id);
|
|
2127
|
+
}
|
|
2128
|
+
const body = { callIds: strIds };
|
|
2129
|
+
if (safeArgs["dryRun"] === true)
|
|
2130
|
+
body["dryRun"] = true;
|
|
2131
|
+
return await callApi(apiBase, "/v1/calls/bulk-delete", {}, apiKey, {
|
|
2132
|
+
method: "POST",
|
|
2133
|
+
jsonBody: body,
|
|
2134
|
+
});
|
|
2135
|
+
}
|
|
2136
|
+
case "compare_eval_runs": {
|
|
2137
|
+
const baselineId = validateAnnotationId(safeArgs["baselineRunId"]);
|
|
2138
|
+
const candidateId = validateAnnotationId(safeArgs["candidateRunId"]);
|
|
2139
|
+
if (!baselineId || !candidateId) {
|
|
2140
|
+
return errorResponse("baselineRunId + candidateRunId required (positive integers up to 10 digits)");
|
|
2141
|
+
}
|
|
2142
|
+
if (baselineId === candidateId) {
|
|
2143
|
+
return errorResponse("baselineRunId and candidateRunId must differ");
|
|
2144
|
+
}
|
|
2145
|
+
return await callApi(apiBase, "/v1/eval-runs/compare", { baseline: baselineId, candidate: candidateId }, apiKey);
|
|
2146
|
+
}
|
|
892
2147
|
case "run_eval": {
|
|
893
2148
|
const body = {};
|
|
894
2149
|
if (typeof safeArgs["name"] === "string")
|