harness-mcp-v2 3.0.3 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -11
- package/build/prompts/create-eval-suite.js +1 -1
- package/build/prompts/run-evaluation.d.ts.map +1 -1
- package/build/prompts/run-evaluation.js +3 -1
- package/build/prompts/run-evaluation.js.map +1 -1
- package/build/registry/toolsets/ai-evals.d.ts.map +1 -1
- package/build/registry/toolsets/ai-evals.js +131 -43
- package/build/registry/toolsets/ai-evals.js.map +1 -1
- package/build/tools/harness-create.d.ts.map +1 -1
- package/build/tools/harness-create.js +2 -0
- package/build/tools/harness-create.js.map +1 -1
- package/build/tools/harness-delete.d.ts.map +1 -1
- package/build/tools/harness-delete.js +2 -0
- package/build/tools/harness-delete.js.map +1 -1
- package/build/tools/harness-describe.d.ts.map +1 -1
- package/build/tools/harness-describe.js +4 -0
- package/build/tools/harness-describe.js.map +1 -1
- package/build/tools/harness-diagnose.d.ts.map +1 -1
- package/build/tools/harness-diagnose.js +3 -0
- package/build/tools/harness-diagnose.js.map +1 -1
- package/build/tools/harness-execute.d.ts.map +1 -1
- package/build/tools/harness-execute.js +2 -0
- package/build/tools/harness-execute.js.map +1 -1
- package/build/tools/harness-get.d.ts.map +1 -1
- package/build/tools/harness-get.js +3 -0
- package/build/tools/harness-get.js.map +1 -1
- package/build/tools/harness-list.d.ts.map +1 -1
- package/build/tools/harness-list.js +3 -0
- package/build/tools/harness-list.js.map +1 -1
- package/build/tools/harness-schema.d.ts.map +1 -1
- package/build/tools/harness-schema.js +5 -1
- package/build/tools/harness-schema.js.map +1 -1
- package/build/tools/harness-search.d.ts.map +1 -1
- package/build/tools/harness-search.js +3 -0
- package/build/tools/harness-search.js.map +1 -1
- package/build/tools/harness-status.d.ts.map +1 -1
- package/build/tools/harness-status.js +3 -0
- package/build/tools/harness-status.js.map +1 -1
- package/build/tools/harness-update.d.ts.map +1 -1
- package/build/tools/harness-update.js +2 -0
- package/build/tools/harness-update.js.map +1 -1
- package/build/tools/output-schemas.d.ts +50 -0
- package/build/tools/output-schemas.d.ts.map +1 -0
- package/build/tools/output-schemas.js +59 -0
- package/build/tools/output-schemas.js.map +1 -0
- package/build/utils/response-formatter.d.ts +1 -0
- package/build/utils/response-formatter.d.ts.map +1 -1
- package/build/utils/response-formatter.js +8 -0
- package/build/utils/response-formatter.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
## Harness MCP Server 2.0
|
|
2
2
|
|
|
3
|
-
An MCP (Model Context Protocol) server that gives AI agents full access to the Harness.io platform through 11 consolidated tools and
|
|
3
|
+
An MCP (Model Context Protocol) server that gives AI agents full access to the Harness.io platform through 11 consolidated tools and 187 resource types.
|
|
4
4
|
|
|
5
5
|
## Why Use This MCP Server
|
|
6
6
|
|
|
@@ -8,8 +8,8 @@ Most MCP servers map one tool per API endpoint. For a platform as broad as Harne
|
|
|
8
8
|
|
|
9
9
|
This server is built differently:
|
|
10
10
|
|
|
11
|
-
- **11 tools,
|
|
12
|
-
- **Full platform coverage.**
|
|
11
|
+
- **11 tools, 187 resource types.** A registry-based dispatch system routes `harness_list`, `harness_get`, `harness_create`, etc. to any Harness resource — pipelines, services, environments, orgs, projects, feature flags, cost data, and more. The LLM picks from 11 tools instead of hundreds.
|
|
12
|
+
- **Full platform coverage.** 32 toolsets spanning CI/CD, GitOps, Feature Flags, Cloud Cost Management, Security Testing, Chaos Engineering, Database DevOps, Internal Developer Portal, Software Supply Chain, Governance, Service Overrides, Visualizations, and more. Not just pipelines — the entire Harness platform.
|
|
13
13
|
- **Multi-project workflows out of the box.** Agents discover organizations and projects dynamically — no hardcoded env vars needed. Ask "show failed executions across all projects" and the agent can navigate the full account hierarchy.
|
|
14
14
|
- **31 prompt templates.** Pre-built prompts for common workflows: build & deploy apps end-to-end, debug failed pipelines, review DORA metrics, triage vulnerabilities, optimize cloud costs, audit access control, plan feature flag rollouts, review pull requests, approve pending pipelines, and more.
|
|
15
15
|
- **Works everywhere.** Stdio transport for local clients (Claude Desktop, Cursor, Windsurf), HTTP transport for remote/shared deployments, Docker and Kubernetes ready.
|
|
@@ -528,7 +528,7 @@ The server automatically loads environment variables from a `.env` file in the p
|
|
|
528
528
|
| `HARNESS_MAX_BODY_SIZE_MB` | No | `10` | Max HTTP request body size in MB for `http` transport |
|
|
529
529
|
| `HARNESS_RATE_LIMIT_RPS` | No | `10` | Client-side request throttle (requests per second) to Harness APIs |
|
|
530
530
|
| `LOG_LEVEL` | No | `info` | Log verbosity: `debug`, `info`, `warn`, `error` |
|
|
531
|
-
| `HARNESS_TOOLSETS` | No | *(
|
|
531
|
+
| `HARNESS_TOOLSETS` | No | *(all)* | Comma-separated toolset list. Empty loads all toolsets. Supports `+name` to explicitly include and `-name` to remove toolsets (see [Toolset Filtering](#toolset-filtering)) |
|
|
532
532
|
| `HARNESS_READ_ONLY` | No | `false` | Block all mutating operations (create, update, delete, execute). Only list and get are allowed. Useful for shared/demo environments |
|
|
533
533
|
| `HARNESS_AUTO_APPROVE_RISK` | No | `none` | Risk-based auto-approve threshold for autonomous workflows. Operations at or below this risk proceed without confirmation. Values: `none`, `low_write`, `medium_write`, `high_write`, `all`. See [Elicitation](#elicitation) |
|
|
534
534
|
| `HARNESS_SKIP_ELICITATION` | No | `false` | **Deprecated** — use `HARNESS_AUTO_APPROVE_RISK=all` instead. Kept for backward compatibility |
|
|
@@ -991,7 +991,7 @@ Harness pipelines can be stored in three ways:
|
|
|
991
991
|
|
|
992
992
|
## Resource Types
|
|
993
993
|
|
|
994
|
-
|
|
994
|
+
187 resource types organized across 32 toolsets. Each resource type supports a subset of CRUD operations and optional execute actions.
|
|
995
995
|
|
|
996
996
|
### Platform
|
|
997
997
|
|
|
@@ -1446,14 +1446,14 @@ Inline PNG chart visualizations rendered from Harness data. These are metadata-o
|
|
|
1446
1446
|
|
|
1447
1447
|
## Toolset Filtering
|
|
1448
1448
|
|
|
1449
|
-
By default,
|
|
1449
|
+
By default, all 32 toolsets are enabled.
|
|
1450
1450
|
|
|
1451
|
-
###
|
|
1451
|
+
### Adding toolsets with `+` prefix
|
|
1452
1452
|
|
|
1453
|
-
Use the `+` prefix to
|
|
1453
|
+
Use the `+` prefix to explicitly include toolsets alongside all defaults (useful if a toolset becomes opt-in in the future):
|
|
1454
1454
|
|
|
1455
1455
|
```bash
|
|
1456
|
-
#
|
|
1456
|
+
# Explicitly include ai-evals alongside all defaults
|
|
1457
1457
|
HARNESS_TOOLSETS=+ai-evals
|
|
1458
1458
|
```
|
|
1459
1459
|
|
|
@@ -1518,7 +1518,7 @@ Available toolset names:
|
|
|
1518
1518
|
| `overrides` | service_override |
|
|
1519
1519
|
| `settings` | setting |
|
|
1520
1520
|
| `visualizations` | visual_timeline, visual_stage_flow, visual_health_dashboard, visual_pie_chart, visual_bar_chart, visual_timeseries, visual_architecture |
|
|
1521
|
-
| `ai-evals`
|
|
1521
|
+
| `ai-evals` | eval_dataset, eval_dataset_item, evaluation, eval_run, eval_run_item, eval_run_by_eval, eval_metric, eval_metric_set, eval_metric_set_entry, eval_suite, eval_suite_evaluation, eval_suite_run, eval_target, eval_model, eval_annotation, eval_analytics, eval_git_settings, eval_registry_item |
|
|
1522
1522
|
|
|
1523
1523
|
|
|
1524
1524
|
## Architecture
|
|
@@ -1537,7 +1537,7 @@ Available toolset names:
|
|
|
1537
1537
|
+--------v---------+
|
|
1538
1538
|
| Registry | <-- Declarative resource definitions
|
|
1539
1539
|
| 32 Toolsets | (data files, not code)
|
|
1540
|
-
|
|
|
1540
|
+
| 187 Resource Types|
|
|
1541
1541
|
+--------+---------+
|
|
1542
1542
|
|
|
|
1543
1543
|
+--------v---------+
|
|
@@ -27,7 +27,7 @@ export function registerCreateEvalSuitePrompt(server) {
|
|
|
27
27
|
|
|
28
28
|
1. **List existing evaluations** — \`harness_list(resource_type="evaluation")\` — pick eval IDs to include.
|
|
29
29
|
|
|
30
|
-
2. **Create the suite** — \`harness_create(resource_type="eval_suite", body={ name: "${suite_name}", pass_strategy: "all_must_pass" | "weighted_threshold", pass_threshold?: number })\`
|
|
30
|
+
2. **Create the suite** — \`harness_create(resource_type="eval_suite", body={ name: "${suite_name}", pass_strategy: "all_must_pass" | "weighted_threshold", pass_threshold?: number, schedule?: { cron: "0 8 * * *", timezone?: "UTC", enabled?: true } })\`
|
|
31
31
|
|
|
32
32
|
3. **Add members** (choose one pattern)
|
|
33
33
|
- **Add one at a time**: \`harness_create(resource_type="eval_suite_evaluation", params={ suite_id: "<SUITE_ID>" }, body={ evaluation_id: "<EVAL_ID>", is_required: true })\`
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-evaluation.d.ts","sourceRoot":"","sources":["../../src/prompts/run-evaluation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAEzE,wBAAgB,2BAA2B,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,
|
|
1
|
+
{"version":3,"file":"run-evaluation.d.ts","sourceRoot":"","sources":["../../src/prompts/run-evaluation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAEzE,wBAAgB,2BAA2B,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAqDnE"}
|
|
@@ -40,7 +40,9 @@ ${org_id ? `org_id=${org_id}` : "Use default HARNESS_ORG"}; ${project_id ? `proj
|
|
|
40
40
|
## Notes
|
|
41
41
|
- All AI Evals paths are under the Harness platform host (same \`HARNESS_BASE_URL\` as the UI); scope is \`/ai-evals/api/v1/orgs/{org}/projects/{project}/...\`.
|
|
42
42
|
- Ensure \`Harness-Account\` and auth are configured on the MCP server.
|
|
43
|
-
-
|
|
43
|
+
- Resources support **git-backed storage**: pass \`storage_type: "git"\` + \`git_source: { file_path: "..." }\` on create to reference YAML definitions in a repo instead of managing inline.
|
|
44
|
+
- For comparing runs: \`harness_execute(resource_type="eval_run", action="compare", params={ run_ids: "uuid1,uuid2,uuid3" })\` (no resource_id needed for compare).
|
|
45
|
+
- For git-backed evals, pass \`branch\` in the trigger body to run against a PR branch.`,
|
|
44
46
|
},
|
|
45
47
|
},
|
|
46
48
|
],
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-evaluation.js","sourceRoot":"","sources":["../../src/prompts/run-evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,CAAC,MAAM,QAAQ,CAAC;AAG5B,MAAM,UAAU,2BAA2B,CAAC,MAAiB;IAC3D,MAAM,CAAC,cAAc,CACnB,gBAAgB,EAChB;QACE,WAAW,EACT,oHAAoH;QACtH,UAAU,EAAE;YACV,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0EAA0E,CAAC;YACrG,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC,CAAC,QAAQ,EAAE;YAChE,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC,CAAC,QAAQ,EAAE;SACzE;KACF,EACD,KAAK,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;QACvC,QAAQ,EAAE;YACR;gBACE,IAAI,EAAE,MAAe;gBACrB,OAAO,EAAE;oBACP,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE;;;EAGhB,IAAI;;;EAGJ,MAAM,CAAC,CAAC,CAAC,UAAU,MAAM,EAAE,CAAC,CAAC,CAAC,yBAAyB,KAAK,UAAU,CAAC,CAAC,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC,CAAC,6BAA6B
|
|
1
|
+
{"version":3,"file":"run-evaluation.js","sourceRoot":"","sources":["../../src/prompts/run-evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,CAAC,MAAM,QAAQ,CAAC;AAG5B,MAAM,UAAU,2BAA2B,CAAC,MAAiB;IAC3D,MAAM,CAAC,cAAc,CACnB,gBAAgB,EAChB;QACE,WAAW,EACT,oHAAoH;QACtH,UAAU,EAAE;YACV,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0EAA0E,CAAC;YACrG,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC,CAAC,QAAQ,EAAE;YAChE,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC,CAAC,QAAQ,EAAE;SACzE;KACF,EACD,KAAK,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;QACvC,QAAQ,EAAE;YACR;gBACE,IAAI,EAAE,MAAe;gBACrB,OAAO,EAAE;oBACP,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE;;;EAGhB,IAAI;;;EAGJ,MAAM,CAAC,CAAC,CAAC,UAAU,MAAM,EAAE,CAAC,CAAC,CAAC,yBAAyB,KAAK,UAAU,CAAC,CAAC,CAAC,cAAc,UAAU,EAAE,CAAC,CAAC,CAAC,6BAA6B;;;;;;;;;;;;;;;;;;;;;;;wFAuB7C;iBAC7E;aACF;SACF;KACF,CAAC,CACH,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-evals.d.ts","sourceRoot":"","sources":["../../../src/registry/toolsets/ai-evals.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,KAAK,EAAiC,iBAAiB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"ai-evals.d.ts","sourceRoot":"","sources":["../../../src/registry/toolsets/ai-evals.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,KAAK,EAAiC,iBAAiB,EAAE,MAAM,aAAa,CAAC;AA4jBpF,eAAO,MAAM,cAAc,EAAE,iBAitC5B,CAAC"}
|
|
@@ -24,10 +24,12 @@ const createDatasetSchema = {
|
|
|
24
24
|
name: "items",
|
|
25
25
|
type: "array",
|
|
26
26
|
required: false,
|
|
27
|
-
description: "Inline dataset items (CreateDatasetItemRequest[])",
|
|
27
|
+
description: "Inline dataset items (CreateDatasetItemRequest[]); must be empty when storage_type='git'",
|
|
28
28
|
itemType: "object",
|
|
29
29
|
},
|
|
30
30
|
{ name: "metadata", type: "object", required: false, description: "Arbitrary metadata" },
|
|
31
|
+
{ name: "storage_type", type: "string", required: false, description: "managed (default) | git" },
|
|
32
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
31
33
|
],
|
|
32
34
|
};
|
|
33
35
|
const updateDatasetSchema = {
|
|
@@ -40,22 +42,25 @@ const updateDatasetSchema = {
|
|
|
40
42
|
name: "items",
|
|
41
43
|
type: "array",
|
|
42
44
|
required: false,
|
|
43
|
-
description: "Replace items (CreateDatasetItemRequest[])",
|
|
45
|
+
description: "Replace items (CreateDatasetItemRequest[]); must be empty when storage_type='git'",
|
|
44
46
|
itemType: "object",
|
|
45
47
|
},
|
|
46
48
|
{ name: "metadata", type: "object", required: false, description: "Metadata" },
|
|
49
|
+
{ name: "storage_type", type: "string", required: false, description: "managed | git (switches storage mode)" },
|
|
50
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when switching to storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
47
51
|
],
|
|
48
52
|
};
|
|
49
53
|
const createDatasetItemSchema = {
|
|
50
54
|
description: "Create dataset item",
|
|
51
55
|
fields: [
|
|
52
|
-
{ name: "id", type: "string", required: false, description: "Business id for the row" },
|
|
56
|
+
{ name: "id", type: "string", required: false, description: "Business id for the row (required for CLI/backend run correlation)" },
|
|
53
57
|
{ name: "input", type: "object", required: true, description: "Model input (JSON)" },
|
|
54
58
|
{ name: "expected_output", type: "object", required: false, description: "Expected output" },
|
|
55
|
-
{ name: "precomputed_output", type: "object", required: false, description: "Precomputed target output" },
|
|
56
|
-
{ name: "context", type: "array", required: false, description: "
|
|
57
|
-
{ name: "expected_tools", type: "array", required: false, description: "Expected tool names", itemType: "string" },
|
|
58
|
-
{ name: "
|
|
59
|
+
{ name: "precomputed_output", type: "object", required: false, description: "Precomputed target output for offline/metrics-only scoring" },
|
|
60
|
+
{ name: "context", type: "array", required: false, description: "Retrieved chunks for RAG evaluation", itemType: "string" },
|
|
61
|
+
{ name: "expected_tools", type: "array", required: false, description: "Expected tool names for agent evaluation", itemType: "string" },
|
|
62
|
+
{ name: "expected_tool_calls", type: "array", required: false, description: "Structured tool call expectations (name + arguments) for ToolArgumentMatchMetric", itemType: "object" },
|
|
63
|
+
{ name: "comments", type: "string", required: false, description: "Internal notes, not used in evaluation" },
|
|
59
64
|
{ name: "metadata", type: "object", required: false, description: "Extra metadata" },
|
|
60
65
|
{ name: "sort_order", type: "number", required: false, description: "Sort position" },
|
|
61
66
|
],
|
|
@@ -65,10 +70,11 @@ const updateDatasetItemSchema = {
|
|
|
65
70
|
fields: [
|
|
66
71
|
{ name: "input", type: "object", required: false, description: "Input" },
|
|
67
72
|
{ name: "expected_output", type: "object", required: false, description: "Expected output" },
|
|
68
|
-
{ name: "precomputed_output", type: "object", required: false, description: "Precomputed target output" },
|
|
69
|
-
{ name: "context", type: "array", required: false, description: "
|
|
70
|
-
{ name: "expected_tools", type: "array", required: false, description: "Expected tool names", itemType: "string" },
|
|
71
|
-
{ name: "
|
|
73
|
+
{ name: "precomputed_output", type: "object", required: false, description: "Precomputed target output for offline/metrics-only scoring" },
|
|
74
|
+
{ name: "context", type: "array", required: false, description: "Retrieved chunks for RAG evaluation", itemType: "string" },
|
|
75
|
+
{ name: "expected_tools", type: "array", required: false, description: "Expected tool names for agent evaluation", itemType: "string" },
|
|
76
|
+
{ name: "expected_tool_calls", type: "array", required: false, description: "Structured tool call expectations (name + arguments) for ToolArgumentMatchMetric", itemType: "object" },
|
|
77
|
+
{ name: "comments", type: "string", required: false, description: "Internal notes, not used in evaluation" },
|
|
72
78
|
{ name: "metadata", type: "object", required: false, description: "Metadata" },
|
|
73
79
|
{ name: "sort_order", type: "number", required: false, description: "Sort position" },
|
|
74
80
|
],
|
|
@@ -87,6 +93,8 @@ const createEvalSchema = {
|
|
|
87
93
|
{ name: "concurrency", type: "number", required: false, description: "Parallelism (default 5, min 1)" },
|
|
88
94
|
{ name: "cost_limit_usd", type: "number", required: false, description: "Max cost in USD" },
|
|
89
95
|
{ name: "timeout_per_item_ms", type: "number", required: false, description: "Per-item timeout ms (default 30000, min 1000)" },
|
|
96
|
+
{ name: "storage_type", type: "string", required: false, description: "managed (default) | git" },
|
|
97
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
90
98
|
],
|
|
91
99
|
};
|
|
92
100
|
const updateEvalSchema = {
|
|
@@ -104,6 +112,8 @@ const updateEvalSchema = {
|
|
|
104
112
|
{ name: "concurrency", type: "number", required: false, description: "Parallelism (min 1)" },
|
|
105
113
|
{ name: "cost_limit_usd", type: "number", required: false, description: "Max cost in USD" },
|
|
106
114
|
{ name: "timeout_per_item_ms", type: "number", required: false, description: "Per-item timeout ms (min 1000)" },
|
|
115
|
+
{ name: "storage_type", type: "string", required: false, description: "managed | git (switches storage mode)" },
|
|
116
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when switching to storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
107
117
|
],
|
|
108
118
|
};
|
|
109
119
|
const triggerEvalRunSchema = {
|
|
@@ -120,19 +130,23 @@ const triggerEvalRunSchema = {
|
|
|
120
130
|
description: "RunInputs overrides: { model_id?, target_id?, dataset_id?, metric_set_id?, variables? }",
|
|
121
131
|
},
|
|
122
132
|
{ name: "input_set_id", type: "string", required: false, description: "Saved input set id" },
|
|
133
|
+
{ name: "branch", type: "string", required: false, description: "Override git branch (e.g. run against a PR branch)" },
|
|
123
134
|
],
|
|
124
135
|
};
|
|
125
136
|
const createRunSchema = {
|
|
126
137
|
description: "Create run. Provide eval_id XOR suite (not both).",
|
|
127
138
|
fields: [
|
|
128
139
|
{ name: "eval_id", type: "string", required: false, description: "Eval UUID (mutually exclusive with suite)" },
|
|
129
|
-
{ name: "suite", type: "object", required: false, description: "
|
|
140
|
+
{ name: "suite", type: "object", required: false, description: "Full suite definition (mutually exclusive with eval_id)" },
|
|
130
141
|
{ name: "name", type: "string", required: false, description: "Run name" },
|
|
131
142
|
{ name: "pass_threshold", type: "number", required: false, description: "Pass threshold" },
|
|
132
143
|
{ name: "dataset_id", type: "string", required: false, description: "Dataset UUID" },
|
|
133
|
-
{ name: "dataset_snapshot", type: "object", required: false, description: "Dataset snapshot" },
|
|
144
|
+
{ name: "dataset_snapshot", type: "object", required: false, description: "Dataset snapshot (when dataset_id is omitted)" },
|
|
134
145
|
{ name: "variant_id", type: "string", required: false, description: "Variant identifier" },
|
|
135
|
-
{ name: "
|
|
146
|
+
{ name: "suite_run_id", type: "string", required: false, description: "Parent SuiteRun UUID (links child run to suite run)" },
|
|
147
|
+
{ name: "target_id", type: "string", required: false, description: "Target UUID (for single-eval runs)" },
|
|
148
|
+
{ name: "metric_set_id", type: "string", required: false, description: "MetricSet UUID (for single-eval runs)" },
|
|
149
|
+
{ name: "environment", type: "string", required: false, description: "Execution environment (local, ci, prod)" },
|
|
136
150
|
{ name: "metadata", type: "object", required: false, description: "Arbitrary metadata" },
|
|
137
151
|
{ name: "trigger_type", type: "string", required: false, description: "manual | scheduled | api | ci (default manual)" },
|
|
138
152
|
],
|
|
@@ -147,6 +161,7 @@ const updateRunSchema = {
|
|
|
147
161
|
{ name: "success_count", type: "number", required: false, description: "Successful items" },
|
|
148
162
|
{ name: "failed_count", type: "number", required: false, description: "Failed items" },
|
|
149
163
|
{ name: "summary_scores", type: "object", required: false, description: "Aggregated scores { metric_name: float }" },
|
|
164
|
+
{ name: "git_commit_sha", type: "string", required: false, description: "Resolved commit SHA (max 64 chars, set once, ignored on subsequent updates)" },
|
|
150
165
|
],
|
|
151
166
|
};
|
|
152
167
|
const rescoreSchema = {
|
|
@@ -165,9 +180,9 @@ const createMetricSchema = {
|
|
|
165
180
|
description: "Create custom metric",
|
|
166
181
|
fields: [
|
|
167
182
|
{ name: "name", type: "string", required: true, description: "Metric name" },
|
|
168
|
-
{ name: "type", type: "string", required: true, description: "
|
|
183
|
+
{ name: "type", type: "string", required: true, description: "heuristic | llm | embedding | code | composite" },
|
|
169
184
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
170
|
-
{ name: "kind", type: "string", required: false, description: "
|
|
185
|
+
{ name: "kind", type: "string", required: false, description: "harness-evals metric kind identifier (e.g. exact_match, contains, levenshtein)" },
|
|
171
186
|
{ name: "config", type: "object", required: false, description: "Metric config JSON" },
|
|
172
187
|
{ name: "default_threshold", type: "number", required: false, description: "Default threshold 0-1 (default 0.8)" },
|
|
173
188
|
{ name: "tags", type: "array", required: false, description: "Tags", itemType: "string" },
|
|
@@ -191,6 +206,8 @@ const createMetricSetSchema = {
|
|
|
191
206
|
{ name: "name", type: "string", required: true, description: "Name" },
|
|
192
207
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
193
208
|
{ name: "tags", type: "array", required: false, description: "Tags", itemType: "string" },
|
|
209
|
+
{ name: "judge_model_id", type: "string", required: false, description: "Default judge model UUID for LLM metrics in this set" },
|
|
210
|
+
{ name: "entries", type: "array", required: false, description: "Initial metric entries (AddMetricSetEntryRequest[])", itemType: "object" },
|
|
194
211
|
],
|
|
195
212
|
};
|
|
196
213
|
const updateMetricSetSchema = {
|
|
@@ -199,6 +216,7 @@ const updateMetricSetSchema = {
|
|
|
199
216
|
{ name: "name", type: "string", required: false, description: "Name" },
|
|
200
217
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
201
218
|
{ name: "tags", type: "array", required: false, description: "Tags", itemType: "string" },
|
|
219
|
+
{ name: "judge_model_id", type: "string", required: false, description: "Default judge model UUID for LLM metrics" },
|
|
202
220
|
],
|
|
203
221
|
};
|
|
204
222
|
const addMetricSetEntrySchema = {
|
|
@@ -208,6 +226,7 @@ const addMetricSetEntrySchema = {
|
|
|
208
226
|
{ name: "threshold", type: "number", required: true, description: "Pass threshold 0-1" },
|
|
209
227
|
{ name: "weight", type: "number", required: false, description: "Weight" },
|
|
210
228
|
{ name: "position", type: "number", required: false, description: "Order" },
|
|
229
|
+
{ name: "config", type: "object", required: false, description: "Per-use-site config override (merged over metric's base config at eval time)" },
|
|
211
230
|
],
|
|
212
231
|
};
|
|
213
232
|
const updateMetricSetEntrySchema = {
|
|
@@ -216,6 +235,7 @@ const updateMetricSetEntrySchema = {
|
|
|
216
235
|
{ name: "threshold", type: "number", required: false, description: "Threshold" },
|
|
217
236
|
{ name: "weight", type: "number", required: false, description: "Weight" },
|
|
218
237
|
{ name: "position", type: "number", required: false, description: "Position" },
|
|
238
|
+
{ name: "config", type: "object", required: false, description: "Per-use-site config override (merged over metric's base config at eval time)" },
|
|
219
239
|
],
|
|
220
240
|
};
|
|
221
241
|
const calibrateSchema = {
|
|
@@ -230,11 +250,14 @@ const createSuiteSchema = {
|
|
|
230
250
|
fields: [
|
|
231
251
|
{ name: "name", type: "string", required: true, description: "Suite name" },
|
|
232
252
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
233
|
-
{ name: "purpose", type: "string", required: false, description: "
|
|
253
|
+
{ name: "purpose", type: "string", required: false, description: "pr_gate | cd_gate | release_gate | custom (default custom)" },
|
|
234
254
|
{ name: "pass_strategy", type: "string", required: false, description: "all_must_pass | weighted_threshold (default all_must_pass)" },
|
|
235
255
|
{ name: "pass_threshold", type: "number", required: false, description: "0-1, used when strategy is weighted_threshold" },
|
|
236
256
|
{ name: "is_blocking", type: "boolean", required: false, description: "Blocking suite (default true)" },
|
|
237
257
|
{ name: "triggered_by", type: "string", required: false, description: "Who created the suite" },
|
|
258
|
+
{ name: "schedule", type: "object", required: false, description: "Cron schedule: { cron: string, timezone?: string (default UTC), enabled?: boolean (default true) }" },
|
|
259
|
+
{ name: "storage_type", type: "string", required: false, description: "managed (default) | git" },
|
|
260
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
238
261
|
],
|
|
239
262
|
};
|
|
240
263
|
const updateSuiteSchema = {
|
|
@@ -242,10 +265,13 @@ const updateSuiteSchema = {
|
|
|
242
265
|
fields: [
|
|
243
266
|
{ name: "name", type: "string", required: false, description: "Name" },
|
|
244
267
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
245
|
-
{ name: "purpose", type: "string", required: false, description: "
|
|
268
|
+
{ name: "purpose", type: "string", required: false, description: "pr_gate | cd_gate | release_gate | custom" },
|
|
246
269
|
{ name: "pass_strategy", type: "string", required: false, description: "all_must_pass | weighted_threshold" },
|
|
247
270
|
{ name: "pass_threshold", type: "number", required: false, description: "Pass threshold 0-1" },
|
|
248
271
|
{ name: "is_blocking", type: "boolean", required: false, description: "Blocking suite" },
|
|
272
|
+
{ name: "schedule", type: "object", required: false, description: "Cron schedule: { cron, timezone?, enabled? } — set null to remove" },
|
|
273
|
+
{ name: "storage_type", type: "string", required: false, description: "managed | git (switches storage mode)" },
|
|
274
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when switching to storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
249
275
|
],
|
|
250
276
|
};
|
|
251
277
|
const addSuiteEntrySchema = {
|
|
@@ -272,8 +298,9 @@ const triggerSuiteRunSchema = {
|
|
|
272
298
|
description: "Trigger suite run",
|
|
273
299
|
fields: [
|
|
274
300
|
{ name: "triggered_by", type: "string", required: false, description: "Who triggered the run" },
|
|
275
|
-
{ name: "trigger_type", type: "string", required: false, description: "manual | ci |
|
|
276
|
-
{ name: "suite_path", type: "string", required: false, description: "Suite YAML path (git-backed)" },
|
|
301
|
+
{ name: "trigger_type", type: "string", required: false, description: "manual | api | ci | scheduled (default manual)" },
|
|
302
|
+
{ name: "suite_path", type: "string", required: false, description: "Suite YAML path (git-backed, overrides suite.source_path)" },
|
|
303
|
+
{ name: "branch", type: "string", required: false, description: "Override git branch (e.g. run against a PR branch)" },
|
|
277
304
|
{
|
|
278
305
|
name: "run_inputs",
|
|
279
306
|
type: "object",
|
|
@@ -290,15 +317,18 @@ const triggerSuiteRunSchema = {
|
|
|
290
317
|
],
|
|
291
318
|
};
|
|
292
319
|
const createTargetSchema = {
|
|
293
|
-
description: "Create target",
|
|
320
|
+
description: "Create target. For managed: type + config are required. For git-backed: omit type/config and provide storage_type='git' + git_source.",
|
|
294
321
|
fields: [
|
|
295
322
|
{ name: "name", type: "string", required: true, description: "Name" },
|
|
296
|
-
{ name: "type", type: "string", required:
|
|
297
|
-
{ name: "config", type: "object", required:
|
|
323
|
+
{ name: "type", type: "string", required: false, description: "prompt | agent | precomputed (required when storage_type='managed', omit for git)" },
|
|
324
|
+
{ name: "config", type: "object", required: false, description: "Target config (required when storage_type='managed', omit for git)" },
|
|
298
325
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
299
326
|
{ name: "tags", type: "array", required: false, description: "Tags", itemType: "string" },
|
|
300
327
|
{ name: "is_active", type: "boolean", required: false, description: "Active (default true)" },
|
|
301
328
|
{ name: "env_secrets", type: "object", required: false, description: "Env var to Harness secret ref mapping" },
|
|
329
|
+
{ name: "connector_ref", type: "string", required: false, description: "Harness HTTP connector for endpoint configuration" },
|
|
330
|
+
{ name: "storage_type", type: "string", required: false, description: "managed (default) | git" },
|
|
331
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
302
332
|
],
|
|
303
333
|
};
|
|
304
334
|
const updateTargetSchema = {
|
|
@@ -306,15 +336,22 @@ const updateTargetSchema = {
|
|
|
306
336
|
fields: [
|
|
307
337
|
{ name: "name", type: "string", required: false, description: "Name" },
|
|
308
338
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
339
|
+
{ name: "type", type: "string", required: false, description: "prompt | agent | precomputed" },
|
|
309
340
|
{ name: "config", type: "object", required: false, description: "Config" },
|
|
310
341
|
{ name: "tags", type: "array", required: false, description: "Tags", itemType: "string" },
|
|
311
342
|
{ name: "is_active", type: "boolean", required: false, description: "Active" },
|
|
312
343
|
{ name: "env_secrets", type: "object", required: false, description: "Env var to Harness secret ref mapping" },
|
|
344
|
+
{ name: "connector_ref", type: "string", required: false, description: "Harness HTTP connector for endpoint configuration" },
|
|
345
|
+
{ name: "storage_type", type: "string", required: false, description: "managed | git (switches storage mode)" },
|
|
346
|
+
{ name: "git_source", type: "object", required: false, description: "Git location (required when switching to storage_type='git'): { connector_ref?, repo?, branch?, file_path }" },
|
|
313
347
|
],
|
|
314
348
|
};
|
|
315
349
|
const testTargetSchema = {
|
|
316
350
|
description: "Test target invocation",
|
|
317
|
-
fields: [
|
|
351
|
+
fields: [
|
|
352
|
+
{ name: "input", type: "string", required: true, description: "Sample input string" },
|
|
353
|
+
{ name: "item_identifier", type: "string", required: false, description: "Dataset item identifier (used by precomputed targets to look up output)" },
|
|
354
|
+
],
|
|
318
355
|
};
|
|
319
356
|
const uploadOutputsSchema = {
|
|
320
357
|
description: "Upload static target outputs",
|
|
@@ -332,10 +369,11 @@ const createModelSchema = {
|
|
|
332
369
|
description: "Register AI model",
|
|
333
370
|
fields: [
|
|
334
371
|
{ name: "name", type: "string", required: true, description: "Display name" },
|
|
335
|
-
{ name: "provider", type: "string", required: true, description: "openai | anthropic |
|
|
372
|
+
{ name: "provider", type: "string", required: true, description: "openai | anthropic | google | azure | custom" },
|
|
336
373
|
{ name: "model_id", type: "string", required: true, description: "Provider model id" },
|
|
337
374
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
338
375
|
{ name: "api_key_secret_ref", type: "string", required: false, description: "Harness secret ref for API key" },
|
|
376
|
+
{ name: "connector_ref", type: "string", required: false, description: "Harness connector identifier for LLM credentials" },
|
|
339
377
|
{ name: "default_temperature", type: "number", required: false, description: "Default temperature 0-2" },
|
|
340
378
|
{ name: "default_max_tokens", type: "number", required: false, description: "Default max tokens (min 1)" },
|
|
341
379
|
{ name: "default_top_p", type: "number", required: false, description: "Default top_p 0-1" },
|
|
@@ -349,6 +387,7 @@ const updateModelSchema = {
|
|
|
349
387
|
fields: [
|
|
350
388
|
{ name: "name", type: "string", required: false, description: "Name" },
|
|
351
389
|
{ name: "description", type: "string", required: false, description: "Description" },
|
|
390
|
+
{ name: "connector_ref", type: "string", required: false, description: "Harness connector identifier for LLM credentials" },
|
|
352
391
|
{ name: "default_temperature", type: "number", required: false, description: "Temperature 0-2" },
|
|
353
392
|
{ name: "default_max_tokens", type: "number", required: false, description: "Max tokens (min 1)" },
|
|
354
393
|
{ name: "default_top_p", type: "number", required: false, description: "Top_p 0-1" },
|
|
@@ -465,6 +504,12 @@ const importSuiteYamlSchema = {
|
|
|
465
504
|
{ name: "dry_run", type: "boolean", required: false, description: "If true, validate the YAML without creating any entities (default false)" },
|
|
466
505
|
],
|
|
467
506
|
};
|
|
507
|
+
const bulkUpsertDatasetItemsSchema = {
|
|
508
|
+
description: "Bulk upsert dataset items (insert or update by business ID)",
|
|
509
|
+
fields: [
|
|
510
|
+
{ name: "items", type: "array", required: true, description: "Dataset items to upsert (CreateDatasetItemRequest[])", itemType: "object" },
|
|
511
|
+
],
|
|
512
|
+
};
|
|
468
513
|
/** Merge harness_execute `body` into JSON POST body */
|
|
469
514
|
function bodyFromInput(input) {
|
|
470
515
|
const b = input.body;
|
|
@@ -480,19 +525,21 @@ export const aiEvalsToolset = {
|
|
|
480
525
|
name: "ai-evals",
|
|
481
526
|
displayName: "AI Evals",
|
|
482
527
|
description: "Harness AI Evals control plane: datasets, evaluations, runs, metrics, metric sets, suites, targets, models, annotations, analytics, registry, git settings.",
|
|
483
|
-
optIn:
|
|
528
|
+
optIn: false,
|
|
484
529
|
resources: [
|
|
485
530
|
// --- Datasets ---
|
|
486
531
|
{
|
|
487
532
|
resourceType: "eval_dataset",
|
|
488
533
|
displayName: "AI Evals Dataset",
|
|
489
|
-
description: "Evaluation dataset (JSONL-backed
|
|
534
|
+
description: "Evaluation dataset (managed JSONL rows or git-backed). CRUD + items sub-resource via eval_dataset_item.",
|
|
490
535
|
toolset: "ai-evals",
|
|
491
536
|
scope: "project",
|
|
492
537
|
scopeOptional: true,
|
|
493
538
|
headerBasedScoping: true,
|
|
494
539
|
identifierFields: ["dataset_id"],
|
|
495
|
-
listFilterFields: [
|
|
540
|
+
listFilterFields: [
|
|
541
|
+
{ name: "search", description: "Search by name, identifier, or description" },
|
|
542
|
+
],
|
|
496
543
|
relatedResources: [
|
|
497
544
|
{ resourceType: "eval_dataset_item", relationship: "contains", description: "Dataset rows" },
|
|
498
545
|
{ resourceType: "evaluation", relationship: "uses", description: "Evals reference datasets" },
|
|
@@ -503,7 +550,7 @@ export const aiEvalsToolset = {
|
|
|
503
550
|
path: "",
|
|
504
551
|
pathBuilder: (input, config) => `${base(input, config)}/dataset`,
|
|
505
552
|
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
506
|
-
queryParams: listQ,
|
|
553
|
+
queryParams: { ...listQ, search: "search" },
|
|
507
554
|
responseExtractor: aiEvalsListExtract,
|
|
508
555
|
description: "List datasets",
|
|
509
556
|
},
|
|
@@ -645,6 +692,18 @@ export const aiEvalsToolset = {
|
|
|
645
692
|
description: "Delete item",
|
|
646
693
|
},
|
|
647
694
|
},
|
|
695
|
+
executeActions: {
|
|
696
|
+
bulk_upsert: {
|
|
697
|
+
method: "PATCH",
|
|
698
|
+
path: "",
|
|
699
|
+
pathBuilder: (input, config) => `${base(input, config)}/dataset/${input.dataset_id}/items/bulk`,
|
|
700
|
+
operationPolicy: { risk: "low_write", retryPolicy: "do_not_retry" },
|
|
701
|
+
bodyBuilder: bodyFromInput,
|
|
702
|
+
bodySchema: bulkUpsertDatasetItemsSchema,
|
|
703
|
+
responseExtractor: aiEvalsArrayExtract,
|
|
704
|
+
actionDescription: "Bulk upsert dataset items by business ID. Body: { items: CreateDatasetItemRequest[] }",
|
|
705
|
+
},
|
|
706
|
+
},
|
|
648
707
|
},
|
|
649
708
|
// --- Evaluations ---
|
|
650
709
|
{
|
|
@@ -662,6 +721,9 @@ export const aiEvalsToolset = {
|
|
|
662
721
|
description: "Filter by status",
|
|
663
722
|
enum: ["active", "draft", "archived"],
|
|
664
723
|
},
|
|
724
|
+
{ name: "target_id", description: "Filter by target UUID(s)" },
|
|
725
|
+
{ name: "metric_set_id", description: "Filter by metric set UUID" },
|
|
726
|
+
{ name: "search", description: "Search by name or description" },
|
|
665
727
|
],
|
|
666
728
|
executeHint: "Run an eval with harness_execute(resource_type='evaluation', action='run', resource_id=EVAL_ID, body={...}).",
|
|
667
729
|
operations: {
|
|
@@ -670,7 +732,7 @@ export const aiEvalsToolset = {
|
|
|
670
732
|
path: "",
|
|
671
733
|
pathBuilder: (input, config) => `${base(input, config)}/evals`,
|
|
672
734
|
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
673
|
-
queryParams: { ...listQ, status: "status" },
|
|
735
|
+
queryParams: { ...listQ, status: "status", target_id: "target_id", metric_set_id: "metric_set_id", search: "search" },
|
|
674
736
|
responseExtractor: aiEvalsListExtract,
|
|
675
737
|
description: "List evals",
|
|
676
738
|
},
|
|
@@ -747,15 +809,14 @@ export const aiEvalsToolset = {
|
|
|
747
809
|
{
|
|
748
810
|
resourceType: "eval_run",
|
|
749
811
|
displayName: "AI Evals Run",
|
|
750
|
-
description: "A single evaluation run. Compare runs or rescore via execute actions.",
|
|
812
|
+
description: "A single evaluation run. Compare runs or rescore via execute actions. To filter by eval_id, use the eval_run_by_eval resource instead.",
|
|
751
813
|
toolset: "ai-evals",
|
|
752
814
|
scope: "project",
|
|
753
815
|
scopeOptional: true,
|
|
754
816
|
headerBasedScoping: true,
|
|
755
817
|
identifierFields: ["run_id"],
|
|
756
818
|
listFilterFields: [
|
|
757
|
-
{ name: "
|
|
758
|
-
{ name: "status", description: "Filter by run status" },
|
|
819
|
+
{ name: "target_id", description: "Filter runs by target UUID" },
|
|
759
820
|
],
|
|
760
821
|
relatedResources: [
|
|
761
822
|
{ resourceType: "eval_run_item", relationship: "contains", description: "Per-item results" },
|
|
@@ -766,9 +827,9 @@ export const aiEvalsToolset = {
|
|
|
766
827
|
path: "",
|
|
767
828
|
pathBuilder: (input, config) => `${base(input, config)}/runs`,
|
|
768
829
|
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
769
|
-
queryParams: { ...listQ,
|
|
830
|
+
queryParams: { ...listQ, target_id: "target_id" },
|
|
770
831
|
responseExtractor: aiEvalsListExtract,
|
|
771
|
-
description: "List runs in project
|
|
832
|
+
description: "List runs in project. Use eval_run_by_eval resource to filter by eval_id.",
|
|
772
833
|
},
|
|
773
834
|
get: {
|
|
774
835
|
method: "GET",
|
|
@@ -967,19 +1028,22 @@ export const aiEvalsToolset = {
|
|
|
967
1028
|
{
|
|
968
1029
|
resourceType: "eval_metric_set",
|
|
969
1030
|
displayName: "AI Evals Metric Set",
|
|
970
|
-
description: "Grouped metrics with thresholds. Manage entries via eval_metric_set_entry.",
|
|
1031
|
+
description: "Grouped metrics with thresholds and optional judge model. Manage entries via eval_metric_set_entry.",
|
|
971
1032
|
toolset: "ai-evals",
|
|
972
1033
|
scope: "project",
|
|
973
1034
|
scopeOptional: true,
|
|
974
1035
|
headerBasedScoping: true,
|
|
975
1036
|
identifierFields: ["set_id"],
|
|
1037
|
+
listFilterFields: [
|
|
1038
|
+
{ name: "search", description: "Search by name or description" },
|
|
1039
|
+
],
|
|
976
1040
|
operations: {
|
|
977
1041
|
list: {
|
|
978
1042
|
method: "GET",
|
|
979
1043
|
path: "",
|
|
980
1044
|
pathBuilder: (input, config) => `${base(input, config)}/metric-sets`,
|
|
981
1045
|
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
982
|
-
queryParams: listQ,
|
|
1046
|
+
queryParams: { ...listQ, search: "search" },
|
|
983
1047
|
responseExtractor: aiEvalsListExtract,
|
|
984
1048
|
description: "List metric sets",
|
|
985
1049
|
},
|
|
@@ -1104,7 +1168,7 @@ export const aiEvalsToolset = {
|
|
|
1104
1168
|
{
|
|
1105
1169
|
resourceType: "eval_suite",
|
|
1106
1170
|
displayName: "AI Evals Suite",
|
|
1107
|
-
description: "Multi-eval suite with pass strategy. Members: eval_suite_evaluation.",
|
|
1171
|
+
description: "Multi-eval suite with pass strategy and optional cron schedule. Members: eval_suite_evaluation.",
|
|
1108
1172
|
toolset: "ai-evals",
|
|
1109
1173
|
scope: "project",
|
|
1110
1174
|
scopeOptional: true,
|
|
@@ -1272,20 +1336,23 @@ export const aiEvalsToolset = {
|
|
|
1272
1336
|
{
|
|
1273
1337
|
resourceType: "eval_target",
|
|
1274
1338
|
displayName: "AI Evals Target",
|
|
1275
|
-
description: "Invocation target (prompt,
|
|
1339
|
+
description: "Invocation target (prompt, agent, or precomputed).",
|
|
1276
1340
|
toolset: "ai-evals",
|
|
1277
1341
|
scope: "project",
|
|
1278
1342
|
scopeOptional: true,
|
|
1279
1343
|
headerBasedScoping: true,
|
|
1280
1344
|
identifierFields: ["target_id"],
|
|
1281
|
-
listFilterFields: [
|
|
1345
|
+
listFilterFields: [
|
|
1346
|
+
{ name: "type", description: "prompt | agent | precomputed" },
|
|
1347
|
+
{ name: "search", description: "Search by name or description" },
|
|
1348
|
+
],
|
|
1282
1349
|
operations: {
|
|
1283
1350
|
list: {
|
|
1284
1351
|
method: "GET",
|
|
1285
1352
|
path: "",
|
|
1286
1353
|
pathBuilder: (input, config) => `${base(input, config)}/targets`,
|
|
1287
1354
|
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
1288
|
-
queryParams: { ...listQ, type: "type" },
|
|
1355
|
+
queryParams: { ...listQ, type: "type", search: "search" },
|
|
1289
1356
|
responseExtractor: aiEvalsListExtract,
|
|
1290
1357
|
description: "List targets",
|
|
1291
1358
|
},
|
|
@@ -1357,6 +1424,24 @@ export const aiEvalsToolset = {
|
|
|
1357
1424
|
actionDescription: "List uploaded static target outputs (paginated).",
|
|
1358
1425
|
bodySchema: { description: "No body", fields: [] },
|
|
1359
1426
|
},
|
|
1427
|
+
export_yaml: {
|
|
1428
|
+
method: "GET",
|
|
1429
|
+
path: "",
|
|
1430
|
+
pathBuilder: (input, config) => `${base(input, config)}/targets/${input.target_id}/export-yaml`,
|
|
1431
|
+
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
1432
|
+
responseExtractor: passthrough,
|
|
1433
|
+
actionDescription: "Export target config as a standalone YAML document.",
|
|
1434
|
+
bodySchema: { description: "No body", fields: [] },
|
|
1435
|
+
},
|
|
1436
|
+
overview: {
|
|
1437
|
+
method: "GET",
|
|
1438
|
+
path: "",
|
|
1439
|
+
pathBuilder: (input, config) => `${base(input, config)}/targets/${input.target_id}/overview`,
|
|
1440
|
+
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
1441
|
+
responseExtractor: passthrough,
|
|
1442
|
+
actionDescription: "Summary metrics and per-eval health trend (total_evals, total_runs, last_run_at, overall_pass_rate, per-eval pass rates).",
|
|
1443
|
+
bodySchema: { description: "No body", fields: [] },
|
|
1444
|
+
},
|
|
1360
1445
|
},
|
|
1361
1446
|
},
|
|
1362
1447
|
{
|
|
@@ -1368,14 +1453,17 @@ export const aiEvalsToolset = {
|
|
|
1368
1453
|
scopeOptional: true,
|
|
1369
1454
|
headerBasedScoping: true,
|
|
1370
1455
|
identifierFields: ["model_id"],
|
|
1371
|
-
listFilterFields: [
|
|
1456
|
+
listFilterFields: [
|
|
1457
|
+
{ name: "active_only", description: "Only active models", type: "boolean" },
|
|
1458
|
+
{ name: "search", description: "Search by name, provider, or model ID" },
|
|
1459
|
+
],
|
|
1372
1460
|
operations: {
|
|
1373
1461
|
list: {
|
|
1374
1462
|
method: "GET",
|
|
1375
1463
|
path: "",
|
|
1376
1464
|
pathBuilder: (input, config) => `${base(input, config)}/models`,
|
|
1377
1465
|
operationPolicy: { risk: "read", retryPolicy: "safe" },
|
|
1378
|
-
queryParams: { ...listQ, active_only: "active_only" },
|
|
1466
|
+
queryParams: { ...listQ, active_only: "active_only", search: "search" },
|
|
1379
1467
|
responseExtractor: aiEvalsListExtract,
|
|
1380
1468
|
description: "List models",
|
|
1381
1469
|
},
|