scorecard-ai-mcp 2.6.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +76 -194
- package/code-tool-types.d.mts +7 -9
- package/code-tool-types.d.mts.map +1 -1
- package/code-tool-types.d.ts +7 -9
- package/code-tool-types.d.ts.map +1 -1
- package/code-tool.d.mts +2 -44
- package/code-tool.d.mts.map +1 -1
- package/code-tool.d.ts +2 -44
- package/code-tool.d.ts.map +1 -1
- package/code-tool.js +60 -8
- package/code-tool.js.map +1 -1
- package/code-tool.mjs +61 -9
- package/code-tool.mjs.map +1 -1
- package/docs-search-tool.d.mts +4 -3
- package/docs-search-tool.d.mts.map +1 -1
- package/docs-search-tool.d.ts +4 -3
- package/docs-search-tool.d.ts.map +1 -1
- package/docs-search-tool.js +2 -1
- package/docs-search-tool.js.map +1 -1
- package/docs-search-tool.mjs +2 -1
- package/docs-search-tool.mjs.map +1 -1
- package/http.d.mts.map +1 -1
- package/http.d.ts.map +1 -1
- package/http.js +2 -22
- package/http.js.map +1 -1
- package/http.mjs +2 -22
- package/http.mjs.map +1 -1
- package/index.js +4 -40
- package/index.js.map +1 -1
- package/index.mjs +4 -40
- package/index.mjs.map +1 -1
- package/options.d.mts +0 -9
- package/options.d.mts.map +1 -1
- package/options.d.ts +0 -9
- package/options.d.ts.map +1 -1
- package/options.js +6 -361
- package/options.js.map +1 -1
- package/options.mjs +6 -361
- package/options.mjs.map +1 -1
- package/package.json +11 -51
- package/server.d.mts +3 -8
- package/server.d.mts.map +1 -1
- package/server.d.ts +3 -8
- package/server.d.ts.map +1 -1
- package/server.js +12 -65
- package/server.js.map +1 -1
- package/server.mjs +11 -62
- package/server.mjs.map +1 -1
- package/src/code-tool-types.ts +7 -9
- package/src/code-tool.ts +66 -12
- package/src/docs-search-tool.ts +2 -1
- package/src/http.ts +3 -24
- package/src/index.ts +5 -52
- package/src/options.ts +7 -388
- package/src/server.ts +12 -72
- package/src/stdio.ts +2 -3
- package/src/{tools/types.ts → types.ts} +1 -1
- package/stdio.d.mts +1 -2
- package/stdio.d.mts.map +1 -1
- package/stdio.d.ts +1 -2
- package/stdio.d.ts.map +1 -1
- package/stdio.js +2 -2
- package/stdio.js.map +1 -1
- package/stdio.mjs +2 -2
- package/stdio.mjs.map +1 -1
- package/{tools/types.d.mts → types.d.mts} +1 -1
- package/types.d.mts.map +1 -0
- package/{tools/types.d.ts → types.d.ts} +1 -1
- package/types.d.ts.map +1 -0
- package/types.js.map +1 -0
- package/types.mjs.map +1 -0
- package/compat.d.mts +0 -58
- package/compat.d.mts.map +0 -1
- package/compat.d.ts +0 -58
- package/compat.d.ts.map +0 -1
- package/compat.js +0 -387
- package/compat.js.map +0 -1
- package/compat.mjs +0 -378
- package/compat.mjs.map +0 -1
- package/dynamic-tools.d.mts +0 -12
- package/dynamic-tools.d.mts.map +0 -1
- package/dynamic-tools.d.ts +0 -12
- package/dynamic-tools.d.ts.map +0 -1
- package/dynamic-tools.js +0 -135
- package/dynamic-tools.js.map +0 -1
- package/dynamic-tools.mjs +0 -132
- package/dynamic-tools.mjs.map +0 -1
- package/filtering.d.mts +0 -3
- package/filtering.d.mts.map +0 -1
- package/filtering.d.ts +0 -3
- package/filtering.d.ts.map +0 -1
- package/filtering.js +0 -24
- package/filtering.js.map +0 -1
- package/filtering.mjs +0 -16
- package/filtering.mjs.map +0 -1
- package/src/compat.ts +0 -483
- package/src/dynamic-tools.ts +0 -159
- package/src/filtering.ts +0 -18
- package/src/tools/index.ts +0 -131
- package/src/tools/metrics/create-metrics.ts +0 -383
- package/src/tools/metrics/delete-metrics.ts +0 -54
- package/src/tools/metrics/get-metrics.ts +0 -46
- package/src/tools/metrics/list-metrics.ts +0 -58
- package/src/tools/metrics/update-metrics.ts +0 -383
- package/src/tools/projects/create-projects.ts +0 -57
- package/src/tools/projects/list-projects.ts +0 -62
- package/src/tools/records/create-records.ts +0 -71
- package/src/tools/records/delete-records.ts +0 -54
- package/src/tools/records/list-records.ts +0 -65
- package/src/tools/runs/create-runs.ts +0 -67
- package/src/tools/runs/get-runs.ts +0 -54
- package/src/tools/runs/list-runs.ts +0 -65
- package/src/tools/scores/upsert-scores.ts +0 -65
- package/src/tools/systems/delete-systems.ts +0 -54
- package/src/tools/systems/get-systems.ts +0 -54
- package/src/tools/systems/list-systems.ts +0 -65
- package/src/tools/systems/update-systems.ts +0 -64
- package/src/tools/systems/upsert-systems.ts +0 -66
- package/src/tools/systems/versions/get-systems-versions.ts +0 -56
- package/src/tools/systems/versions/upsert-systems-versions.ts +0 -64
- package/src/tools/testcases/create-testcases.ts +0 -67
- package/src/tools/testcases/delete-testcases.ts +0 -56
- package/src/tools/testcases/get-testcases.ts +0 -54
- package/src/tools/testcases/list-testcases.ts +0 -65
- package/src/tools/testcases/update-testcases.ts +0 -59
- package/src/tools/testsets/create-testsets.ts +0 -94
- package/src/tools/testsets/delete-testsets.ts +0 -54
- package/src/tools/testsets/get-testsets.ts +0 -54
- package/src/tools/testsets/list-testsets.ts +0 -65
- package/src/tools/testsets/update-testsets.ts +0 -94
- package/src/tools.ts +0 -1
- package/tools/index.d.mts +0 -10
- package/tools/index.d.mts.map +0 -1
- package/tools/index.d.ts +0 -10
- package/tools/index.d.ts.map +0 -1
- package/tools/index.js +0 -115
- package/tools/index.js.map +0 -1
- package/tools/index.mjs +0 -108
- package/tools/index.mjs.map +0 -1
- package/tools/metrics/create-metrics.d.mts +0 -51
- package/tools/metrics/create-metrics.d.mts.map +0 -1
- package/tools/metrics/create-metrics.d.ts +0 -51
- package/tools/metrics/create-metrics.d.ts.map +0 -1
- package/tools/metrics/create-metrics.js +0 -381
- package/tools/metrics/create-metrics.js.map +0 -1
- package/tools/metrics/create-metrics.mjs +0 -374
- package/tools/metrics/create-metrics.mjs.map +0 -1
- package/tools/metrics/delete-metrics.d.mts +0 -51
- package/tools/metrics/delete-metrics.d.mts.map +0 -1
- package/tools/metrics/delete-metrics.d.ts +0 -51
- package/tools/metrics/delete-metrics.d.ts.map +0 -1
- package/tools/metrics/delete-metrics.js +0 -54
- package/tools/metrics/delete-metrics.js.map +0 -1
- package/tools/metrics/delete-metrics.mjs +0 -47
- package/tools/metrics/delete-metrics.mjs.map +0 -1
- package/tools/metrics/get-metrics.d.mts +0 -51
- package/tools/metrics/get-metrics.d.mts.map +0 -1
- package/tools/metrics/get-metrics.d.ts +0 -51
- package/tools/metrics/get-metrics.d.ts.map +0 -1
- package/tools/metrics/get-metrics.js +0 -48
- package/tools/metrics/get-metrics.js.map +0 -1
- package/tools/metrics/get-metrics.mjs +0 -41
- package/tools/metrics/get-metrics.mjs.map +0 -1
- package/tools/metrics/list-metrics.d.mts +0 -51
- package/tools/metrics/list-metrics.d.mts.map +0 -1
- package/tools/metrics/list-metrics.d.ts +0 -51
- package/tools/metrics/list-metrics.d.ts.map +0 -1
- package/tools/metrics/list-metrics.js +0 -57
- package/tools/metrics/list-metrics.js.map +0 -1
- package/tools/metrics/list-metrics.mjs +0 -50
- package/tools/metrics/list-metrics.mjs.map +0 -1
- package/tools/metrics/update-metrics.d.mts +0 -51
- package/tools/metrics/update-metrics.d.mts.map +0 -1
- package/tools/metrics/update-metrics.d.ts +0 -51
- package/tools/metrics/update-metrics.d.ts.map +0 -1
- package/tools/metrics/update-metrics.js +0 -381
- package/tools/metrics/update-metrics.js.map +0 -1
- package/tools/metrics/update-metrics.mjs +0 -374
- package/tools/metrics/update-metrics.mjs.map +0 -1
- package/tools/projects/create-projects.d.mts +0 -51
- package/tools/projects/create-projects.d.mts.map +0 -1
- package/tools/projects/create-projects.d.ts +0 -51
- package/tools/projects/create-projects.d.ts.map +0 -1
- package/tools/projects/create-projects.js +0 -57
- package/tools/projects/create-projects.js.map +0 -1
- package/tools/projects/create-projects.mjs +0 -50
- package/tools/projects/create-projects.mjs.map +0 -1
- package/tools/projects/list-projects.d.mts +0 -51
- package/tools/projects/list-projects.d.mts.map +0 -1
- package/tools/projects/list-projects.d.ts +0 -51
- package/tools/projects/list-projects.d.ts.map +0 -1
- package/tools/projects/list-projects.js +0 -60
- package/tools/projects/list-projects.js.map +0 -1
- package/tools/projects/list-projects.mjs +0 -53
- package/tools/projects/list-projects.mjs.map +0 -1
- package/tools/records/create-records.d.mts +0 -51
- package/tools/records/create-records.d.mts.map +0 -1
- package/tools/records/create-records.d.ts +0 -51
- package/tools/records/create-records.d.ts.map +0 -1
- package/tools/records/create-records.js +0 -71
- package/tools/records/create-records.js.map +0 -1
- package/tools/records/create-records.mjs +0 -64
- package/tools/records/create-records.mjs.map +0 -1
- package/tools/records/delete-records.d.mts +0 -51
- package/tools/records/delete-records.d.mts.map +0 -1
- package/tools/records/delete-records.d.ts +0 -51
- package/tools/records/delete-records.d.ts.map +0 -1
- package/tools/records/delete-records.js +0 -54
- package/tools/records/delete-records.js.map +0 -1
- package/tools/records/delete-records.mjs +0 -47
- package/tools/records/delete-records.mjs.map +0 -1
- package/tools/records/list-records.d.mts +0 -51
- package/tools/records/list-records.d.mts.map +0 -1
- package/tools/records/list-records.d.ts +0 -51
- package/tools/records/list-records.d.ts.map +0 -1
- package/tools/records/list-records.js +0 -63
- package/tools/records/list-records.js.map +0 -1
- package/tools/records/list-records.mjs +0 -56
- package/tools/records/list-records.mjs.map +0 -1
- package/tools/runs/create-runs.d.mts +0 -51
- package/tools/runs/create-runs.d.mts.map +0 -1
- package/tools/runs/create-runs.d.ts +0 -51
- package/tools/runs/create-runs.d.ts.map +0 -1
- package/tools/runs/create-runs.js +0 -67
- package/tools/runs/create-runs.js.map +0 -1
- package/tools/runs/create-runs.mjs +0 -60
- package/tools/runs/create-runs.mjs.map +0 -1
- package/tools/runs/get-runs.d.mts +0 -51
- package/tools/runs/get-runs.d.mts.map +0 -1
- package/tools/runs/get-runs.d.ts +0 -51
- package/tools/runs/get-runs.d.ts.map +0 -1
- package/tools/runs/get-runs.js +0 -54
- package/tools/runs/get-runs.js.map +0 -1
- package/tools/runs/get-runs.mjs +0 -47
- package/tools/runs/get-runs.mjs.map +0 -1
- package/tools/runs/list-runs.d.mts +0 -51
- package/tools/runs/list-runs.d.mts.map +0 -1
- package/tools/runs/list-runs.d.ts +0 -51
- package/tools/runs/list-runs.d.ts.map +0 -1
- package/tools/runs/list-runs.js +0 -63
- package/tools/runs/list-runs.js.map +0 -1
- package/tools/runs/list-runs.mjs +0 -56
- package/tools/runs/list-runs.mjs.map +0 -1
- package/tools/scores/upsert-scores.d.mts +0 -51
- package/tools/scores/upsert-scores.d.mts.map +0 -1
- package/tools/scores/upsert-scores.d.ts +0 -51
- package/tools/scores/upsert-scores.d.ts.map +0 -1
- package/tools/scores/upsert-scores.js +0 -62
- package/tools/scores/upsert-scores.js.map +0 -1
- package/tools/scores/upsert-scores.mjs +0 -55
- package/tools/scores/upsert-scores.mjs.map +0 -1
- package/tools/systems/delete-systems.d.mts +0 -51
- package/tools/systems/delete-systems.d.mts.map +0 -1
- package/tools/systems/delete-systems.d.ts +0 -51
- package/tools/systems/delete-systems.d.ts.map +0 -1
- package/tools/systems/delete-systems.js +0 -54
- package/tools/systems/delete-systems.js.map +0 -1
- package/tools/systems/delete-systems.mjs +0 -47
- package/tools/systems/delete-systems.mjs.map +0 -1
- package/tools/systems/get-systems.d.mts +0 -51
- package/tools/systems/get-systems.d.mts.map +0 -1
- package/tools/systems/get-systems.d.ts +0 -51
- package/tools/systems/get-systems.d.ts.map +0 -1
- package/tools/systems/get-systems.js +0 -54
- package/tools/systems/get-systems.js.map +0 -1
- package/tools/systems/get-systems.mjs +0 -47
- package/tools/systems/get-systems.mjs.map +0 -1
- package/tools/systems/list-systems.d.mts +0 -51
- package/tools/systems/list-systems.d.mts.map +0 -1
- package/tools/systems/list-systems.d.ts +0 -51
- package/tools/systems/list-systems.d.ts.map +0 -1
- package/tools/systems/list-systems.js +0 -63
- package/tools/systems/list-systems.js.map +0 -1
- package/tools/systems/list-systems.mjs +0 -56
- package/tools/systems/list-systems.mjs.map +0 -1
- package/tools/systems/update-systems.d.mts +0 -51
- package/tools/systems/update-systems.d.mts.map +0 -1
- package/tools/systems/update-systems.d.ts +0 -51
- package/tools/systems/update-systems.d.ts.map +0 -1
- package/tools/systems/update-systems.js +0 -64
- package/tools/systems/update-systems.js.map +0 -1
- package/tools/systems/update-systems.mjs +0 -57
- package/tools/systems/update-systems.mjs.map +0 -1
- package/tools/systems/upsert-systems.d.mts +0 -51
- package/tools/systems/upsert-systems.d.mts.map +0 -1
- package/tools/systems/upsert-systems.d.ts +0 -51
- package/tools/systems/upsert-systems.d.ts.map +0 -1
- package/tools/systems/upsert-systems.js +0 -65
- package/tools/systems/upsert-systems.js.map +0 -1
- package/tools/systems/upsert-systems.mjs +0 -58
- package/tools/systems/upsert-systems.mjs.map +0 -1
- package/tools/systems/versions/get-systems-versions.d.mts +0 -51
- package/tools/systems/versions/get-systems-versions.d.mts.map +0 -1
- package/tools/systems/versions/get-systems-versions.d.ts +0 -51
- package/tools/systems/versions/get-systems-versions.d.ts.map +0 -1
- package/tools/systems/versions/get-systems-versions.js +0 -54
- package/tools/systems/versions/get-systems-versions.js.map +0 -1
- package/tools/systems/versions/get-systems-versions.mjs +0 -47
- package/tools/systems/versions/get-systems-versions.mjs.map +0 -1
- package/tools/systems/versions/upsert-systems-versions.d.mts +0 -51
- package/tools/systems/versions/upsert-systems-versions.d.mts.map +0 -1
- package/tools/systems/versions/upsert-systems-versions.d.ts +0 -51
- package/tools/systems/versions/upsert-systems-versions.d.ts.map +0 -1
- package/tools/systems/versions/upsert-systems-versions.js +0 -61
- package/tools/systems/versions/upsert-systems-versions.js.map +0 -1
- package/tools/systems/versions/upsert-systems-versions.mjs +0 -54
- package/tools/systems/versions/upsert-systems-versions.mjs.map +0 -1
- package/tools/testcases/create-testcases.d.mts +0 -51
- package/tools/testcases/create-testcases.d.mts.map +0 -1
- package/tools/testcases/create-testcases.d.ts +0 -51
- package/tools/testcases/create-testcases.d.ts.map +0 -1
- package/tools/testcases/create-testcases.js +0 -67
- package/tools/testcases/create-testcases.js.map +0 -1
- package/tools/testcases/create-testcases.mjs +0 -60
- package/tools/testcases/create-testcases.mjs.map +0 -1
- package/tools/testcases/delete-testcases.d.mts +0 -51
- package/tools/testcases/delete-testcases.d.mts.map +0 -1
- package/tools/testcases/delete-testcases.d.ts +0 -51
- package/tools/testcases/delete-testcases.d.ts.map +0 -1
- package/tools/testcases/delete-testcases.js +0 -56
- package/tools/testcases/delete-testcases.js.map +0 -1
- package/tools/testcases/delete-testcases.mjs +0 -49
- package/tools/testcases/delete-testcases.mjs.map +0 -1
- package/tools/testcases/get-testcases.d.mts +0 -51
- package/tools/testcases/get-testcases.d.mts.map +0 -1
- package/tools/testcases/get-testcases.d.ts +0 -51
- package/tools/testcases/get-testcases.d.ts.map +0 -1
- package/tools/testcases/get-testcases.js +0 -54
- package/tools/testcases/get-testcases.js.map +0 -1
- package/tools/testcases/get-testcases.mjs +0 -47
- package/tools/testcases/get-testcases.mjs.map +0 -1
- package/tools/testcases/list-testcases.d.mts +0 -51
- package/tools/testcases/list-testcases.d.mts.map +0 -1
- package/tools/testcases/list-testcases.d.ts +0 -51
- package/tools/testcases/list-testcases.d.ts.map +0 -1
- package/tools/testcases/list-testcases.js +0 -63
- package/tools/testcases/list-testcases.js.map +0 -1
- package/tools/testcases/list-testcases.mjs +0 -56
- package/tools/testcases/list-testcases.mjs.map +0 -1
- package/tools/testcases/update-testcases.d.mts +0 -51
- package/tools/testcases/update-testcases.d.mts.map +0 -1
- package/tools/testcases/update-testcases.d.ts +0 -51
- package/tools/testcases/update-testcases.d.ts.map +0 -1
- package/tools/testcases/update-testcases.js +0 -59
- package/tools/testcases/update-testcases.js.map +0 -1
- package/tools/testcases/update-testcases.mjs +0 -52
- package/tools/testcases/update-testcases.mjs.map +0 -1
- package/tools/testsets/create-testsets.d.mts +0 -51
- package/tools/testsets/create-testsets.d.mts.map +0 -1
- package/tools/testsets/create-testsets.d.ts +0 -51
- package/tools/testsets/create-testsets.d.ts.map +0 -1
- package/tools/testsets/create-testsets.js +0 -93
- package/tools/testsets/create-testsets.js.map +0 -1
- package/tools/testsets/create-testsets.mjs +0 -86
- package/tools/testsets/create-testsets.mjs.map +0 -1
- package/tools/testsets/delete-testsets.d.mts +0 -51
- package/tools/testsets/delete-testsets.d.mts.map +0 -1
- package/tools/testsets/delete-testsets.d.ts +0 -51
- package/tools/testsets/delete-testsets.d.ts.map +0 -1
- package/tools/testsets/delete-testsets.js +0 -54
- package/tools/testsets/delete-testsets.js.map +0 -1
- package/tools/testsets/delete-testsets.mjs +0 -47
- package/tools/testsets/delete-testsets.mjs.map +0 -1
- package/tools/testsets/get-testsets.d.mts +0 -51
- package/tools/testsets/get-testsets.d.mts.map +0 -1
- package/tools/testsets/get-testsets.d.ts +0 -51
- package/tools/testsets/get-testsets.d.ts.map +0 -1
- package/tools/testsets/get-testsets.js +0 -54
- package/tools/testsets/get-testsets.js.map +0 -1
- package/tools/testsets/get-testsets.mjs +0 -47
- package/tools/testsets/get-testsets.mjs.map +0 -1
- package/tools/testsets/list-testsets.d.mts +0 -51
- package/tools/testsets/list-testsets.d.mts.map +0 -1
- package/tools/testsets/list-testsets.d.ts +0 -51
- package/tools/testsets/list-testsets.d.ts.map +0 -1
- package/tools/testsets/list-testsets.js +0 -63
- package/tools/testsets/list-testsets.js.map +0 -1
- package/tools/testsets/list-testsets.mjs +0 -56
- package/tools/testsets/list-testsets.mjs.map +0 -1
- package/tools/testsets/update-testsets.d.mts +0 -51
- package/tools/testsets/update-testsets.d.mts.map +0 -1
- package/tools/testsets/update-testsets.d.ts +0 -51
- package/tools/testsets/update-testsets.d.ts.map +0 -1
- package/tools/testsets/update-testsets.js +0 -93
- package/tools/testsets/update-testsets.js.map +0 -1
- package/tools/testsets/update-testsets.mjs +0 -86
- package/tools/testsets/update-testsets.mjs.map +0 -1
- package/tools/types.d.mts.map +0 -1
- package/tools/types.d.ts.map +0 -1
- package/tools/types.js.map +0 -1
- package/tools/types.mjs.map +0 -1
- package/tools.d.mts +0 -2
- package/tools.d.mts.map +0 -1
- package/tools.d.ts +0 -2
- package/tools.d.ts.map +0 -1
- package/tools.js +0 -18
- package/tools.js.map +0 -1
- package/tools.mjs +0 -2
- package/tools.mjs.map +0 -1
- /package/{tools/types.js → types.js} +0 -0
- /package/{tools/types.mjs → types.mjs} +0 -0
package/src/dynamic-tools.ts
DELETED
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
import Scorecard from 'scorecard-ai';
|
|
2
|
-
import { Endpoint, asTextContentResult, ToolCallResult } from './tools/types';
|
|
3
|
-
import { zodToJsonSchema } from 'zod-to-json-schema';
|
|
4
|
-
import { z } from 'zod';
|
|
5
|
-
import { Cabidela } from '@cloudflare/cabidela';
|
|
6
|
-
|
|
7
|
-
function zodToInputSchema(schema: z.ZodSchema) {
|
|
8
|
-
return {
|
|
9
|
-
type: 'object' as const,
|
|
10
|
-
...(zodToJsonSchema(schema) as any),
|
|
11
|
-
};
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* A list of tools that expose all the endpoints in the API dynamically.
|
|
16
|
-
*
|
|
17
|
-
* Instead of exposing every endpoint as its own tool, which uses up too many tokens for LLMs to use at once,
|
|
18
|
-
* we expose a single tool that can be used to search for endpoints by name, resource, operation, or tag, and then
|
|
19
|
-
* a generic endpoint that can be used to invoke any endpoint with the provided arguments.
|
|
20
|
-
*
|
|
21
|
-
* @param endpoints - The endpoints to include in the list.
|
|
22
|
-
*/
|
|
23
|
-
export function dynamicTools(endpoints: Endpoint[]): Endpoint[] {
|
|
24
|
-
const listEndpointsSchema = z.object({
|
|
25
|
-
search_query: z
|
|
26
|
-
.string()
|
|
27
|
-
.optional()
|
|
28
|
-
.describe(
|
|
29
|
-
'An optional search query to filter the endpoints by. Provide a partial name, resource, operation, or tag to filter the endpoints returned.',
|
|
30
|
-
),
|
|
31
|
-
});
|
|
32
|
-
|
|
33
|
-
const listEndpointsTool = {
|
|
34
|
-
metadata: {
|
|
35
|
-
resource: 'dynamic_tools',
|
|
36
|
-
operation: 'read' as const,
|
|
37
|
-
tags: [],
|
|
38
|
-
},
|
|
39
|
-
tool: {
|
|
40
|
-
name: 'list_api_endpoints',
|
|
41
|
-
description: 'List or search for all endpoints in the Scorecard TypeScript API',
|
|
42
|
-
inputSchema: zodToInputSchema(listEndpointsSchema),
|
|
43
|
-
},
|
|
44
|
-
handler: async (
|
|
45
|
-
client: Scorecard,
|
|
46
|
-
args: Record<string, unknown> | undefined,
|
|
47
|
-
): Promise<ToolCallResult> => {
|
|
48
|
-
const query = args && listEndpointsSchema.parse(args).search_query?.trim();
|
|
49
|
-
|
|
50
|
-
const filteredEndpoints =
|
|
51
|
-
query && query.length > 0 ?
|
|
52
|
-
endpoints.filter((endpoint) => {
|
|
53
|
-
const fieldsToMatch = [
|
|
54
|
-
endpoint.tool.name,
|
|
55
|
-
endpoint.tool.description,
|
|
56
|
-
endpoint.metadata.resource,
|
|
57
|
-
endpoint.metadata.operation,
|
|
58
|
-
...endpoint.metadata.tags,
|
|
59
|
-
];
|
|
60
|
-
return fieldsToMatch.some((field) => field && field.toLowerCase().includes(query.toLowerCase()));
|
|
61
|
-
})
|
|
62
|
-
: endpoints;
|
|
63
|
-
|
|
64
|
-
return asTextContentResult({
|
|
65
|
-
tools: filteredEndpoints.map(({ tool, metadata }) => ({
|
|
66
|
-
name: tool.name,
|
|
67
|
-
description: tool.description,
|
|
68
|
-
resource: metadata.resource,
|
|
69
|
-
operation: metadata.operation,
|
|
70
|
-
tags: metadata.tags,
|
|
71
|
-
})),
|
|
72
|
-
});
|
|
73
|
-
},
|
|
74
|
-
};
|
|
75
|
-
|
|
76
|
-
const getEndpointSchema = z.object({
|
|
77
|
-
endpoint: z.string().describe('The name of the endpoint to get the schema for.'),
|
|
78
|
-
});
|
|
79
|
-
const getEndpointTool = {
|
|
80
|
-
metadata: {
|
|
81
|
-
resource: 'dynamic_tools',
|
|
82
|
-
operation: 'read' as const,
|
|
83
|
-
tags: [],
|
|
84
|
-
},
|
|
85
|
-
tool: {
|
|
86
|
-
name: 'get_api_endpoint_schema',
|
|
87
|
-
description:
|
|
88
|
-
'Get the schema for an endpoint in the Scorecard TypeScript API. You can use the schema returned by this tool to invoke an endpoint with the `invoke_api_endpoint` tool.',
|
|
89
|
-
inputSchema: zodToInputSchema(getEndpointSchema),
|
|
90
|
-
},
|
|
91
|
-
handler: async (client: Scorecard, args: Record<string, unknown> | undefined) => {
|
|
92
|
-
if (!args) {
|
|
93
|
-
throw new Error('No endpoint provided');
|
|
94
|
-
}
|
|
95
|
-
const endpointName = getEndpointSchema.parse(args).endpoint;
|
|
96
|
-
|
|
97
|
-
const endpoint = endpoints.find((e) => e.tool.name === endpointName);
|
|
98
|
-
if (!endpoint) {
|
|
99
|
-
throw new Error(`Endpoint ${endpointName} not found`);
|
|
100
|
-
}
|
|
101
|
-
return asTextContentResult(endpoint.tool);
|
|
102
|
-
},
|
|
103
|
-
};
|
|
104
|
-
|
|
105
|
-
const invokeEndpointSchema = z.object({
|
|
106
|
-
endpoint_name: z.string().describe('The name of the endpoint to invoke.'),
|
|
107
|
-
args: z
|
|
108
|
-
.record(z.string(), z.any())
|
|
109
|
-
.describe(
|
|
110
|
-
'The arguments to pass to the endpoint. This must match the schema returned by the `get_api_endpoint_schema` tool.',
|
|
111
|
-
),
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
const invokeEndpointTool = {
|
|
115
|
-
metadata: {
|
|
116
|
-
resource: 'dynamic_tools',
|
|
117
|
-
operation: 'write' as const,
|
|
118
|
-
tags: [],
|
|
119
|
-
},
|
|
120
|
-
tool: {
|
|
121
|
-
name: 'invoke_api_endpoint',
|
|
122
|
-
description:
|
|
123
|
-
'Invoke an endpoint in the Scorecard TypeScript API. Note: use the `list_api_endpoints` tool to get the list of endpoints and `get_api_endpoint_schema` tool to get the schema for an endpoint.',
|
|
124
|
-
inputSchema: zodToInputSchema(invokeEndpointSchema),
|
|
125
|
-
},
|
|
126
|
-
handler: async (
|
|
127
|
-
client: Scorecard,
|
|
128
|
-
args: Record<string, unknown> | undefined,
|
|
129
|
-
): Promise<ToolCallResult> => {
|
|
130
|
-
if (!args) {
|
|
131
|
-
throw new Error('No endpoint provided');
|
|
132
|
-
}
|
|
133
|
-
const { success, data, error } = invokeEndpointSchema.safeParse(args);
|
|
134
|
-
if (!success) {
|
|
135
|
-
throw new Error(`Invalid arguments for endpoint. ${error?.format()}`);
|
|
136
|
-
}
|
|
137
|
-
const { endpoint_name, args: endpointArgs } = data;
|
|
138
|
-
|
|
139
|
-
const endpoint = endpoints.find((e) => e.tool.name === endpoint_name);
|
|
140
|
-
if (!endpoint) {
|
|
141
|
-
throw new Error(
|
|
142
|
-
`Endpoint ${endpoint_name} not found. Use the \`list_api_endpoints\` tool to get the list of available endpoints.`,
|
|
143
|
-
);
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
try {
|
|
147
|
-
// Try to validate the arguments for a better error message
|
|
148
|
-
const cabidela = new Cabidela(endpoint.tool.inputSchema, { fullErrors: true });
|
|
149
|
-
cabidela.validate(endpointArgs);
|
|
150
|
-
} catch (error) {
|
|
151
|
-
throw new Error(`Invalid arguments for endpoint ${endpoint_name}:\n${error}`);
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
return await endpoint.handler(client, endpointArgs);
|
|
155
|
-
},
|
|
156
|
-
};
|
|
157
|
-
|
|
158
|
-
return [getEndpointTool, listEndpointsTool, invokeEndpointTool];
|
|
159
|
-
}
|
package/src/filtering.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
// @ts-nocheck
|
|
2
|
-
import initJq from 'jq-web';
|
|
3
|
-
|
|
4
|
-
export async function maybeFilter(jqFilter: unknown | undefined, response: any): Promise<any> {
|
|
5
|
-
if (jqFilter && typeof jqFilter === 'string') {
|
|
6
|
-
return await jq(response, jqFilter);
|
|
7
|
-
} else {
|
|
8
|
-
return response;
|
|
9
|
-
}
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
async function jq(json: any, jqFilter: string) {
|
|
13
|
-
return (await initJq).json(json, jqFilter);
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
export function isJqError(error: any): error is Error {
|
|
17
|
-
return error instanceof Error && 'stderr' in error;
|
|
18
|
-
}
|
package/src/tools/index.ts
DELETED
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
-
|
|
3
|
-
import { Metadata, Endpoint, HandlerFunction } from './types';
|
|
4
|
-
|
|
5
|
-
export { Metadata, Endpoint, HandlerFunction };
|
|
6
|
-
|
|
7
|
-
import create_projects from './projects/create-projects';
|
|
8
|
-
import list_projects from './projects/list-projects';
|
|
9
|
-
import create_testsets from './testsets/create-testsets';
|
|
10
|
-
import update_testsets from './testsets/update-testsets';
|
|
11
|
-
import list_testsets from './testsets/list-testsets';
|
|
12
|
-
import delete_testsets from './testsets/delete-testsets';
|
|
13
|
-
import get_testsets from './testsets/get-testsets';
|
|
14
|
-
import create_testcases from './testcases/create-testcases';
|
|
15
|
-
import update_testcases from './testcases/update-testcases';
|
|
16
|
-
import list_testcases from './testcases/list-testcases';
|
|
17
|
-
import delete_testcases from './testcases/delete-testcases';
|
|
18
|
-
import get_testcases from './testcases/get-testcases';
|
|
19
|
-
import create_runs from './runs/create-runs';
|
|
20
|
-
import list_runs from './runs/list-runs';
|
|
21
|
-
import get_runs from './runs/get-runs';
|
|
22
|
-
import create_metrics from './metrics/create-metrics';
|
|
23
|
-
import update_metrics from './metrics/update-metrics';
|
|
24
|
-
import list_metrics from './metrics/list-metrics';
|
|
25
|
-
import delete_metrics from './metrics/delete-metrics';
|
|
26
|
-
import get_metrics from './metrics/get-metrics';
|
|
27
|
-
import create_records from './records/create-records';
|
|
28
|
-
import list_records from './records/list-records';
|
|
29
|
-
import delete_records from './records/delete-records';
|
|
30
|
-
import upsert_scores from './scores/upsert-scores';
|
|
31
|
-
import update_systems from './systems/update-systems';
|
|
32
|
-
import list_systems from './systems/list-systems';
|
|
33
|
-
import delete_systems from './systems/delete-systems';
|
|
34
|
-
import get_systems from './systems/get-systems';
|
|
35
|
-
import upsert_systems from './systems/upsert-systems';
|
|
36
|
-
import get_systems_versions from './systems/versions/get-systems-versions';
|
|
37
|
-
import upsert_systems_versions from './systems/versions/upsert-systems-versions';
|
|
38
|
-
|
|
39
|
-
export const endpoints: Endpoint[] = [];
|
|
40
|
-
|
|
41
|
-
function addEndpoint(endpoint: Endpoint) {
|
|
42
|
-
endpoints.push(endpoint);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
addEndpoint(create_projects);
|
|
46
|
-
addEndpoint(list_projects);
|
|
47
|
-
addEndpoint(create_testsets);
|
|
48
|
-
addEndpoint(update_testsets);
|
|
49
|
-
addEndpoint(list_testsets);
|
|
50
|
-
addEndpoint(delete_testsets);
|
|
51
|
-
addEndpoint(get_testsets);
|
|
52
|
-
addEndpoint(create_testcases);
|
|
53
|
-
addEndpoint(update_testcases);
|
|
54
|
-
addEndpoint(list_testcases);
|
|
55
|
-
addEndpoint(delete_testcases);
|
|
56
|
-
addEndpoint(get_testcases);
|
|
57
|
-
addEndpoint(create_runs);
|
|
58
|
-
addEndpoint(list_runs);
|
|
59
|
-
addEndpoint(get_runs);
|
|
60
|
-
addEndpoint(create_metrics);
|
|
61
|
-
addEndpoint(update_metrics);
|
|
62
|
-
addEndpoint(list_metrics);
|
|
63
|
-
addEndpoint(delete_metrics);
|
|
64
|
-
addEndpoint(get_metrics);
|
|
65
|
-
addEndpoint(create_records);
|
|
66
|
-
addEndpoint(list_records);
|
|
67
|
-
addEndpoint(delete_records);
|
|
68
|
-
addEndpoint(upsert_scores);
|
|
69
|
-
addEndpoint(update_systems);
|
|
70
|
-
addEndpoint(list_systems);
|
|
71
|
-
addEndpoint(delete_systems);
|
|
72
|
-
addEndpoint(get_systems);
|
|
73
|
-
addEndpoint(upsert_systems);
|
|
74
|
-
addEndpoint(get_systems_versions);
|
|
75
|
-
addEndpoint(upsert_systems_versions);
|
|
76
|
-
|
|
77
|
-
export type Filter = {
|
|
78
|
-
type: 'resource' | 'operation' | 'tag' | 'tool';
|
|
79
|
-
op: 'include' | 'exclude';
|
|
80
|
-
value: string;
|
|
81
|
-
};
|
|
82
|
-
|
|
83
|
-
export function query(filters: Filter[], endpoints: Endpoint[]): Endpoint[] {
|
|
84
|
-
const allExcludes = filters.length > 0 && filters.every((filter) => filter.op === 'exclude');
|
|
85
|
-
const unmatchedFilters = new Set(filters);
|
|
86
|
-
|
|
87
|
-
const filtered = endpoints.filter((endpoint: Endpoint) => {
|
|
88
|
-
let included = false || allExcludes;
|
|
89
|
-
|
|
90
|
-
for (const filter of filters) {
|
|
91
|
-
if (match(filter, endpoint)) {
|
|
92
|
-
unmatchedFilters.delete(filter);
|
|
93
|
-
included = filter.op === 'include';
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
return included;
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
// Check if any filters didn't match
|
|
101
|
-
const unmatched = Array.from(unmatchedFilters).filter((f) => f.type === 'tool' || f.type === 'resource');
|
|
102
|
-
if (unmatched.length > 0) {
|
|
103
|
-
throw new Error(
|
|
104
|
-
`The following filters did not match any endpoints: ${unmatched
|
|
105
|
-
.map((f) => `${f.type}=${f.value}`)
|
|
106
|
-
.join(', ')}`,
|
|
107
|
-
);
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
return filtered;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
/**
 * Report whether a single filter applies to an endpoint.
 *
 * - `resource`: the filter value and the endpoint's resource are both normalized with
 *   `normalizeResource`; `*` in the filter value matches any run of characters and every
 *   other character (including `.`) must match literally, over the whole resource name.
 * - `operation`: exact equality with the endpoint's operation.
 * - `tag`: the endpoint's tag list contains the value.
 * - `tool`: exact equality with the endpoint's tool name.
 *
 * @param filter - The rule to test; only its `type` and `value` are consulted.
 * @param endpoint - The endpoint being tested.
 * @returns `true` when the rule matches the endpoint.
 */
function match({ type, value }: Filter, endpoint: Endpoint): boolean {
  switch (type) {
    case 'resource': {
      // After normalization the only regex metacharacters left are '.' and '*'.
      // Escape '.' first so it stays a literal dot, then expand '*' into a wildcard.
      const pattern = normalizeResource(value).replace(/\./g, '\\.').replace(/\*/g, '.*');
      const regex = new RegExp('^' + pattern + '$');
      return regex.test(normalizeResource(endpoint.metadata.resource));
    }
    case 'operation':
      return endpoint.metadata.operation === value;
    case 'tag':
      return endpoint.metadata.tags.includes(value);
    case 'tool':
      return endpoint.tool.name === value;
  }
}
|
|
128
|
-
|
|
129
|
-
/**
 * Canonicalize a resource name or pattern for comparison.
 *
 * The input is lower-cased and every character other than `a`-`z`, `.`, `*`, `-` and `_`
 * is removed.
 *
 * @param resource - Raw resource name or filter pattern.
 * @returns The normalized string.
 */
function normalizeResource(resource: string): string {
  const lowered = resource.toLowerCase();
  const disallowed = /[^a-z.*\-_]*/g;
  return lowered.replace(disallowed, '');
}
|
|
@@ -1,383 +0,0 @@
|
|
|
1
|
-
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
-
|
|
3
|
-
import { Metadata, asErrorResult, asTextContentResult } from 'scorecard-ai-mcp/tools/types';
|
|
4
|
-
|
|
5
|
-
import { Tool } from '@modelcontextprotocol/sdk/types.js';
|
|
6
|
-
import Scorecard from 'scorecard-ai';
|
|
7
|
-
|
|
8
|
-
/**
 * Describes the API operation behind this tool: `POST /projects/{projectId}/metrics`
 * (operation id `createMetric`), a `write` operation on the `metrics` resource with no tags.
 */
export const metadata: Metadata = {
  resource: 'metrics',
  operation: 'write',
  tags: [],
  httpMethod: 'post',
  httpPath: '/projects/{projectId}/metrics',
  operationId: 'createMetric',
};
|
|
16
|
-
|
|
17
|
-
/**
 * MCP tool definition for `create_metrics`.
 *
 * The input schema is an `anyOf` over nine object variants: every combination of
 * output type (`int`, `float`, `boolean`) and evaluation type (`ai`, `human`, `heuristic`),
 * in that nesting order. The variants are generated by a local builder rather than being
 * spelled out one by one; property order inside each variant is significant and preserved.
 */
export const tool: Tool = ((): Tool => {
  type EvalKind = 'ai' | 'human' | 'heuristic';
  type OutputKind = 'int' | 'float' | 'boolean';

  const evalKinds: EvalKind[] = ['ai', 'human', 'heuristic'];
  const outputKinds: OutputKind[] = ['int', 'float', 'boolean'];

  const evalTypeDescriptions: Record<EvalKind, string> = {
    ai: 'AI-based evaluation type.',
    human: 'Human-based evaluation type.',
    heuristic: 'Heuristic-based evaluation type.',
  };
  const guidelinesDescriptions: Record<EvalKind, string> = {
    ai: 'Guidelines for AI evaluation on how to score the metric.',
    human: 'Guidelines for human evaluators.',
    heuristic: 'Guidelines for heuristic evaluation logic.',
  };
  const outputTypeDescriptions: Record<OutputKind, string> = {
    int: 'Integer output type.',
    float: 'Float output type (0-1).',
    boolean: 'Boolean output type.',
  };

  // Schema for a free-form string property.
  const describedString = (description: string) => ({ type: 'string', description });
  // Schema for a string property that accepts exactly one value.
  const singleValue = (description: string, value: string) => ({
    type: 'string',
    description,
    enum: [value],
  });

  // Builds one anyOf member; keys are inserted in the order they must appear.
  const buildVariant = (evalType: EvalKind, outputType: OutputKind) => {
    const isAi = evalType === 'ai';
    const properties: Record<string, unknown> = {
      projectId: { type: 'string' },
      evalType: singleValue(evalTypeDescriptions[evalType], evalType),
      name: describedString('The name of the Metric.'),
      outputType: singleValue(outputTypeDescriptions[outputType], outputType),
    };
    if (isAi) {
      properties['promptTemplate'] = describedString(
        'The complete prompt template for AI evaluation. Should include placeholders for dynamic content.',
      );
    }
    properties['description'] = describedString('The description of the Metric.');
    if (isAi) {
      properties['evalModelName'] = describedString('The AI model to use for evaluation.');
    }
    properties['guidelines'] = describedString(guidelinesDescriptions[evalType]);
    // Boolean metrics carry no passing threshold.
    if (outputType === 'int') {
      properties['passingThreshold'] = {
        type: 'integer',
        description: 'The threshold for determining pass/fail from integer scores (1-5).',
      };
    } else if (outputType === 'float') {
      properties['passingThreshold'] = {
        type: 'number',
        description: 'Threshold for determining pass/fail from float scores (0.0-1.0).',
      };
    }
    if (isAi) {
      properties['temperature'] = {
        type: 'number',
        description: 'The temperature for AI evaluation (0-2).',
      };
    }

    const required = ['projectId', 'evalType', 'name', 'outputType'];
    if (isAi) {
      required.push('promptTemplate');
    }

    return { type: 'object', properties, required };
  };

  const variants: ReturnType<typeof buildVariant>[] = [];
  for (const outputType of outputKinds) {
    for (const evalType of evalKinds) {
      variants.push(buildVariant(evalType, outputType));
    }
  }

  return {
    name: 'create_metrics',
    description:
      'Create a new Metric for evaluating system outputs. The structure of a metric depends on the evalType and outputType of the metric.',
    inputSchema: {
      type: 'object',
      anyOf: variants,
    },
    annotations: {},
  };
})();
|
|
370
|
-
|
|
371
|
-
/**
 * Execute the `create_metrics` tool.
 *
 * Splits `projectId` off the tool arguments, sends the remaining fields as the request
 * body to `client.metrics.create`, and wraps the response as a text content result.
 *
 * @param client - Scorecard API client used to issue the request.
 * @param args - Tool arguments; `projectId` plus the metric fields.
 * @returns A text content result on success, or an error result carrying the message of
 *   a `Scorecard.APIError`.
 * @throws Any error that is not a `Scorecard.APIError` is rethrown unchanged.
 */
export const handler = async (client: Scorecard, args: Record<string, unknown> | undefined) => {
  const { projectId, ...body } = args as any;
  try {
    const created = await client.metrics.create(projectId, body);
    return asTextContentResult(created);
  } catch (failure) {
    // Only API-level failures are reported back as tool errors.
    if (!(failure instanceof Scorecard.APIError)) {
      throw failure;
    }
    return asErrorResult(failure.message);
  }
};
|
|
382
|
-
|
|
383
|
-
// Default export bundles this module's metadata, tool definition, and handler.
export default { metadata, tool, handler };
|