waypoi 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/instructions/ui.instructions.md +42 -0
- package/.github/workflows/ci.yml +35 -0
- package/.github/workflows/publish.yml +71 -0
- package/.github/workflows/release.yml +48 -0
- package/.playwright-mcp/console-2026-04-04T01-41-10-746Z.log +2 -0
- package/.playwright-mcp/console-2026-04-04T01-41-28-799Z.log +3 -0
- package/.playwright-mcp/console-2026-04-05T02-26-51-909Z.log +76 -0
- package/.playwright-mcp/page-2026-04-04T01-41-10-816Z.yml +1 -0
- package/.playwright-mcp/page-2026-04-04T01-41-29-141Z.yml +77 -0
- package/.playwright-mcp/page-2026-04-04T01-41-42-633Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T01-42-03-929Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-12-54-813Z.yml +6 -0
- package/.playwright-mcp/page-2026-04-04T02-14-58-600Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-03-923Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-07-426Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-25-729Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-16-22-984Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-17-00-599Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-17-50-874Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-05T02-26-55-570Z.yml +6 -0
- package/AGENTS.md +48 -0
- package/CHANGELOG.md +131 -0
- package/README.md +552 -0
- package/assets/agent-mode.png +0 -0
- package/assets/categorize.png +0 -0
- package/assets/dashboard.png +0 -0
- package/assets/endpoint-proxy.png +0 -0
- package/assets/icon.png +0 -0
- package/assets/mcp-generate-image.png +0 -0
- package/assets/mcp-understand-image.png +0 -0
- package/assets/peek-token-flow.png +0 -0
- package/assets/playground.png +0 -0
- package/assets/sankey.png +0 -0
- package/cli/index.ts +2805 -0
- package/cli/legacyRewrite.ts +108 -0
- package/cli/modelRef.ts +24 -0
- package/dist/cli/index.js +2536 -0
- package/dist/cli/legacyRewrite.js +92 -0
- package/dist/cli/modelRef.js +20 -0
- package/dist/src/benchmark/artifacts.js +131 -0
- package/dist/src/benchmark/capabilityClassifier.js +81 -0
- package/dist/src/benchmark/capabilityStore.js +144 -0
- package/dist/src/benchmark/config.js +238 -0
- package/dist/src/benchmark/gates.js +118 -0
- package/dist/src/benchmark/jobs.js +252 -0
- package/dist/src/benchmark/runner.js +1847 -0
- package/dist/src/benchmark/schema.js +353 -0
- package/dist/src/benchmark/suites.js +314 -0
- package/dist/src/benchmark/tinyQaDataset.js +422 -0
- package/dist/src/benchmark/types.js +25 -0
- package/dist/src/config.js +47 -0
- package/dist/src/index.js +178 -0
- package/dist/src/mcp/client.js +215 -0
- package/dist/src/mcp/discovery.js +226 -0
- package/dist/src/mcp/policy.js +65 -0
- package/dist/src/mcp/registry.js +129 -0
- package/dist/src/mcp/service.js +460 -0
- package/dist/src/middleware/auth.js +179 -0
- package/dist/src/middleware/requestCapture.js +192 -0
- package/dist/src/middleware/requestStats.js +118 -0
- package/dist/src/pools/builder.js +132 -0
- package/dist/src/pools/repository.js +69 -0
- package/dist/src/pools/scheduler.js +360 -0
- package/dist/src/pools/types.js +2 -0
- package/dist/src/protocols/adapters/dashscope.js +267 -0
- package/dist/src/protocols/adapters/inferenceV2.js +346 -0
- package/dist/src/protocols/adapters/openai.js +27 -0
- package/dist/src/protocols/registry.js +99 -0
- package/dist/src/protocols/types.js +2 -0
- package/dist/src/providers/health.js +153 -0
- package/dist/src/providers/importer.js +289 -0
- package/dist/src/providers/modelRegistry.js +313 -0
- package/dist/src/providers/repository.js +361 -0
- package/dist/src/providers/types.js +2 -0
- package/dist/src/routes/admin.js +531 -0
- package/dist/src/routes/audio.js +295 -0
- package/dist/src/routes/chat.js +240 -0
- package/dist/src/routes/embeddings.js +157 -0
- package/dist/src/routes/images.js +288 -0
- package/dist/src/routes/mcp.js +256 -0
- package/dist/src/routes/mcpService.js +100 -0
- package/dist/src/routes/models.js +48 -0
- package/dist/src/routes/responses.js +711 -0
- package/dist/src/routes/sessions.js +450 -0
- package/dist/src/routes/stats.js +270 -0
- package/dist/src/routes/ui.js +97 -0
- package/dist/src/routes/videos.js +107 -0
- package/dist/src/routing/router.js +338 -0
- package/dist/src/services/imageGeneration.js +280 -0
- package/dist/src/services/imageUnderstanding.js +352 -0
- package/dist/src/services/videoGeneration.js +79 -0
- package/dist/src/storage/captureRepository.js +1591 -0
- package/dist/src/storage/files.js +157 -0
- package/dist/src/storage/imageCache.js +346 -0
- package/dist/src/storage/repositories.js +388 -0
- package/dist/src/storage/sessionRepository.js +370 -0
- package/dist/src/storage/statsRepository.js +204 -0
- package/dist/src/transport/httpClient.js +126 -0
- package/dist/src/types.js +2 -0
- package/dist/src/utils/messageMedia.js +285 -0
- package/dist/src/utils/modelCapabilities.js +108 -0
- package/dist/src/utils/modelDiscovery.js +170 -0
- package/dist/src/version.js +5 -0
- package/dist/src/workers/captureRetention.js +25 -0
- package/dist/src/workers/configWatcher.js +91 -0
- package/dist/src/workers/healthChecker.js +21 -0
- package/dist/src/workers/statsRotation.js +41 -0
- package/docs/LLM/output_schema.md +312 -0
- package/docs/benchmark.md +208 -0
- package/docs/mcp-guidelines.md +125 -0
- package/docs/mcp-service.md +178 -0
- package/docs/opencode.md +86 -0
- package/docs/providers.md +79 -0
- package/examples/benchmark.config.yaml +28 -0
- package/examples/providers/alibaba-dashscope.yaml +88 -0
- package/examples/providers/alibaba-llm.yaml +64 -0
- package/examples/providers/alibaba-registry.yaml +7 -0
- package/examples/providers/inference-v2-ray.yaml +29 -0
- package/examples/scenarios/assets/omni-call-sample.wav +0 -0
- package/examples/scenarios/custom.jsonl +5 -0
- package/examples/scenarios/custom.yaml +40 -0
- package/model-form-v2.png +0 -0
- package/package.json +66 -0
- package/provider-form-v2.png +0 -0
- package/provider-form.png +0 -0
- package/scripts/manual-test.sh +11 -0
- package/scripts/version-from-git.js +23 -0
- package/src/benchmark/artifacts.ts +149 -0
- package/src/benchmark/capabilityClassifier.ts +99 -0
- package/src/benchmark/capabilityStore.ts +174 -0
- package/src/benchmark/config.ts +337 -0
- package/src/benchmark/gates.ts +164 -0
- package/src/benchmark/jobs.ts +312 -0
- package/src/benchmark/runner.ts +2519 -0
- package/src/benchmark/schema.ts +443 -0
- package/src/benchmark/suites.ts +323 -0
- package/src/benchmark/tinyQaDataset.ts +428 -0
- package/src/benchmark/types.ts +442 -0
- package/src/config.ts +44 -0
- package/src/index.ts +195 -0
- package/src/mcp/client.ts +305 -0
- package/src/mcp/discovery.ts +266 -0
- package/src/mcp/policy.ts +105 -0
- package/src/mcp/registry.ts +164 -0
- package/src/mcp/service.ts +611 -0
- package/src/middleware/auth.ts +251 -0
- package/src/middleware/requestCapture.ts +245 -0
- package/src/middleware/requestStats.ts +163 -0
- package/src/pools/builder.ts +159 -0
- package/src/pools/repository.ts +71 -0
- package/src/pools/scheduler.ts +425 -0
- package/src/pools/types.ts +117 -0
- package/src/protocols/adapters/dashscope.ts +335 -0
- package/src/protocols/adapters/inferenceV2.ts +428 -0
- package/src/protocols/adapters/openai.ts +32 -0
- package/src/protocols/registry.ts +117 -0
- package/src/protocols/types.ts +81 -0
- package/src/providers/health.ts +207 -0
- package/src/providers/importer.ts +402 -0
- package/src/providers/modelRegistry.ts +415 -0
- package/src/providers/repository.ts +439 -0
- package/src/providers/types.ts +113 -0
- package/src/routes/admin.ts +666 -0
- package/src/routes/audio.ts +372 -0
- package/src/routes/chat.ts +301 -0
- package/src/routes/embeddings.ts +197 -0
- package/src/routes/images.ts +356 -0
- package/src/routes/mcp.ts +320 -0
- package/src/routes/mcpService.ts +114 -0
- package/src/routes/models.ts +50 -0
- package/src/routes/responses.ts +872 -0
- package/src/routes/sessions.ts +558 -0
- package/src/routes/stats.ts +312 -0
- package/src/routes/ui.ts +96 -0
- package/src/routes/videos.ts +132 -0
- package/src/routing/router.ts +501 -0
- package/src/services/imageGeneration.ts +396 -0
- package/src/services/imageUnderstanding.ts +449 -0
- package/src/services/videoGeneration.ts +127 -0
- package/src/storage/captureRepository.ts +1835 -0
- package/src/storage/files.ts +178 -0
- package/src/storage/imageCache.ts +405 -0
- package/src/storage/repositories.ts +494 -0
- package/src/storage/sessionRepository.ts +419 -0
- package/src/storage/statsRepository.ts +238 -0
- package/src/transport/httpClient.ts +145 -0
- package/src/types.ts +322 -0
- package/src/utils/messageMedia.ts +293 -0
- package/src/utils/modelCapabilities.ts +161 -0
- package/src/utils/modelDiscovery.ts +203 -0
- package/src/workers/captureRetention.ts +25 -0
- package/src/workers/configWatcher.ts +115 -0
- package/src/workers/healthChecker.ts +22 -0
- package/src/workers/statsRotation.ts +49 -0
- package/tests/benchmarkAdminRoutes.test.ts +82 -0
- package/tests/benchmarkBasics.test.ts +116 -0
- package/tests/captureAdminRoutes.test.ts +420 -0
- package/tests/captureRepository.test.ts +797 -0
- package/tests/cliLegacyRewrite.test.ts +45 -0
- package/tests/imageGeneration.service.test.ts +107 -0
- package/tests/imageUnderstanding.service.test.ts +123 -0
- package/tests/mcpPolicy.test.ts +105 -0
- package/tests/mcpService.test.ts +1245 -0
- package/tests/modelRef.test.ts +23 -0
- package/tests/modelsRoutes.test.ts +154 -0
- package/tests/sessionMediaCache.test.ts +167 -0
- package/tests/statsRoutes.test.ts +323 -0
- package/tsconfig.json +15 -0
- package/ui/index.html +16 -0
- package/ui/package-lock.json +8521 -0
- package/ui/package.json +52 -0
- package/ui/postcss.config.js +6 -0
- package/ui/public/assets/apple-touch-icon.png +0 -0
- package/ui/public/assets/favicon-16.png +0 -0
- package/ui/public/assets/favicon-32.png +0 -0
- package/ui/public/assets/icon-192.png +0 -0
- package/ui/public/assets/icon-512.png +0 -0
- package/ui/src/App.tsx +27 -0
- package/ui/src/api/client.ts +1503 -0
- package/ui/src/components/EndpointUsageGuide.tsx +361 -0
- package/ui/src/components/Layout.tsx +124 -0
- package/ui/src/components/MessageContent.tsx +365 -0
- package/ui/src/components/ToolCallMessage.tsx +179 -0
- package/ui/src/components/ToolPicker.tsx +442 -0
- package/ui/src/components/messageContentParser.test.ts +41 -0
- package/ui/src/components/messageContentParser.ts +73 -0
- package/ui/src/components/thinkingPreview.test.ts +27 -0
- package/ui/src/components/thinkingPreview.ts +15 -0
- package/ui/src/components/toMermaidSankey.test.ts +78 -0
- package/ui/src/components/toMermaidSankey.ts +56 -0
- package/ui/src/components/ui/button.tsx +58 -0
- package/ui/src/components/ui/input.tsx +21 -0
- package/ui/src/components/ui/textarea.tsx +21 -0
- package/ui/src/lib/utils.ts +6 -0
- package/ui/src/main.tsx +9 -0
- package/ui/src/pages/AgentPlayground.tsx +2010 -0
- package/ui/src/pages/Benchmark.tsx +988 -0
- package/ui/src/pages/Dashboard.tsx +581 -0
- package/ui/src/pages/Peek.tsx +962 -0
- package/ui/src/pages/Settings.tsx +2013 -0
- package/ui/src/pages/agentPlaygroundPayload.test.ts +109 -0
- package/ui/src/pages/agentPlaygroundPayload.ts +97 -0
- package/ui/src/pages/agentThinkingContent.test.ts +50 -0
- package/ui/src/pages/agentThinkingContent.ts +57 -0
- package/ui/src/pages/dashboardTokenUsage.test.ts +66 -0
- package/ui/src/pages/dashboardTokenUsage.ts +36 -0
- package/ui/src/pages/imageUpload.test.ts +39 -0
- package/ui/src/pages/imageUpload.ts +71 -0
- package/ui/src/pages/peekFilters.test.ts +29 -0
- package/ui/src/pages/peekFilters.ts +13 -0
- package/ui/src/pages/peekMedia.test.ts +58 -0
- package/ui/src/pages/peekMedia.ts +148 -0
- package/ui/src/pages/sessionAutoTitle.test.ts +128 -0
- package/ui/src/pages/sessionAutoTitle.ts +106 -0
- package/ui/src/stores/settings.ts +58 -0
- package/ui/src/styles/globals.css +223 -0
- package/ui/src/vite-env.d.ts +8 -0
- package/ui/tailwind.config.js +106 -0
- package/ui/tsconfig.json +32 -0
- package/ui/vite.config.ts +37 -0
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.validateScenarioCollection = validateScenarioCollection;
|
|
4
|
+
const types_1 = require("./types");
|
|
5
|
+
// Top-level field names recognised on a scenario definition. validateScenario
// emits an "unknown field ... is ignored" warning for any key not listed here.
const SCENARIO_KEYS = new Set([
    "id", "mode", "title", "summary", "userVisibleGoal", "exampleSource",
    "inputPreview", "successCriteria", "expectedHighlights",
    "capability", "model", "timeoutMs", "requiresAvailableTools", "assertions",
    "prompt", "tools", "maxIterations",
    "temperature", "top_p", "max_tokens", "presence_penalty", "frequency_penalty",
    "seed", "stop",
    "input", "n", "size", "audioFile", "inputText", "voice", "response_format",
]);
|
|
38
|
+
// Field names recognised inside a scenario's `assertions` object.
// validateAssertions warns about (and ignores) any other key.
const ASSERTION_KEYS = new Set([
    "contains", "notContains", "requiredToolNames",
    "minToolCalls", "maxToolCalls", "maxLatencyMs", "statusCode",
    "minItems", "minVectorLength", "minImages",
    "containsText", "notContainsText", "minBytes", "contentType",
]);
|
|
54
|
+
/**
 * Validates a list of raw scenario definitions and rejects duplicate ids.
 *
 * @param {unknown[]} rawScenarios - Raw scenario entries; each one is passed to
 *   validateScenario together with its index.
 * @param {string} sourceLabel - Label prefixed to every error and warning message.
 * @returns {{ scenarios: object[], warnings: string[] }} The validated scenarios
 *   in input order, plus the warnings collected while validating them.
 * @throws {Error} When `rawScenarios` is not an array, when any entry fails
 *   validation, or when two scenarios share the same id.
 */
function validateScenarioCollection(rawScenarios, sourceLabel) {
    // Fail with a labelled message instead of letting `.map` raise a bare
    // TypeError when the caller hands over something that is not a list.
    if (!Array.isArray(rawScenarios)) {
        throw new Error(`${sourceLabel}: expected an array of scenarios.`);
    }
    const warnings = [];
    const scenarios = rawScenarios.map((raw, index) => validateScenario(raw, sourceLabel, index, warnings));
    const ids = new Set();
    for (const scenario of scenarios) {
        if (ids.has(scenario.id)) {
            throw new Error(`${sourceLabel} scenario '${scenario.id}' is duplicated.`);
        }
        ids.add(scenario.id);
    }
    return { scenarios, warnings };
}
|
|
66
|
+
/**
 * Validates one raw scenario entry and returns its normalized form.
 *
 * Unknown top-level keys are not an error: each one adds a warning to
 * `warnings` and is left out of the result.
 *
 * @param {unknown} raw - The raw entry; must be a non-array object.
 * @param {string} sourceLabel - Label used to build the message prefix.
 * @param {number} index - Position of the entry, used in the message prefix.
 * @param {string[]} warnings - Collector that warnings are appended to.
 * @returns {object} The normalized scenario; absent optional fields are `undefined`.
 * @throws {Error} When the entry is not an object, a field is invalid, or a
 *   field required by the scenario's mode is missing.
 */
function validateScenario(raw, sourceLabel, index, warnings) {
    const ctx = `${sourceLabel} scenario[${index}]`;
    const isObjectRecord = Boolean(raw) && typeof raw === "object" && !Array.isArray(raw);
    if (!isObjectRecord) {
        throw new Error(`${ctx}: expected object.`);
    }
    const unknownKeys = Object.keys(raw).filter((key) => !SCENARIO_KEYS.has(key));
    for (const key of unknownKeys) {
        warnings.push(`${ctx}: unknown field '${key}' is ignored.`);
    }
    // Builds the "<ctx>.<field>" label each validator uses in its error message.
    const at = (name) => `${ctx}.${name}`;
    // Property initializers run top to bottom, so the first invalid field in
    // this order is the one that gets reported.
    const scenario = {
        id: requiredString(raw.id, at("id")),
        mode: validateMode(raw.mode, at("mode")),
        capability: optionalCapabilityKey(raw.capability, at("capability")),
        model: optionalString(raw.model, at("model")),
        timeoutMs: optionalInteger(raw.timeoutMs, at("timeoutMs"), 1),
        assertions: validateAssertions(raw.assertions, at("assertions"), warnings),
        title: optionalString(raw.title, at("title")),
        summary: optionalString(raw.summary, at("summary")),
        userVisibleGoal: optionalString(raw.userVisibleGoal, at("userVisibleGoal")),
        exampleSource: optionalExampleSource(raw.exampleSource, at("exampleSource")),
        inputPreview: optionalString(raw.inputPreview, at("inputPreview")),
        successCriteria: optionalString(raw.successCriteria, at("successCriteria")),
        expectedHighlights: optionalStringArray(raw.expectedHighlights, at("expectedHighlights")),
        prompt: optionalString(raw.prompt, at("prompt")),
        tools: optionalStringArray(raw.tools, at("tools")),
        maxIterations: optionalInteger(raw.maxIterations, at("maxIterations"), 1, 20),
        temperature: optionalFiniteNumber(raw.temperature, at("temperature")),
        top_p: optionalFiniteNumber(raw.top_p, at("top_p"), 0, 1),
        max_tokens: optionalInteger(raw.max_tokens, at("max_tokens"), 1),
        presence_penalty: optionalFiniteNumber(raw.presence_penalty, at("presence_penalty"), -2, 2),
        frequency_penalty: optionalFiniteNumber(raw.frequency_penalty, at("frequency_penalty"), -2, 2),
        seed: optionalInteger(raw.seed, at("seed"), 0),
        stop: optionalStopField(raw.stop, at("stop")),
        requiresAvailableTools: optionalBoolean(raw.requiresAvailableTools, at("requiresAvailableTools")),
        input: optionalInputValue(raw.input, at("input")),
        n: optionalInteger(raw.n, at("n"), 1),
        size: optionalString(raw.size, at("size")),
        audioFile: optionalString(raw.audioFile, at("audioFile")),
        inputText: optionalString(raw.inputText, at("inputText")),
        voice: optionalString(raw.voice, at("voice")),
        response_format: optionalString(raw.response_format, at("response_format")),
    };
    validateScenarioByMode(scenario, ctx);
    return scenario;
}
|
|
119
|
+
/**
 * Checks that a normalized scenario carries the fields its mode requires.
 *
 * - chat / agent / responses / image_generation: `prompt`
 * - embeddings: `input` (anything other than `undefined`)
 * - audio_transcription / omni_call: `audioFile`
 * - audio_speech: `inputText`, then `voice`
 *
 * Any other mode passes without checks.
 *
 * @param {object} scenario - Normalized scenario.
 * @param {string} ctx - Message prefix identifying the scenario.
 * @returns {void}
 * @throws {Error} When a required field is missing.
 */
function validateScenarioByMode(scenario, ctx) {
    const { mode } = scenario;
    const demand = (fieldName, present) => {
        if (!present) {
            throw new Error(`${ctx}.${fieldName}: required for mode '${mode}'.`);
        }
    };
    switch (mode) {
        case "chat":
        case "agent":
        case "responses":
        case "image_generation":
            demand("prompt", Boolean(scenario.prompt));
            break;
        case "embeddings":
            // Presence check only: any defined value counts as provided.
            demand("input", scenario.input !== undefined);
            break;
        case "audio_transcription":
        case "omni_call":
            demand("audioFile", Boolean(scenario.audioFile));
            break;
        case "audio_speech":
            demand("inputText", Boolean(scenario.inputText));
            demand("voice", Boolean(scenario.voice));
            break;
        default:
            break;
    }
}
|
|
152
|
+
/**
 * Validates a scenario's `assertions` object.
 *
 * A missing (`undefined`/`null`) value yields `{ statusCode: 200 }`. Unknown
 * keys add a warning to `warnings` and are dropped. `statusCode` falls back
 * to 200 when not supplied.
 *
 * @param {unknown} raw - Raw assertions value.
 * @param {string} field - Message prefix for this field.
 * @param {string[]} warnings - Collector that warnings are appended to.
 * @returns {object} Normalized assertions; absent optional entries are `undefined`.
 * @throws {Error} When `raw` is not an object, an entry is invalid, or
 *   `maxToolCalls` is smaller than `minToolCalls`.
 */
function validateAssertions(raw, field, warnings) {
    if (raw == null) {
        return { statusCode: 200 };
    }
    if (Array.isArray(raw) || typeof raw !== "object") {
        throw new Error(`${field}: expected object.`);
    }
    const unknownKeys = Object.keys(raw).filter((key) => !ASSERTION_KEYS.has(key));
    for (const key of unknownKeys) {
        warnings.push(`${field}: unknown field '${key}' is ignored.`);
    }
    const at = (name) => `${field}.${name}`;
    const strings = (name) => optionalStringArray(raw[name], at(name));
    const integer = (name, min, max) => optionalInteger(raw[name], at(name), min, max);
    // Entries are validated top to bottom; the first invalid one is reported.
    const assertions = {
        contains: strings("contains"),
        notContains: strings("notContains"),
        requiredToolNames: strings("requiredToolNames"),
        minToolCalls: integer("minToolCalls", 0),
        maxToolCalls: integer("maxToolCalls", 0),
        maxLatencyMs: integer("maxLatencyMs", 1),
        statusCode: integer("statusCode", 100, 599) ?? 200,
        minItems: integer("minItems", 1),
        minVectorLength: integer("minVectorLength", 1),
        minImages: integer("minImages", 1),
        containsText: strings("containsText"),
        notContainsText: strings("notContainsText"),
        minBytes: integer("minBytes", 1),
        contentType: optionalString(raw.contentType, at("contentType")),
    };
    const { minToolCalls, maxToolCalls } = assertions;
    const bothBoundsGiven = typeof minToolCalls === "number" && typeof maxToolCalls === "number";
    if (bothBoundsGiven && maxToolCalls < minToolCalls) {
        throw new Error(`${field}: maxToolCalls must be >= minToolCalls.`);
    }
    return assertions;
}
|
|
201
|
+
/**
 * Validates the optional `exampleSource` field.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string|undefined} One of 'opencode', 'builtin', 'file',
 *   'huggingface', or `undefined` when the value is absent.
 * @throws {Error} When the value is not a non-empty string or not one of the
 *   accepted sources.
 */
function optionalExampleSource(value, field) {
    const source = optionalString(value, field);
    if (!source) {
        return undefined;
    }
    const accepted = ["opencode", "builtin", "file", "huggingface"];
    if (accepted.includes(source)) {
        return source;
    }
    throw new Error(`${field}: expected 'opencode', 'builtin', 'file', or 'huggingface'.`);
}
|
|
211
|
+
/**
 * Validates an optional boolean field.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {boolean|undefined} The value, or `undefined` when it is `undefined`/`null`.
 * @throws {Error} When the value is present but not a primitive boolean.
 */
function optionalBoolean(value, field) {
    if (value == null) {
        return undefined;
    }
    if (value === true || value === false) {
        return value;
    }
    throw new Error(`${field}: expected boolean.`);
}
|
|
220
|
+
/**
 * Validates the required `mode` field against `types_1.BENCHMARK_MODES`.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string} The mode, unchanged.
 * @throws {Error} When the value is not a string or is not a listed mode.
 */
function validateMode(value, field) {
    const isString = typeof value === "string";
    if (!isString) {
        throw new Error(`${field}: expected mode string.`);
    }
    const supported = types_1.BENCHMARK_MODES.includes(value);
    if (supported) {
        return value;
    }
    throw new Error(`${field}: unsupported mode '${value}'.`);
}
|
|
229
|
+
/**
 * Validates a mandatory string field.
 *
 * Delegates to optionalString, then rejects the "absent" result.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string} The trimmed string.
 * @throws {Error} When the value is missing or not a non-empty string.
 */
function requiredString(value, field) {
    const text = optionalString(value, field);
    if (text) {
        return text;
    }
    throw new Error(`${field}: required non-empty string.`);
}
|
|
236
|
+
/**
 * Validates an optional string field and trims it.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string|undefined} The trimmed string, or `undefined` when the
 *   value is `undefined`/`null`.
 * @throws {Error} When the value is present but not a string, or is blank
 *   after trimming.
 */
function optionalString(value, field) {
    if (value == null) {
        return undefined;
    }
    // Non-strings collapse to "" so both rejection cases share one check.
    const trimmed = typeof value === "string" ? value.trim() : "";
    if (trimmed.length === 0) {
        throw new Error(`${field}: expected non-empty string.`);
    }
    return trimmed;
}
|
|
245
|
+
/**
 * Validates an optional field that may be a single string or a string array.
 *
 * A single string is trimmed and wrapped in a one-element array; array
 * entries are trimmed individually. An empty array is accepted as-is.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string[]|undefined} Trimmed strings, or `undefined` when the value
 *   is `undefined`/`null`.
 * @throws {Error} When the value has another type or contains a blank or
 *   non-string entry.
 */
function optionalStringArray(value, field) {
    if (value == null) {
        return undefined;
    }
    if (typeof value === "string") {
        const single = value.trim();
        if (single) {
            return [single];
        }
        throw new Error(`${field}: string must be non-empty.`);
    }
    if (!Array.isArray(value)) {
        throw new Error(`${field}: expected string or string array.`);
    }
    const normalizeEntry = (entry, position) => {
        const text = typeof entry === "string" ? entry.trim() : "";
        if (text.length === 0) {
            throw new Error(`${field}[${position}]: expected non-empty string.`);
        }
        return text;
    };
    return value.map(normalizeEntry);
}
|
|
266
|
+
/**
 * Validates the optional `input` field (a string or an array of strings).
 *
 * Unlike the other string validators, values are returned untrimmed; trimming
 * is used only to detect blank strings.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string|string[]|undefined} The string, a new array holding the
 *   entries, or `undefined` when the value is `undefined`/`null`.
 * @throws {Error} When the value has another type or contains a blank or
 *   non-string entry.
 */
function optionalInputValue(value, field) {
    if (value == null) {
        return undefined;
    }
    const isBlank = (candidate) => typeof candidate !== "string" || !candidate.trim();
    if (typeof value === "string") {
        if (isBlank(value)) {
            throw new Error(`${field}: expected non-empty string.`);
        }
        return value;
    }
    if (!Array.isArray(value)) {
        throw new Error(`${field}: expected string or string array.`);
    }
    return value.map((entry, position) => {
        if (isBlank(entry)) {
            throw new Error(`${field}[${position}]: expected non-empty string.`);
        }
        return entry;
    });
}
|
|
287
|
+
/**
 * Validates the optional `capability` field against
 * `types_1.BENCHMARK_CAPABILITY_KEYS`.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string|undefined} The capability key unchanged, or `undefined`
 *   when the value is `undefined`/`null`.
 * @throws {Error} When the value is not a string or is not a listed key.
 */
function optionalCapabilityKey(value, field) {
    if (value == null) {
        return undefined;
    }
    if (typeof value !== "string") {
        throw new Error(`${field}: expected capability key string.`);
    }
    const known = types_1.BENCHMARK_CAPABILITY_KEYS.includes(value);
    if (known) {
        return value;
    }
    throw new Error(`${field}: unsupported capability '${value}'.`);
}
|
|
299
|
+
/**
 * Validates an optional integer field within inclusive bounds.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @param {number} min - Inclusive lower bound.
 * @param {number} [max] - Inclusive upper bound; checked only when a number.
 * @returns {number|undefined} The integer, or `undefined` when the value is
 *   `undefined`/`null`.
 * @throws {Error} When the value is not an integer or falls outside the bounds.
 */
function optionalInteger(value, field, min, max) {
    if (value == null) {
        return undefined;
    }
    const isInteger = Number.isInteger(value);
    if (!isInteger) {
        throw new Error(`${field}: expected integer.`);
    }
    const belowMin = value < min;
    if (belowMin) {
        throw new Error(`${field}: must be >= ${min}.`);
    }
    const aboveMax = typeof max === "number" && value > max;
    if (aboveMax) {
        throw new Error(`${field}: must be <= ${max}.`);
    }
    return value;
}
|
|
315
|
+
/**
 * Validates an optional finite-number field within optional inclusive bounds.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @param {number} [min] - Inclusive lower bound; checked only when a number.
 * @param {number} [max] - Inclusive upper bound; checked only when a number.
 * @returns {number|undefined} The number, or `undefined` when the value is
 *   `undefined`/`null`.
 * @throws {Error} When the value is not a finite number or is out of bounds.
 */
function optionalFiniteNumber(value, field, min, max) {
    if (value == null) {
        return undefined;
    }
    // Number.isFinite is false for every non-number, so it also covers the type check.
    if (!Number.isFinite(value)) {
        throw new Error(`${field}: expected finite number.`);
    }
    const hasMin = typeof min === "number";
    if (hasMin && value < min) {
        throw new Error(`${field}: must be >= ${min}.`);
    }
    const hasMax = typeof max === "number";
    if (hasMax && value > max) {
        throw new Error(`${field}: must be <= ${max}.`);
    }
    return value;
}
|
|
330
|
+
/**
 * Validates the optional `stop` field (one stop sequence or a non-empty list).
 *
 * The shape is preserved: a string stays a string, an array stays an array;
 * every sequence is trimmed.
 *
 * @param {unknown} value - Raw value.
 * @param {string} field - Message prefix for this field.
 * @returns {string|string[]|undefined} Trimmed stop sequence(s), or
 *   `undefined` when the value is `undefined`/`null`.
 * @throws {Error} When the value has another type, is an empty array, or
 *   contains a blank or non-string entry.
 */
function optionalStopField(value, field) {
    if (value == null) {
        return undefined;
    }
    if (typeof value === "string") {
        const sequence = value.trim();
        if (sequence) {
            return sequence;
        }
        throw new Error(`${field}: expected non-empty string.`);
    }
    if (!Array.isArray(value)) {
        throw new Error(`${field}: expected string or string array.`);
    }
    if (value.length === 0) {
        throw new Error(`${field}: expected at least one stop sequence.`);
    }
    const normalizeEntry = (entry, position) => {
        const sequence = typeof entry === "string" ? entry.trim() : "";
        if (sequence.length === 0) {
            throw new Error(`${field}[${position}]: expected non-empty string.`);
        }
        return sequence;
    };
    return value.map(normalizeEntry);
}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.builtInSuite = builtInSuite;
|
|
4
|
+
exports.listBuiltInSuites = listBuiltInSuites;
|
|
5
|
+
exports.listSuiteExamples = listSuiteExamples;
|
|
6
|
+
const tinyQaDataset_1 = require("./tinyQaDataset");
|
|
7
|
+
// One showcase scenario per row of the imported tiny QA dataset.
const SHOWCASE_SUITE = tinyQaDataset_1.TINY_QA_BENCHMARK.map((row) => toTinyQaShowcaseScenario(row));
// Built-in benchmark suites keyed by suite name. builtInSuite hands out copies
// of these scenarios, and listBuiltInSuites reports the keys.
const SUITES = (() => {
    // Every scenario below expects HTTP 200. Extra checks are spread first so
    // `statusCode` stays the last key of each assertions object.
    const expectOk = (checks = {}) => ({ ...checks, statusCode: 200 });
    return {
        showcase: SHOWCASE_SUITE,
        smoke: [
            {
                id: "smoke-chat-exact",
                mode: "chat",
                prompt: "Reply exactly with: WAYPOI_SMOKE_OK",
                assertions: expectOk({ contains: ["WAYPOI_SMOKE_OK"] }),
            },
            {
                id: "smoke-agent-loop",
                mode: "agent",
                prompt: "If tools are available, call exactly one and summarize. If no tools are available, output NO_TOOLS_AVAILABLE. Prefix final answer with WAYPOI_AGENT_DONE:",
                maxIterations: 4,
                assertions: expectOk({ contains: ["WAYPOI_AGENT_DONE"] }),
            },
            {
                id: "smoke-embeddings-basic",
                mode: "embeddings",
                input: "waypoi benchmark smoke",
                assertions: expectOk({ minItems: 1, minVectorLength: 1 }),
            },
            {
                id: "smoke-image-generation",
                mode: "image_generation",
                prompt: "A tiny blue square on white background",
                assertions: expectOk({ minImages: 1 }),
            },
            {
                id: "smoke-audio-speech",
                mode: "audio_speech",
                inputText: "Waypoi benchmark smoke",
                voice: "alloy",
                assertions: expectOk({ minBytes: 1 }),
            },
        ],
        proxy: [
            {
                id: "proxy-chat-short",
                mode: "chat",
                prompt: "Answer with one word: waypoi",
                assertions: expectOk({ contains: ["waypoi"] }),
            },
            {
                id: "proxy-embeddings",
                mode: "embeddings",
                input: ["waypoi", "proxy", "benchmark"],
                assertions: expectOk({ minItems: 3, minVectorLength: 1 }),
            },
            {
                id: "proxy-image",
                mode: "image_generation",
                prompt: "A minimal icon of a gateway",
                assertions: expectOk({ minImages: 1 }),
            },
        ],
        agent: [
            {
                id: "agent-tool-loop-basic",
                mode: "agent",
                prompt: "Use available tools if useful, then provide a concise final answer prefixed with WAYPOI_AGENT_DONE:",
                maxIterations: 6,
                assertions: expectOk({ contains: ["WAYPOI_AGENT_DONE"] }),
            },
            {
                id: "agent-tool-required",
                mode: "agent",
                prompt: "Use at least one tool before answering.",
                maxIterations: 6,
                requiresAvailableTools: true,
                assertions: expectOk({ minToolCalls: 1 }),
            },
        ],
        pool_smoke: [
            {
                id: "pool-smart-chat",
                mode: "chat",
                model: "smart",
                prompt: "Reply exactly with: WAYPOI_POOL_SMOKE_OK",
                assertions: expectOk({ contains: ["WAYPOI_POOL_SMOKE_OK"] }),
            },
            {
                id: "pool-smart-agent",
                mode: "agent",
                model: "smart",
                prompt: "Answer with prefix WAYPOI_POOL_AGENT_DONE:",
                assertions: expectOk({ contains: ["WAYPOI_POOL_AGENT_DONE:"] }),
            },
        ],
        omni_call_smoke: [
            {
                id: "omni-call-basic",
                mode: "omni_call",
                prompt: "Please transcribe this audio and summarize it in one sentence.",
                audioFile: "examples/scenarios/assets/omni-call-sample.wav",
                assertions: expectOk(),
            },
        ],
        capabilities: [
            {
                id: "cap.chat_basic",
                mode: "chat",
                capability: "chat_basic",
                prompt: "Reply exactly with: WAYPOI_CAP_CHAT_BASIC_OK",
                assertions: expectOk({ contains: ["WAYPOI_CAP_CHAT_BASIC_OK"] }),
            },
            {
                id: "cap.chat_streaming",
                mode: "chat",
                capability: "chat_streaming",
                prompt: "Reply exactly with: WAYPOI_CAP_STREAMING_OK",
                assertions: expectOk({ contains: ["WAYPOI_CAP_STREAMING_OK"] }),
            },
            {
                id: "cap.chat_tool_calls",
                mode: "agent",
                capability: "chat_tool_calls",
                prompt: "Use at least one tool if available, then output WAYPOI_CAP_TOOL_CALLS_OK.",
                maxIterations: 4,
                requiresAvailableTools: true,
                assertions: expectOk({ contains: ["WAYPOI_CAP_TOOL_CALLS_OK"], minToolCalls: 1 }),
            },
            {
                id: "cap.chat_vision_input",
                mode: "chat",
                capability: "chat_vision_input",
                prompt: "Vision probe placeholder: reply with WAYPOI_CAP_VISION_UNKNOWN when image input is unavailable.",
                assertions: expectOk(),
            },
            {
                id: "cap.images_generation",
                mode: "image_generation",
                capability: "images_generation",
                prompt: "A monochrome square icon.",
                assertions: expectOk({ minImages: 1 }),
            },
            {
                id: "cap.images_edit",
                mode: "image_generation",
                capability: "images_edit",
                prompt: "Image edit probe placeholder",
                assertions: expectOk(),
            },
            {
                id: "cap.embeddings",
                mode: "embeddings",
                capability: "embeddings",
                input: "waypoi capability embeddings probe",
                assertions: expectOk({ minItems: 1, minVectorLength: 1 }),
            },
            {
                id: "cap.audio_transcription",
                mode: "audio_transcription",
                capability: "audio_transcription",
                audioFile: "examples/scenarios/assets/omni-call-sample.wav",
                assertions: expectOk(),
            },
            {
                id: "cap.audio_speech",
                mode: "audio_speech",
                capability: "audio_speech",
                inputText: "Waypoi capability speech probe",
                voice: "alloy",
                assertions: expectOk({ minBytes: 1 }),
            },
            {
                id: "cap.responses_compat",
                mode: "responses",
                capability: "responses_compat",
                prompt: "Summarize why the Responses API compatibility route matters in one sentence.",
                assertions: expectOk(),
            },
        ],
    };
})();
|
|
250
|
+
/**
 * Builds a chat-mode showcase scenario from a single Tiny QA row.
 *
 * The row id is zero-padded to at least three characters and used in both the
 * scenario id and its title. The prompt is three newline-separated lines: a
 * fixed instruction, the question, and the reference context.
 *
 * @param {object} row - Row exposing `id`, `question`, `answer`, `context`,
 *   `category` and `difficulty`.
 * @returns {object} Scenario whose assertions hold `statusCode: 200` and a
 *   one-element `contains` list with the row's answer.
 */
function toTinyQaShowcaseScenario(row) {
  const { id, question, answer, context, category, difficulty } = row;
  const paddedId = String(id).padStart(3, "0");
  const prompt = `Answer with only the final short answer.\nQuestion: ${question}\nReference: ${context}`;
  const expectedHighlights = [
    `category:${category}`,
    `difficulty:${difficulty}`,
    "gold-answer check",
  ];

  return {
    id: `showcase-tinyqa-${paddedId}`,
    mode: "chat",
    title: `Tiny QA #${paddedId}`,
    summary: "Single-question QA probe from vincentkoc/tiny_qa_benchmark.",
    userVisibleGoal: "Answer a tiny QA question with a concise factual response.",
    exampleSource: "huggingface",
    inputPreview: question,
    successCriteria: `HTTP 200 and answer includes: ${answer}`,
    expectedHighlights,
    prompt,
    assertions: {
      statusCode: 200,
      contains: [answer],
    },
  };
}
|
|
273
|
+
/**
 * Looks up a built-in benchmark suite by name and returns copies of its
 * scenarios (each produced by `cloneScenario`).
 *
 * @param {string} name - Key of the suite in `SUITES`.
 * @returns {object[]} One cloned scenario per entry of the suite.
 * @throws {Error} When `name` is not a suite defined on `SUITES`; the message
 *   lists the available suite names in sorted order.
 */
function builtInSuite(name) {
  // Only accept own keys: a bare `SUITES[name]` also resolves inherited members
  // such as "constructor" or "toString", which are truthy and would crash on
  // `.map` below instead of producing the "Unknown benchmark suite" error.
  const isKnown = Object.prototype.hasOwnProperty.call(SUITES, name);
  const suite = isKnown ? SUITES[name] : undefined;
  if (!suite) {
    const available = Object.keys(SUITES).sort().join(", ");
    throw new Error(`Unknown benchmark suite '${name}'. Available: ${available}`);
  }
  // Call with a single argument so the index/array that `map` supplies are
  // never forwarded to the clone helper.
  return suite.map((scenario) => cloneScenario(scenario));
}
|
|
281
|
+
/**
 * Lists the names of the built-in suites.
 *
 * @returns {string[]} The keys of `SUITES`, sorted with the default string
 *   comparison. A fresh array is returned on every call.
 */
function listBuiltInSuites() {
  const suiteNames = Object.keys(SUITES);
  suiteNames.sort();
  return suiteNames;
}
|
|
284
|
+
/**
 * Produces one summary per scenario of the named built-in suite.
 *
 * @param {string} name - Suite name, forwarded to `builtInSuite`; any error
 *   raised by that lookup propagates to the caller.
 * @returns {object[]} Results of `toScenarioSummary(name, scenario)` in suite order.
 */
function listSuiteExamples(name) {
  const summaries = [];
  for (const scenario of builtInSuite(name)) {
    summaries.push(toScenarioSummary(name, scenario));
  }
  return summaries;
}
|
|
287
|
+
/**
 * Returns a copy of a scenario whose `assertions`, `expectedHighlights`,
 * `tools` and array-valued `input` are independent of the source object.
 *
 * Those four fields are copied recursively (arrays and plain objects only), so
 * mutating e.g. `clone.assertions.contains` or an entry of `clone.tools` does
 * not leak back into the definition the clone was made from. All other
 * properties are carried over by a shallow spread.
 *
 * The result always has `assertions` (an object, empty when the source has
 * none) plus `expectedHighlights`, `tools` and `input` keys; the latter are
 * `undefined` when the source value is absent/falsy (or, for `input`, the
 * original non-array value is kept as-is).
 *
 * @param {object} scenario - Scenario definition to copy. Expected to be
 *   acyclic plain data; cyclic structures would recurse without end.
 * @returns {object} The copied scenario.
 */
function cloneScenario(scenario) {
  // Recursively duplicates arrays and plain objects; primitives and any other
  // object kinds (class instances, functions, ...) are returned unchanged.
  const copyPlain = (value) => {
    if (Array.isArray(value)) {
      return value.map((item) => copyPlain(item));
    }
    if (value !== null && typeof value === "object") {
      const proto = Object.getPrototypeOf(value);
      if (proto === Object.prototype || proto === null) {
        return Object.fromEntries(
          Object.entries(value).map(([key, nested]) => [key, copyPlain(nested)]),
        );
      }
    }
    return value;
  };

  const { assertions, expectedHighlights, tools, input } = scenario;
  return {
    ...scenario,
    // Spread first so a missing `assertions` still yields `{}`.
    assertions: copyPlain({ ...assertions }),
    expectedHighlights: expectedHighlights ? copyPlain([...expectedHighlights]) : undefined,
    tools: tools ? copyPlain([...tools]) : undefined,
    input: Array.isArray(input) ? copyPlain([...input]) : input,
  };
}
|
|
296
|
+
/**
 * Builds the summary record for one scenario of a suite, filling in default
 * text for every descriptive field the scenario does not define.
 *
 * `inputPreview` is resolved from the first defined value of:
 * `scenario.inputPreview`, `scenario.prompt`, `scenario.inputText`, then
 * `scenario.input` (used directly when a string, joined with " | " when an
 * array), then `scenario.audioFile`, and finally the empty string. The
 * `audioFile` step keeps scenarios that only reference an audio file from
 * ending up with a blank preview.
 *
 * @param {string} suite - Name of the suite the scenario belongs to; copied
 *   into the summary unchanged.
 * @param {object} scenario - Scenario definition to summarize.
 * @returns {object} Summary with `id`, `suite`, `mode`, `title`, `summary`,
 *   `userVisibleGoal`, `exampleSource`, `inputPreview`, `successCriteria`,
 *   `expectedHighlights`, `requiresAvailableTools` and `model`.
 */
function toScenarioSummary(suite, scenario) {
  const { input, audioFile } = scenario;

  // Last-resort preview, used only when no explicit preview/prompt/inputText exists.
  let derivedPreview = "";
  if (typeof input === "string") {
    derivedPreview = input;
  } else if (Array.isArray(input)) {
    derivedPreview = input.join(" | ");
  } else if (typeof audioFile === "string") {
    derivedPreview = audioFile;
  }

  return {
    id: scenario.id,
    suite,
    mode: scenario.mode,
    title: scenario.title ?? scenario.id,
    summary: scenario.summary ?? "Built-in benchmark scenario.",
    userVisibleGoal:
      scenario.userVisibleGoal ?? "Exercise the configured model path and inspect the result.",
    exampleSource: scenario.exampleSource ?? "builtin",
    inputPreview: scenario.inputPreview ?? scenario.prompt ?? scenario.inputText ?? derivedPreview,
    successCriteria: scenario.successCriteria ?? "All configured assertions pass.",
    expectedHighlights: scenario.expectedHighlights ?? [],
    // Strict comparison: only a literal `true` marks the scenario as tool-dependent.
    requiresAvailableTools: scenario.requiresAvailableTools === true,
    model: scenario.model,
  };
}
|