waypoi 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/instructions/ui.instructions.md +42 -0
- package/.github/workflows/ci.yml +35 -0
- package/.github/workflows/publish.yml +71 -0
- package/.github/workflows/release.yml +48 -0
- package/.playwright-mcp/console-2026-04-04T01-41-10-746Z.log +2 -0
- package/.playwright-mcp/console-2026-04-04T01-41-28-799Z.log +3 -0
- package/.playwright-mcp/console-2026-04-05T02-26-51-909Z.log +76 -0
- package/.playwright-mcp/page-2026-04-04T01-41-10-816Z.yml +1 -0
- package/.playwright-mcp/page-2026-04-04T01-41-29-141Z.yml +77 -0
- package/.playwright-mcp/page-2026-04-04T01-41-42-633Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T01-42-03-929Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-12-54-813Z.yml +6 -0
- package/.playwright-mcp/page-2026-04-04T02-14-58-600Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-03-923Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-07-426Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-15-25-729Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-16-22-984Z.yml +262 -0
- package/.playwright-mcp/page-2026-04-04T02-17-00-599Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-04T02-17-50-874Z.yml +190 -0
- package/.playwright-mcp/page-2026-04-05T02-26-55-570Z.yml +6 -0
- package/AGENTS.md +48 -0
- package/CHANGELOG.md +131 -0
- package/README.md +552 -0
- package/assets/agent-mode.png +0 -0
- package/assets/categorize.png +0 -0
- package/assets/dashboard.png +0 -0
- package/assets/endpoint-proxy.png +0 -0
- package/assets/icon.png +0 -0
- package/assets/mcp-generate-image.png +0 -0
- package/assets/mcp-understand-image.png +0 -0
- package/assets/peek-token-flow.png +0 -0
- package/assets/playground.png +0 -0
- package/assets/sankey.png +0 -0
- package/cli/index.ts +2805 -0
- package/cli/legacyRewrite.ts +108 -0
- package/cli/modelRef.ts +24 -0
- package/dist/cli/index.js +2536 -0
- package/dist/cli/legacyRewrite.js +92 -0
- package/dist/cli/modelRef.js +20 -0
- package/dist/src/benchmark/artifacts.js +131 -0
- package/dist/src/benchmark/capabilityClassifier.js +81 -0
- package/dist/src/benchmark/capabilityStore.js +144 -0
- package/dist/src/benchmark/config.js +238 -0
- package/dist/src/benchmark/gates.js +118 -0
- package/dist/src/benchmark/jobs.js +252 -0
- package/dist/src/benchmark/runner.js +1847 -0
- package/dist/src/benchmark/schema.js +353 -0
- package/dist/src/benchmark/suites.js +314 -0
- package/dist/src/benchmark/tinyQaDataset.js +422 -0
- package/dist/src/benchmark/types.js +25 -0
- package/dist/src/config.js +47 -0
- package/dist/src/index.js +178 -0
- package/dist/src/mcp/client.js +215 -0
- package/dist/src/mcp/discovery.js +226 -0
- package/dist/src/mcp/policy.js +65 -0
- package/dist/src/mcp/registry.js +129 -0
- package/dist/src/mcp/service.js +460 -0
- package/dist/src/middleware/auth.js +179 -0
- package/dist/src/middleware/requestCapture.js +192 -0
- package/dist/src/middleware/requestStats.js +118 -0
- package/dist/src/pools/builder.js +132 -0
- package/dist/src/pools/repository.js +69 -0
- package/dist/src/pools/scheduler.js +360 -0
- package/dist/src/pools/types.js +2 -0
- package/dist/src/protocols/adapters/dashscope.js +267 -0
- package/dist/src/protocols/adapters/inferenceV2.js +346 -0
- package/dist/src/protocols/adapters/openai.js +27 -0
- package/dist/src/protocols/registry.js +99 -0
- package/dist/src/protocols/types.js +2 -0
- package/dist/src/providers/health.js +153 -0
- package/dist/src/providers/importer.js +289 -0
- package/dist/src/providers/modelRegistry.js +313 -0
- package/dist/src/providers/repository.js +361 -0
- package/dist/src/providers/types.js +2 -0
- package/dist/src/routes/admin.js +531 -0
- package/dist/src/routes/audio.js +295 -0
- package/dist/src/routes/chat.js +240 -0
- package/dist/src/routes/embeddings.js +157 -0
- package/dist/src/routes/images.js +288 -0
- package/dist/src/routes/mcp.js +256 -0
- package/dist/src/routes/mcpService.js +100 -0
- package/dist/src/routes/models.js +48 -0
- package/dist/src/routes/responses.js +711 -0
- package/dist/src/routes/sessions.js +450 -0
- package/dist/src/routes/stats.js +270 -0
- package/dist/src/routes/ui.js +97 -0
- package/dist/src/routes/videos.js +107 -0
- package/dist/src/routing/router.js +338 -0
- package/dist/src/services/imageGeneration.js +280 -0
- package/dist/src/services/imageUnderstanding.js +352 -0
- package/dist/src/services/videoGeneration.js +79 -0
- package/dist/src/storage/captureRepository.js +1591 -0
- package/dist/src/storage/files.js +157 -0
- package/dist/src/storage/imageCache.js +346 -0
- package/dist/src/storage/repositories.js +388 -0
- package/dist/src/storage/sessionRepository.js +370 -0
- package/dist/src/storage/statsRepository.js +204 -0
- package/dist/src/transport/httpClient.js +126 -0
- package/dist/src/types.js +2 -0
- package/dist/src/utils/messageMedia.js +285 -0
- package/dist/src/utils/modelCapabilities.js +108 -0
- package/dist/src/utils/modelDiscovery.js +170 -0
- package/dist/src/version.js +5 -0
- package/dist/src/workers/captureRetention.js +25 -0
- package/dist/src/workers/configWatcher.js +91 -0
- package/dist/src/workers/healthChecker.js +21 -0
- package/dist/src/workers/statsRotation.js +41 -0
- package/docs/LLM/output_schema.md +312 -0
- package/docs/benchmark.md +208 -0
- package/docs/mcp-guidelines.md +125 -0
- package/docs/mcp-service.md +178 -0
- package/docs/opencode.md +86 -0
- package/docs/providers.md +79 -0
- package/examples/benchmark.config.yaml +28 -0
- package/examples/providers/alibaba-dashscope.yaml +88 -0
- package/examples/providers/alibaba-llm.yaml +64 -0
- package/examples/providers/alibaba-registry.yaml +7 -0
- package/examples/providers/inference-v2-ray.yaml +29 -0
- package/examples/scenarios/assets/omni-call-sample.wav +0 -0
- package/examples/scenarios/custom.jsonl +5 -0
- package/examples/scenarios/custom.yaml +40 -0
- package/model-form-v2.png +0 -0
- package/package.json +66 -0
- package/provider-form-v2.png +0 -0
- package/provider-form.png +0 -0
- package/scripts/manual-test.sh +11 -0
- package/scripts/version-from-git.js +23 -0
- package/src/benchmark/artifacts.ts +149 -0
- package/src/benchmark/capabilityClassifier.ts +99 -0
- package/src/benchmark/capabilityStore.ts +174 -0
- package/src/benchmark/config.ts +337 -0
- package/src/benchmark/gates.ts +164 -0
- package/src/benchmark/jobs.ts +312 -0
- package/src/benchmark/runner.ts +2519 -0
- package/src/benchmark/schema.ts +443 -0
- package/src/benchmark/suites.ts +323 -0
- package/src/benchmark/tinyQaDataset.ts +428 -0
- package/src/benchmark/types.ts +442 -0
- package/src/config.ts +44 -0
- package/src/index.ts +195 -0
- package/src/mcp/client.ts +305 -0
- package/src/mcp/discovery.ts +266 -0
- package/src/mcp/policy.ts +105 -0
- package/src/mcp/registry.ts +164 -0
- package/src/mcp/service.ts +611 -0
- package/src/middleware/auth.ts +251 -0
- package/src/middleware/requestCapture.ts +245 -0
- package/src/middleware/requestStats.ts +163 -0
- package/src/pools/builder.ts +159 -0
- package/src/pools/repository.ts +71 -0
- package/src/pools/scheduler.ts +425 -0
- package/src/pools/types.ts +117 -0
- package/src/protocols/adapters/dashscope.ts +335 -0
- package/src/protocols/adapters/inferenceV2.ts +428 -0
- package/src/protocols/adapters/openai.ts +32 -0
- package/src/protocols/registry.ts +117 -0
- package/src/protocols/types.ts +81 -0
- package/src/providers/health.ts +207 -0
- package/src/providers/importer.ts +402 -0
- package/src/providers/modelRegistry.ts +415 -0
- package/src/providers/repository.ts +439 -0
- package/src/providers/types.ts +113 -0
- package/src/routes/admin.ts +666 -0
- package/src/routes/audio.ts +372 -0
- package/src/routes/chat.ts +301 -0
- package/src/routes/embeddings.ts +197 -0
- package/src/routes/images.ts +356 -0
- package/src/routes/mcp.ts +320 -0
- package/src/routes/mcpService.ts +114 -0
- package/src/routes/models.ts +50 -0
- package/src/routes/responses.ts +872 -0
- package/src/routes/sessions.ts +558 -0
- package/src/routes/stats.ts +312 -0
- package/src/routes/ui.ts +96 -0
- package/src/routes/videos.ts +132 -0
- package/src/routing/router.ts +501 -0
- package/src/services/imageGeneration.ts +396 -0
- package/src/services/imageUnderstanding.ts +449 -0
- package/src/services/videoGeneration.ts +127 -0
- package/src/storage/captureRepository.ts +1835 -0
- package/src/storage/files.ts +178 -0
- package/src/storage/imageCache.ts +405 -0
- package/src/storage/repositories.ts +494 -0
- package/src/storage/sessionRepository.ts +419 -0
- package/src/storage/statsRepository.ts +238 -0
- package/src/transport/httpClient.ts +145 -0
- package/src/types.ts +322 -0
- package/src/utils/messageMedia.ts +293 -0
- package/src/utils/modelCapabilities.ts +161 -0
- package/src/utils/modelDiscovery.ts +203 -0
- package/src/workers/captureRetention.ts +25 -0
- package/src/workers/configWatcher.ts +115 -0
- package/src/workers/healthChecker.ts +22 -0
- package/src/workers/statsRotation.ts +49 -0
- package/tests/benchmarkAdminRoutes.test.ts +82 -0
- package/tests/benchmarkBasics.test.ts +116 -0
- package/tests/captureAdminRoutes.test.ts +420 -0
- package/tests/captureRepository.test.ts +797 -0
- package/tests/cliLegacyRewrite.test.ts +45 -0
- package/tests/imageGeneration.service.test.ts +107 -0
- package/tests/imageUnderstanding.service.test.ts +123 -0
- package/tests/mcpPolicy.test.ts +105 -0
- package/tests/mcpService.test.ts +1245 -0
- package/tests/modelRef.test.ts +23 -0
- package/tests/modelsRoutes.test.ts +154 -0
- package/tests/sessionMediaCache.test.ts +167 -0
- package/tests/statsRoutes.test.ts +323 -0
- package/tsconfig.json +15 -0
- package/ui/index.html +16 -0
- package/ui/package-lock.json +8521 -0
- package/ui/package.json +52 -0
- package/ui/postcss.config.js +6 -0
- package/ui/public/assets/apple-touch-icon.png +0 -0
- package/ui/public/assets/favicon-16.png +0 -0
- package/ui/public/assets/favicon-32.png +0 -0
- package/ui/public/assets/icon-192.png +0 -0
- package/ui/public/assets/icon-512.png +0 -0
- package/ui/src/App.tsx +27 -0
- package/ui/src/api/client.ts +1503 -0
- package/ui/src/components/EndpointUsageGuide.tsx +361 -0
- package/ui/src/components/Layout.tsx +124 -0
- package/ui/src/components/MessageContent.tsx +365 -0
- package/ui/src/components/ToolCallMessage.tsx +179 -0
- package/ui/src/components/ToolPicker.tsx +442 -0
- package/ui/src/components/messageContentParser.test.ts +41 -0
- package/ui/src/components/messageContentParser.ts +73 -0
- package/ui/src/components/thinkingPreview.test.ts +27 -0
- package/ui/src/components/thinkingPreview.ts +15 -0
- package/ui/src/components/toMermaidSankey.test.ts +78 -0
- package/ui/src/components/toMermaidSankey.ts +56 -0
- package/ui/src/components/ui/button.tsx +58 -0
- package/ui/src/components/ui/input.tsx +21 -0
- package/ui/src/components/ui/textarea.tsx +21 -0
- package/ui/src/lib/utils.ts +6 -0
- package/ui/src/main.tsx +9 -0
- package/ui/src/pages/AgentPlayground.tsx +2010 -0
- package/ui/src/pages/Benchmark.tsx +988 -0
- package/ui/src/pages/Dashboard.tsx +581 -0
- package/ui/src/pages/Peek.tsx +962 -0
- package/ui/src/pages/Settings.tsx +2013 -0
- package/ui/src/pages/agentPlaygroundPayload.test.ts +109 -0
- package/ui/src/pages/agentPlaygroundPayload.ts +97 -0
- package/ui/src/pages/agentThinkingContent.test.ts +50 -0
- package/ui/src/pages/agentThinkingContent.ts +57 -0
- package/ui/src/pages/dashboardTokenUsage.test.ts +66 -0
- package/ui/src/pages/dashboardTokenUsage.ts +36 -0
- package/ui/src/pages/imageUpload.test.ts +39 -0
- package/ui/src/pages/imageUpload.ts +71 -0
- package/ui/src/pages/peekFilters.test.ts +29 -0
- package/ui/src/pages/peekFilters.ts +13 -0
- package/ui/src/pages/peekMedia.test.ts +58 -0
- package/ui/src/pages/peekMedia.ts +148 -0
- package/ui/src/pages/sessionAutoTitle.test.ts +128 -0
- package/ui/src/pages/sessionAutoTitle.ts +106 -0
- package/ui/src/stores/settings.ts +58 -0
- package/ui/src/styles/globals.css +223 -0
- package/ui/src/vite-env.d.ts +8 -0
- package/ui/tailwind.config.js +106 -0
- package/ui/tsconfig.json +32 -0
- package/ui/vite.config.ts +37 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BENCHMARK_CAPABILITY_KEYS,
|
|
3
|
+
BENCHMARK_MODES,
|
|
4
|
+
BenchmarkAssertions,
|
|
5
|
+
BenchmarkCapabilityKey,
|
|
6
|
+
BenchmarkMode,
|
|
7
|
+
BenchmarkScenario,
|
|
8
|
+
ValidationOutcome,
|
|
9
|
+
} from "./types";
|
|
10
|
+
|
|
11
|
+
/**
 * Field names recognised on a raw scenario object. `validateScenario` reports
 * any other key through its warnings collector instead of failing.
 */
const SCENARIO_KEYS = new Set<string>([
  // Identity and descriptive metadata.
  "id", "mode", "title", "summary", "userVisibleGoal", "exampleSource",
  "inputPreview", "successCriteria", "expectedHighlights",
  // Routing and execution settings.
  "capability", "model", "timeoutMs", "requiresAvailableTools", "assertions",
  // Prompt-driven fields.
  "prompt", "tools", "maxIterations",
  // Sampling parameters.
  "temperature", "top_p", "max_tokens", "presence_penalty", "frequency_penalty",
  "seed", "stop",
  // Remaining mode-specific payload fields.
  "input", "n", "size", "audioFile", "inputText", "voice", "response_format",
]);
|
|
44
|
+
|
|
45
|
+
/**
 * Field names recognised inside a scenario's `assertions` object.
 * `validateAssertions` reports any other key as an ignored-field warning.
 */
const ASSERTION_KEYS = new Set<string>([
  "contains", "notContains",
  "requiredToolNames", "minToolCalls", "maxToolCalls",
  "maxLatencyMs", "statusCode",
  "minItems", "minVectorLength", "minImages",
  "containsText", "notContainsText",
  "minBytes", "contentType",
]);
|
|
61
|
+
|
|
62
|
+
/**
 * Validates every raw scenario from a single source and ensures their ids
 * are unique within that source.
 *
 * All entries are validated first; the duplicate-id check runs afterwards.
 *
 * @param rawScenarios - Untrusted scenario values.
 * @param sourceLabel - Label of the source, used as the prefix of every message.
 * @returns The validated scenarios together with the non-fatal warnings collected.
 * @throws Error when an entry is invalid or two entries share the same id.
 */
export function validateScenarioCollection(
  rawScenarios: unknown[],
  sourceLabel: string
): ValidationOutcome {
  const warnings: string[] = [];
  const scenarios = rawScenarios.map(
    (entry, position): BenchmarkScenario =>
      validateScenario(entry, sourceLabel, position, warnings)
  );

  const seenIds = new Set<string>();
  for (const { id } of scenarios) {
    if (!seenIds.has(id)) {
      seenIds.add(id);
      continue;
    }
    throw new Error(`${sourceLabel} scenario '${id}' is duplicated.`);
  }

  return { scenarios, warnings };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Validates one raw scenario entry and converts it into a BenchmarkScenario.
 *
 * Unrecognised top-level fields are not fatal; each one is reported through
 * `warnings`. Recognised fields are validated one after another, and the
 * first invalid field aborts with an Error prefixed by that field's path.
 *
 * @param raw - Untrusted scenario value.
 * @param sourceLabel - Label of the source, used in messages.
 * @param index - Position of the entry within its source.
 * @param warnings - Collector for non-fatal messages; appended to in place.
 * @returns The validated scenario.
 * @throws Error when `raw` is not a non-array object, when a field is invalid,
 *   or when a field required by the scenario's mode is missing.
 */
function validateScenario(
  raw: unknown,
  sourceLabel: string,
  index: number,
  warnings: string[]
): BenchmarkScenario {
  const ctx = `${sourceLabel} scenario[${index}]`;

  const isRecord = typeof raw === "object" && raw !== null && !Array.isArray(raw);
  if (!isRecord) {
    throw new Error(`${ctx}: expected object.`);
  }
  const input = raw as Record<string, unknown>;

  // Builds the dotted field path used as the prefix of validation errors.
  const at = (name: string): string => `${ctx}.${name}`;

  Object.keys(input)
    .filter((key) => !SCENARIO_KEYS.has(key))
    .forEach((key) => {
      warnings.push(`${ctx}: unknown field '${key}' is ignored.`);
    });

  // Property order is also the validation order, so it decides which invalid
  // field is reported first when several are wrong.
  const scenario: BenchmarkScenario = {
    id: requiredString(input.id, at("id")),
    mode: validateMode(input.mode, at("mode")),
    capability: optionalCapabilityKey(input.capability, at("capability")),
    model: optionalString(input.model, at("model")),
    timeoutMs: optionalInteger(input.timeoutMs, at("timeoutMs"), 1),
    assertions: validateAssertions(input.assertions, at("assertions"), warnings),
    title: optionalString(input.title, at("title")),
    summary: optionalString(input.summary, at("summary")),
    userVisibleGoal: optionalString(input.userVisibleGoal, at("userVisibleGoal")),
    exampleSource: optionalExampleSource(input.exampleSource, at("exampleSource")),
    inputPreview: optionalString(input.inputPreview, at("inputPreview")),
    successCriteria: optionalString(input.successCriteria, at("successCriteria")),
    expectedHighlights: optionalStringArray(input.expectedHighlights, at("expectedHighlights")),
    prompt: optionalString(input.prompt, at("prompt")),
    tools: optionalStringArray(input.tools, at("tools")),
    maxIterations: optionalInteger(input.maxIterations, at("maxIterations"), 1, 20),
    temperature: optionalFiniteNumber(input.temperature, at("temperature")),
    top_p: optionalFiniteNumber(input.top_p, at("top_p"), 0, 1),
    max_tokens: optionalInteger(input.max_tokens, at("max_tokens"), 1),
    presence_penalty: optionalFiniteNumber(input.presence_penalty, at("presence_penalty"), -2, 2),
    frequency_penalty: optionalFiniteNumber(input.frequency_penalty, at("frequency_penalty"), -2, 2),
    seed: optionalInteger(input.seed, at("seed"), 0),
    stop: optionalStopField(input.stop, at("stop")),
    requiresAvailableTools: optionalBoolean(
      input.requiresAvailableTools,
      at("requiresAvailableTools")
    ),
    input: optionalInputValue(input.input, at("input")),
    n: optionalInteger(input.n, at("n"), 1),
    size: optionalString(input.size, at("size")),
    audioFile: optionalString(input.audioFile, at("audioFile")),
    inputText: optionalString(input.inputText, at("inputText")),
    voice: optionalString(input.voice, at("voice")),
    response_format: optionalString(input.response_format, at("response_format")),
  };

  validateScenarioByMode(scenario, ctx);

  return scenario;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Enforces the fields that each benchmark mode cannot run without.
 *
 * - chat / agent / responses / image_generation need `prompt`
 * - embeddings needs `input`
 * - audio_transcription / omni_call need `audioFile`
 * - audio_speech needs `inputText` and `voice`
 *
 * Modes not listed above have no extra requirements here.
 *
 * @param scenario - Scenario whose individual fields were already validated.
 * @param ctx - Message prefix identifying the scenario.
 * @throws Error naming the first missing field for the scenario's mode.
 */
function validateScenarioByMode(scenario: BenchmarkScenario, ctx: string): void {
  switch (scenario.mode) {
    case "chat":
    case "agent":
    case "responses":
    case "image_generation":
      if (!scenario.prompt) {
        throw new Error(`${ctx}.prompt: required for mode '${scenario.mode}'.`);
      }
      break;

    case "embeddings":
      if (scenario.input === undefined) {
        throw new Error(`${ctx}.input: required for mode 'embeddings'.`);
      }
      break;

    case "audio_transcription":
    case "omni_call":
      if (!scenario.audioFile) {
        throw new Error(`${ctx}.audioFile: required for mode '${scenario.mode}'.`);
      }
      break;

    case "audio_speech":
      if (!scenario.inputText) {
        throw new Error(`${ctx}.inputText: required for mode 'audio_speech'.`);
      }
      if (!scenario.voice) {
        throw new Error(`${ctx}.voice: required for mode 'audio_speech'.`);
      }
      break;

    default:
      break;
  }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Validates the optional `assertions` object of a scenario.
 *
 * A missing (`undefined`/`null`) value yields `{ statusCode: 200 }`. Otherwise
 * each recognised field is validated, `statusCode` falls back to 200 when not
 * given, and unrecognised fields are reported through `warnings`.
 *
 * @param raw - Untrusted assertions value.
 * @param field - Field path used as the prefix of every message.
 * @param warnings - Collector for non-fatal messages; appended to in place.
 * @returns The validated assertions.
 * @throws Error when `raw` is not a non-array object, when a field is invalid,
 *   or when `maxToolCalls` is smaller than `minToolCalls`.
 */
function validateAssertions(
  raw: unknown,
  field: string,
  warnings: string[]
): BenchmarkAssertions {
  if (raw == null) {
    return { statusCode: 200 };
  }
  if (Array.isArray(raw) || typeof raw !== "object") {
    throw new Error(`${field}: expected object.`);
  }

  const input = raw as Record<string, unknown>;
  // Builds the dotted field path used as the prefix of validation errors.
  const at = (name: string): string => `${field}.${name}`;

  for (const key of Object.keys(input)) {
    if (ASSERTION_KEYS.has(key)) {
      continue;
    }
    warnings.push(`${field}: unknown field '${key}' is ignored.`);
  }

  // Property order is also the validation order.
  const assertions: BenchmarkAssertions = {
    contains: optionalStringArray(input.contains, at("contains")),
    notContains: optionalStringArray(input.notContains, at("notContains")),
    requiredToolNames: optionalStringArray(input.requiredToolNames, at("requiredToolNames")),
    minToolCalls: optionalInteger(input.minToolCalls, at("minToolCalls"), 0),
    maxToolCalls: optionalInteger(input.maxToolCalls, at("maxToolCalls"), 0),
    maxLatencyMs: optionalInteger(input.maxLatencyMs, at("maxLatencyMs"), 1),
    statusCode: optionalInteger(input.statusCode, at("statusCode"), 100, 599) ?? 200,
    minItems: optionalInteger(input.minItems, at("minItems"), 1),
    minVectorLength: optionalInteger(input.minVectorLength, at("minVectorLength"), 1),
    minImages: optionalInteger(input.minImages, at("minImages"), 1),
    containsText: optionalStringArray(input.containsText, at("containsText")),
    notContainsText: optionalStringArray(input.notContainsText, at("notContainsText")),
    minBytes: optionalInteger(input.minBytes, at("minBytes"), 1),
    contentType: optionalString(input.contentType, at("contentType")),
  };

  // The cross-field check only applies when both bounds were supplied.
  const { minToolCalls: fewest, maxToolCalls: most } = assertions;
  if (fewest !== undefined && most !== undefined && most < fewest) {
    throw new Error(`${field}: maxToolCalls must be >= minToolCalls.`);
  }

  return assertions;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Validates the optional `exampleSource` field.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns One of 'opencode', 'builtin', 'file' or 'huggingface', or
 *   `undefined` when the value is absent.
 * @throws Error when the value is not a non-empty string or names any other source.
 */
function optionalExampleSource(
  value: unknown,
  field: string
): BenchmarkScenario["exampleSource"] | undefined {
  const source = optionalString(value, field);
  if (!source) {
    return undefined;
  }

  switch (source) {
    case "opencode":
    case "builtin":
    case "file":
    case "huggingface":
      return source;
    default:
      throw new Error(`${field}: expected 'opencode', 'builtin', 'file', or 'huggingface'.`);
  }
}
|
|
280
|
+
|
|
281
|
+
/**
 * Validates an optional boolean field.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of the error message.
 * @returns The boolean, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when a value is present but is not a boolean.
 */
function optionalBoolean(value: unknown, field: string): boolean | undefined {
  if (value == null) {
    return undefined;
  }
  if (typeof value === "boolean") {
    return value;
  }
  throw new Error(`${field}: expected boolean.`);
}
|
|
290
|
+
|
|
291
|
+
/**
 * Validates the required `mode` field against `BENCHMARK_MODES`.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns The value typed as a BenchmarkMode.
 * @throws Error when the value is not a string or is not a listed mode.
 */
function validateMode(value: unknown, field: string): BenchmarkMode {
  if (typeof value === "string") {
    const candidate = value as BenchmarkMode;
    if (BENCHMARK_MODES.includes(candidate)) {
      return candidate;
    }
    throw new Error(`${field}: unsupported mode '${value}'.`);
  }
  throw new Error(`${field}: expected mode string.`);
}
|
|
300
|
+
|
|
301
|
+
/**
 * Validates a mandatory string field.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns The trimmed string produced by `optionalString`.
 * @throws Error when the value is absent, or (via `optionalString`) when it
 *   is not a non-empty string.
 */
function requiredString(value: unknown, field: string): string {
  const text = optionalString(value, field);
  if (text) {
    return text;
  }
  throw new Error(`${field}: required non-empty string.`);
}
|
|
308
|
+
|
|
309
|
+
/**
 * Validates an optional string field and trims surrounding whitespace.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of the error message.
 * @returns The trimmed string, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when a value is present but is not a string, or is empty after trimming.
 */
function optionalString(value: unknown, field: string): string | undefined {
  if (value == null) {
    return undefined;
  }
  // Non-strings collapse to "" so both failure cases share one throw.
  const trimmed = typeof value === "string" ? value.trim() : "";
  if (trimmed === "") {
    throw new Error(`${field}: expected non-empty string.`);
  }
  return trimmed;
}
|
|
318
|
+
|
|
319
|
+
/**
 * Validates an optional field that may be a single string or an array of strings.
 *
 * A lone string is wrapped in a one-element array. Every string is trimmed.
 * An empty array is accepted and returned as an empty array.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns The trimmed strings, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when the value is neither a string nor an array, or when any
 *   string is empty after trimming.
 */
function optionalStringArray(value: unknown, field: string): string[] | undefined {
  if (value == null) {
    return undefined;
  }

  if (Array.isArray(value)) {
    return value.map((item: unknown, position: number): string => {
      const cleaned = typeof item === "string" ? item.trim() : "";
      if (cleaned.length === 0) {
        throw new Error(`${field}[${position}]: expected non-empty string.`);
      }
      return cleaned;
    });
  }

  if (typeof value !== "string") {
    throw new Error(`${field}: expected string or string array.`);
  }

  const only = value.trim();
  if (only.length === 0) {
    throw new Error(`${field}: string must be non-empty.`);
  }
  return [only];
}
|
|
341
|
+
|
|
342
|
+
/**
 * Validates the optional `input` field: a string or an array of strings.
 *
 * Values are checked for being non-blank but are returned exactly as given,
 * without trimming. An array input yields a new array.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns The string or string array, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when the value is neither a string nor an array, or when any
 *   string is blank.
 */
function optionalInputValue(value: unknown, field: string): string | string[] | undefined {
  if (value == null) {
    return undefined;
  }

  if (Array.isArray(value)) {
    return value.map((item: unknown, position: number): string => {
      if (typeof item === "string" && item.trim()) {
        return item;
      }
      throw new Error(`${field}[${position}]: expected non-empty string.`);
    });
  }

  if (typeof value !== "string") {
    throw new Error(`${field}: expected string or string array.`);
  }
  if (value.trim() === "") {
    throw new Error(`${field}: expected non-empty string.`);
  }
  return value;
}
|
|
363
|
+
|
|
364
|
+
/**
 * Validates the optional `capability` field against `BENCHMARK_CAPABILITY_KEYS`.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns The capability key, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when the value is not a string or is not a listed capability.
 */
function optionalCapabilityKey(value: unknown, field: string): BenchmarkCapabilityKey | undefined {
  if (value == null) {
    return undefined;
  }
  if (typeof value === "string") {
    const candidate = value as BenchmarkCapabilityKey;
    if (BENCHMARK_CAPABILITY_KEYS.includes(candidate)) {
      return candidate;
    }
    throw new Error(`${field}: unsupported capability '${value}'.`);
  }
  throw new Error(`${field}: expected capability key string.`);
}
|
|
376
|
+
|
|
377
|
+
/**
 * Validates an optional integer field within inclusive bounds.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @param min - Smallest accepted value (inclusive).
 * @param max - Largest accepted value (inclusive); no upper bound when omitted.
 * @returns The integer, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when the value is not an integer or lies outside the bounds.
 */
function optionalInteger(
  value: unknown,
  field: string,
  min: number,
  max?: number
): number | undefined {
  if (value == null) {
    return undefined;
  }
  // Number.isInteger is false for every non-number, so the typeof test only
  // serves to narrow the type for the comparisons below.
  if (typeof value !== "number" || !Number.isInteger(value)) {
    throw new Error(`${field}: expected integer.`);
  }

  const belowMin = value < min;
  if (belowMin) {
    throw new Error(`${field}: must be >= ${min}.`);
  }

  const aboveMax = typeof max === "number" ? value > max : false;
  if (aboveMax) {
    throw new Error(`${field}: must be <= ${max}.`);
  }

  return value;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Validates an optional finite number field, optionally within inclusive bounds.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @param min - Smallest accepted value (inclusive); no lower bound when omitted.
 * @param max - Largest accepted value (inclusive); no upper bound when omitted.
 * @returns The number, or `undefined` when the value is `undefined`/`null`.
 * @throws Error when the value is not a finite number or lies outside the bounds.
 */
function optionalFiniteNumber(
  value: unknown,
  field: string,
  min?: number,
  max?: number
): number | undefined {
  if (value == null) {
    return undefined;
  }

  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  if (!isFiniteNumber) {
    throw new Error(`${field}: expected finite number.`);
  }
  const num = value as number;

  const belowMin = typeof min === "number" ? num < min : false;
  if (belowMin) {
    throw new Error(`${field}: must be >= ${min}.`);
  }

  const aboveMax = typeof max === "number" ? num > max : false;
  if (aboveMax) {
    throw new Error(`${field}: must be <= ${max}.`);
  }

  return num;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Validates the optional `stop` field: one stop sequence or a non-empty
 * array of stop sequences.
 *
 * Stop sequences are matched literally against generated text, so they are
 * returned verbatim. Whitespace is significant: "\n" is a common stop
 * sequence, and trimming would either reject it or silently change what the
 * model stops on. Only zero-length strings are rejected.
 *
 * @param value - Untrusted value.
 * @param field - Field path used as the prefix of error messages.
 * @returns The stop sequence(s) exactly as supplied, or `undefined` when the
 *   value is `undefined`/`null`.
 * @throws Error when the value is neither a string nor an array, when the
 *   array is empty, or when any sequence is not a non-empty string.
 */
function optionalStopField(value: unknown, field: string): string | string[] | undefined {
  if (value === undefined || value === null) {
    return undefined;
  }
  if (typeof value === "string") {
    if (value.length === 0) {
      throw new Error(`${field}: expected non-empty string.`);
    }
    return value;
  }
  if (!Array.isArray(value)) {
    throw new Error(`${field}: expected string or string array.`);
  }
  if (value.length === 0) {
    throw new Error(`${field}: expected at least one stop sequence.`);
  }
  return value.map((entry: unknown, index: number): string => {
    if (typeof entry !== "string" || entry.length === 0) {
      throw new Error(`${field}[${index}]: expected non-empty string.`);
    }
    return entry;
  });
}
|