npm - waypoi - Versions diffs - 0.0.0 - Mend

waypoi 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (260) hide show

package/.github/instructions/ui.instructions.md +42 -0
package/.github/workflows/ci.yml +35 -0
package/.github/workflows/publish.yml +71 -0
package/.github/workflows/release.yml +48 -0
package/.playwright-mcp/console-2026-04-04T01-41-10-746Z.log +2 -0
package/.playwright-mcp/console-2026-04-04T01-41-28-799Z.log +3 -0
package/.playwright-mcp/console-2026-04-05T02-26-51-909Z.log +76 -0
package/.playwright-mcp/page-2026-04-04T01-41-10-816Z.yml +1 -0
package/.playwright-mcp/page-2026-04-04T01-41-29-141Z.yml +77 -0
package/.playwright-mcp/page-2026-04-04T01-41-42-633Z.yml +190 -0
package/.playwright-mcp/page-2026-04-04T01-42-03-929Z.yml +262 -0
package/.playwright-mcp/page-2026-04-04T02-12-54-813Z.yml +6 -0
package/.playwright-mcp/page-2026-04-04T02-14-58-600Z.yml +190 -0
package/.playwright-mcp/page-2026-04-04T02-15-03-923Z.yml +190 -0
package/.playwright-mcp/page-2026-04-04T02-15-07-426Z.yml +190 -0
package/.playwright-mcp/page-2026-04-04T02-15-25-729Z.yml +262 -0
package/.playwright-mcp/page-2026-04-04T02-16-22-984Z.yml +262 -0
package/.playwright-mcp/page-2026-04-04T02-17-00-599Z.yml +190 -0
package/.playwright-mcp/page-2026-04-04T02-17-50-874Z.yml +190 -0
package/.playwright-mcp/page-2026-04-05T02-26-55-570Z.yml +6 -0
package/AGENTS.md +48 -0
package/CHANGELOG.md +131 -0
package/README.md +552 -0
package/assets/agent-mode.png +0 -0
package/assets/categorize.png +0 -0
package/assets/dashboard.png +0 -0
package/assets/endpoint-proxy.png +0 -0
package/assets/icon.png +0 -0
package/assets/mcp-generate-image.png +0 -0
package/assets/mcp-understand-image.png +0 -0
package/assets/peek-token-flow.png +0 -0
package/assets/playground.png +0 -0
package/assets/sankey.png +0 -0
package/cli/index.ts +2805 -0
package/cli/legacyRewrite.ts +108 -0
package/cli/modelRef.ts +24 -0
package/dist/cli/index.js +2536 -0
package/dist/cli/legacyRewrite.js +92 -0
package/dist/cli/modelRef.js +20 -0
package/dist/src/benchmark/artifacts.js +131 -0
package/dist/src/benchmark/capabilityClassifier.js +81 -0
package/dist/src/benchmark/capabilityStore.js +144 -0
package/dist/src/benchmark/config.js +238 -0
package/dist/src/benchmark/gates.js +118 -0
package/dist/src/benchmark/jobs.js +252 -0
package/dist/src/benchmark/runner.js +1847 -0
package/dist/src/benchmark/schema.js +353 -0
package/dist/src/benchmark/suites.js +314 -0
package/dist/src/benchmark/tinyQaDataset.js +422 -0
package/dist/src/benchmark/types.js +25 -0
package/dist/src/config.js +47 -0
package/dist/src/index.js +178 -0
package/dist/src/mcp/client.js +215 -0
package/dist/src/mcp/discovery.js +226 -0
package/dist/src/mcp/policy.js +65 -0
package/dist/src/mcp/registry.js +129 -0
package/dist/src/mcp/service.js +460 -0
package/dist/src/middleware/auth.js +179 -0
package/dist/src/middleware/requestCapture.js +192 -0
package/dist/src/middleware/requestStats.js +118 -0
package/dist/src/pools/builder.js +132 -0
package/dist/src/pools/repository.js +69 -0
package/dist/src/pools/scheduler.js +360 -0
package/dist/src/pools/types.js +2 -0
package/dist/src/protocols/adapters/dashscope.js +267 -0
package/dist/src/protocols/adapters/inferenceV2.js +346 -0
package/dist/src/protocols/adapters/openai.js +27 -0
package/dist/src/protocols/registry.js +99 -0
package/dist/src/protocols/types.js +2 -0
package/dist/src/providers/health.js +153 -0
package/dist/src/providers/importer.js +289 -0
package/dist/src/providers/modelRegistry.js +313 -0
package/dist/src/providers/repository.js +361 -0
package/dist/src/providers/types.js +2 -0
package/dist/src/routes/admin.js +531 -0
package/dist/src/routes/audio.js +295 -0
package/dist/src/routes/chat.js +240 -0
package/dist/src/routes/embeddings.js +157 -0
package/dist/src/routes/images.js +288 -0
package/dist/src/routes/mcp.js +256 -0
package/dist/src/routes/mcpService.js +100 -0
package/dist/src/routes/models.js +48 -0
package/dist/src/routes/responses.js +711 -0
package/dist/src/routes/sessions.js +450 -0
package/dist/src/routes/stats.js +270 -0
package/dist/src/routes/ui.js +97 -0
package/dist/src/routes/videos.js +107 -0
package/dist/src/routing/router.js +338 -0
package/dist/src/services/imageGeneration.js +280 -0
package/dist/src/services/imageUnderstanding.js +352 -0
package/dist/src/services/videoGeneration.js +79 -0
package/dist/src/storage/captureRepository.js +1591 -0
package/dist/src/storage/files.js +157 -0
package/dist/src/storage/imageCache.js +346 -0
package/dist/src/storage/repositories.js +388 -0
package/dist/src/storage/sessionRepository.js +370 -0
package/dist/src/storage/statsRepository.js +204 -0
package/dist/src/transport/httpClient.js +126 -0
package/dist/src/types.js +2 -0
package/dist/src/utils/messageMedia.js +285 -0
package/dist/src/utils/modelCapabilities.js +108 -0
package/dist/src/utils/modelDiscovery.js +170 -0
package/dist/src/version.js +5 -0
package/dist/src/workers/captureRetention.js +25 -0
package/dist/src/workers/configWatcher.js +91 -0
package/dist/src/workers/healthChecker.js +21 -0
package/dist/src/workers/statsRotation.js +41 -0
package/docs/LLM/output_schema.md +312 -0
package/docs/benchmark.md +208 -0
package/docs/mcp-guidelines.md +125 -0
package/docs/mcp-service.md +178 -0
package/docs/opencode.md +86 -0
package/docs/providers.md +79 -0
package/examples/benchmark.config.yaml +28 -0
package/examples/providers/alibaba-dashscope.yaml +88 -0
package/examples/providers/alibaba-llm.yaml +64 -0
package/examples/providers/alibaba-registry.yaml +7 -0
package/examples/providers/inference-v2-ray.yaml +29 -0
package/examples/scenarios/assets/omni-call-sample.wav +0 -0
package/examples/scenarios/custom.jsonl +5 -0
package/examples/scenarios/custom.yaml +40 -0
package/model-form-v2.png +0 -0
package/package.json +66 -0
package/provider-form-v2.png +0 -0
package/provider-form.png +0 -0
package/scripts/manual-test.sh +11 -0
package/scripts/version-from-git.js +23 -0
package/src/benchmark/artifacts.ts +149 -0
package/src/benchmark/capabilityClassifier.ts +99 -0
package/src/benchmark/capabilityStore.ts +174 -0
package/src/benchmark/config.ts +337 -0
package/src/benchmark/gates.ts +164 -0
package/src/benchmark/jobs.ts +312 -0
package/src/benchmark/runner.ts +2519 -0
package/src/benchmark/schema.ts +443 -0
package/src/benchmark/suites.ts +323 -0
package/src/benchmark/tinyQaDataset.ts +428 -0
package/src/benchmark/types.ts +442 -0
package/src/config.ts +44 -0
package/src/index.ts +195 -0
package/src/mcp/client.ts +305 -0
package/src/mcp/discovery.ts +266 -0
package/src/mcp/policy.ts +105 -0
package/src/mcp/registry.ts +164 -0
package/src/mcp/service.ts +611 -0
package/src/middleware/auth.ts +251 -0
package/src/middleware/requestCapture.ts +245 -0
package/src/middleware/requestStats.ts +163 -0
package/src/pools/builder.ts +159 -0
package/src/pools/repository.ts +71 -0
package/src/pools/scheduler.ts +425 -0
package/src/pools/types.ts +117 -0
package/src/protocols/adapters/dashscope.ts +335 -0
package/src/protocols/adapters/inferenceV2.ts +428 -0
package/src/protocols/adapters/openai.ts +32 -0
package/src/protocols/registry.ts +117 -0
package/src/protocols/types.ts +81 -0
package/src/providers/health.ts +207 -0
package/src/providers/importer.ts +402 -0
package/src/providers/modelRegistry.ts +415 -0
package/src/providers/repository.ts +439 -0
package/src/providers/types.ts +113 -0
package/src/routes/admin.ts +666 -0
package/src/routes/audio.ts +372 -0
package/src/routes/chat.ts +301 -0
package/src/routes/embeddings.ts +197 -0
package/src/routes/images.ts +356 -0
package/src/routes/mcp.ts +320 -0
package/src/routes/mcpService.ts +114 -0
package/src/routes/models.ts +50 -0
package/src/routes/responses.ts +872 -0
package/src/routes/sessions.ts +558 -0
package/src/routes/stats.ts +312 -0
package/src/routes/ui.ts +96 -0
package/src/routes/videos.ts +132 -0
package/src/routing/router.ts +501 -0
package/src/services/imageGeneration.ts +396 -0
package/src/services/imageUnderstanding.ts +449 -0
package/src/services/videoGeneration.ts +127 -0
package/src/storage/captureRepository.ts +1835 -0
package/src/storage/files.ts +178 -0
package/src/storage/imageCache.ts +405 -0
package/src/storage/repositories.ts +494 -0
package/src/storage/sessionRepository.ts +419 -0
package/src/storage/statsRepository.ts +238 -0
package/src/transport/httpClient.ts +145 -0
package/src/types.ts +322 -0
package/src/utils/messageMedia.ts +293 -0
package/src/utils/modelCapabilities.ts +161 -0
package/src/utils/modelDiscovery.ts +203 -0
package/src/workers/captureRetention.ts +25 -0
package/src/workers/configWatcher.ts +115 -0
package/src/workers/healthChecker.ts +22 -0
package/src/workers/statsRotation.ts +49 -0
package/tests/benchmarkAdminRoutes.test.ts +82 -0
package/tests/benchmarkBasics.test.ts +116 -0
package/tests/captureAdminRoutes.test.ts +420 -0
package/tests/captureRepository.test.ts +797 -0
package/tests/cliLegacyRewrite.test.ts +45 -0
package/tests/imageGeneration.service.test.ts +107 -0
package/tests/imageUnderstanding.service.test.ts +123 -0
package/tests/mcpPolicy.test.ts +105 -0
package/tests/mcpService.test.ts +1245 -0
package/tests/modelRef.test.ts +23 -0
package/tests/modelsRoutes.test.ts +154 -0
package/tests/sessionMediaCache.test.ts +167 -0
package/tests/statsRoutes.test.ts +323 -0
package/tsconfig.json +15 -0
package/ui/index.html +16 -0
package/ui/package-lock.json +8521 -0
package/ui/package.json +52 -0
package/ui/postcss.config.js +6 -0
package/ui/public/assets/apple-touch-icon.png +0 -0
package/ui/public/assets/favicon-16.png +0 -0
package/ui/public/assets/favicon-32.png +0 -0
package/ui/public/assets/icon-192.png +0 -0
package/ui/public/assets/icon-512.png +0 -0
package/ui/src/App.tsx +27 -0
package/ui/src/api/client.ts +1503 -0
package/ui/src/components/EndpointUsageGuide.tsx +361 -0
package/ui/src/components/Layout.tsx +124 -0
package/ui/src/components/MessageContent.tsx +365 -0
package/ui/src/components/ToolCallMessage.tsx +179 -0
package/ui/src/components/ToolPicker.tsx +442 -0
package/ui/src/components/messageContentParser.test.ts +41 -0
package/ui/src/components/messageContentParser.ts +73 -0
package/ui/src/components/thinkingPreview.test.ts +27 -0
package/ui/src/components/thinkingPreview.ts +15 -0
package/ui/src/components/toMermaidSankey.test.ts +78 -0
package/ui/src/components/toMermaidSankey.ts +56 -0
package/ui/src/components/ui/button.tsx +58 -0
package/ui/src/components/ui/input.tsx +21 -0
package/ui/src/components/ui/textarea.tsx +21 -0
package/ui/src/lib/utils.ts +6 -0
package/ui/src/main.tsx +9 -0
package/ui/src/pages/AgentPlayground.tsx +2010 -0
package/ui/src/pages/Benchmark.tsx +988 -0
package/ui/src/pages/Dashboard.tsx +581 -0
package/ui/src/pages/Peek.tsx +962 -0
package/ui/src/pages/Settings.tsx +2013 -0
package/ui/src/pages/agentPlaygroundPayload.test.ts +109 -0
package/ui/src/pages/agentPlaygroundPayload.ts +97 -0
package/ui/src/pages/agentThinkingContent.test.ts +50 -0
package/ui/src/pages/agentThinkingContent.ts +57 -0
package/ui/src/pages/dashboardTokenUsage.test.ts +66 -0
package/ui/src/pages/dashboardTokenUsage.ts +36 -0
package/ui/src/pages/imageUpload.test.ts +39 -0
package/ui/src/pages/imageUpload.ts +71 -0
package/ui/src/pages/peekFilters.test.ts +29 -0
package/ui/src/pages/peekFilters.ts +13 -0
package/ui/src/pages/peekMedia.test.ts +58 -0
package/ui/src/pages/peekMedia.ts +148 -0
package/ui/src/pages/sessionAutoTitle.test.ts +128 -0
package/ui/src/pages/sessionAutoTitle.ts +106 -0
package/ui/src/stores/settings.ts +58 -0
package/ui/src/styles/globals.css +223 -0
package/ui/src/vite-env.d.ts +8 -0
package/ui/tailwind.config.js +106 -0
package/ui/tsconfig.json +32 -0
package/ui/vite.config.ts +37 -0

package/src/benchmark/config.ts ADDED Viewed

@@ -0,0 +1,337 @@
+import { promises as fs } from "fs";
+import path from "path";
+import YAML from "yaml";
+import { StoragePaths } from "../storage/files";
+import {
+  BenchmarkCliOptions,
+  BenchmarkConfigFile,
+  BenchmarkDefaults,
+  BenchmarkExecutionMode,
+  BenchmarkGateConfig,
+  BenchmarkProfileSettings,
+  EffectiveBenchmarkConfig,
+} from "./types";
+// Config schema version reported when the config file does not declare `version`.
+const DEFAULT_VERSION = 1;
+// Fallback for `run.capTtlDays` when neither the CLI nor the config file provides one.
+const DEFAULT_CAP_TTL_DAYS = 7;
+// Built-in request defaults. Values under `defaults` in the config file are spread over these,
+// and the merged result is checked by validateDefaults.
+const DEFAULTS: BenchmarkDefaults = {
+  requestTimeoutMs: 120000,
+  toolTimeoutMs: 15000,
+  maxIterations: 6,
+  temperature: 0,
+  top_p: 1,
+  max_tokens: 512,
+  presence_penalty: 0,
+  frequency_penalty: 0,
+};
+// Built-in profiles. Profiles from the config file are merged over these by name;
+// "local" is the profile selected when nothing else is specified, and it is also
+// the base for file-defined profiles that have no built-in counterpart.
+const DEFAULT_PROFILES: Record<string, BenchmarkProfileSettings> = {
+  local: {
+    warmupRuns: 1,
+    measuredRuns: 3,
+    minScenarioPassRate: 1.0,
+  },
+  ci: {
+    warmupRuns: 2,
+    measuredRuns: 5,
+    minScenarioPassRate: 1.0,
+  },
+};
+// Gate thresholds used when the config file omits them. The hard rate is validated as a
+// fraction in [0, 1]; the soft values are validated as non-negative numbers (the `Pct`
+// suffix suggests percentages).
+const DEFAULT_GATES: BenchmarkGateConfig = {
+  hard: {
+    smokeMinSuccessRate: 1.0,
+  },
+  soft: {
+    maxP95RegressionPct: 20,
+    maxThroughputDropPct: 20,
+  },
+};
+/**
+ * Builds the effective benchmark configuration for a run.
+ *
+ * Sources are layered in this order of precedence: CLI options, then the config
+ * file (explicit `cli.configPath` or `benchmark.config.yaml` under `paths.baseDir`),
+ * then the built-in defaults declared in this module. Every numeric value and the
+ * stop sequences are validated before being returned.
+ *
+ * @param paths Storage locations; only `baseDir` is used, to find the default config file.
+ * @param cli Options supplied on the command line.
+ * @returns The merged and validated configuration, including the path of the config
+ *   file that was read (`configSource`), if any.
+ * @throws Error when the selected profile is not defined, when an explicitly requested
+ *   config file is missing or unparsable, or when any value fails validation.
+ */
+export async function resolveBenchmarkConfig(
+  paths: StoragePaths,
+  cli: BenchmarkCliOptions
+): Promise<EffectiveBenchmarkConfig> {
+  const { fileConfig, configSource } = await loadConfigFile(paths, cli.configPath);
+  const mergedDefaults: BenchmarkDefaults = {
+    ...DEFAULTS,
+    ...(fileConfig?.defaults ?? {}),
+  };
+  const mergedProfiles: Record<string, BenchmarkProfileSettings> = {
+    ...DEFAULT_PROFILES,
+  };
+  // File-defined profiles patch the built-in profile of the same name; a brand-new
+  // profile starts from the built-in "local" settings.
+  for (const [profileName, profilePatch] of Object.entries(fileConfig?.profiles ?? {})) {
+    mergedProfiles[profileName] = {
+      ...(mergedProfiles[profileName] ?? DEFAULT_PROFILES.local),
+      ...profilePatch,
+    };
+  }
+  const selectedProfile =
+    cli.profile ?? fileConfig?.run?.profile ?? "local";
+  // Only accept own properties: a plain index lookup would also match inherited
+  // Object.prototype members (e.g. "constructor", "toString") and report a confusing
+  // validation error instead of "Unknown benchmark profile".
+  const profileSettings = Object.prototype.hasOwnProperty.call(mergedProfiles, selectedProfile)
+    ? mergedProfiles[selectedProfile]
+    : undefined;
+  if (!profileSettings) {
+    const names = Object.keys(mergedProfiles).sort().join(", ");
+    throw new Error(
+      `Unknown benchmark profile '${selectedProfile}'. Available profiles: ${names}`
+    );
+  }
+  const mergedGates: BenchmarkGateConfig = {
+    hard: {
+      ...DEFAULT_GATES.hard,
+      ...(fileConfig?.gates?.hard ?? {}),
+    },
+    soft: {
+      ...DEFAULT_GATES.soft,
+      ...(fileConfig?.gates?.soft ?? {}),
+    },
+  };
+  const resolved: EffectiveBenchmarkConfig = {
+    version: fileConfig?.version ?? DEFAULT_VERSION,
+    profile: selectedProfile,
+    defaults: validateDefaults(mergedDefaults),
+    profileSettings: validateProfileSettings(profileSettings, selectedProfile),
+    gates: validateGates(mergedGates),
+    run: {
+      suite: cli.suite ?? fileConfig?.run?.suite ?? "showcase",
+      exampleId: cli.exampleId ?? fileConfig?.run?.exampleId,
+      scenarioPath: cli.scenarioPath ?? fileConfig?.run?.scenarioPath,
+      // The CLI calls this `modelOverride`, the config file calls it `model`.
+      modelOverride: cli.modelOverride ?? fileConfig?.run?.model,
+      outPath: cli.outPath ?? fileConfig?.run?.outPath,
+      baselinePath: cli.baselinePath ?? fileConfig?.run?.baselinePath,
+      executionMode: resolveExecutionMode(cli, fileConfig),
+      listExamples: cli.listExamples ?? fileConfig?.run?.listExamples ?? false,
+      updateCapCache: cli.updateCapCache ?? fileConfig?.run?.updateCapCache ?? false,
+      capTtlDays: intField(
+        cli.capTtlDays ?? fileConfig?.run?.capTtlDays ?? DEFAULT_CAP_TTL_DAYS,
+        "run.capTtlDays",
+        1
+      ),
+      // Per-run sampling overrides stay undefined when not supplied.
+      temperature: optionalNumberField(
+        cli.temperature ?? fileConfig?.run?.temperature,
+        "run.temperature"
+      ),
+      top_p: optionalBoundedField(
+        cli.top_p ?? fileConfig?.run?.top_p,
+        "run.top_p",
+        0,
+        1
+      ),
+      max_tokens: optionalIntField(
+        cli.max_tokens ?? fileConfig?.run?.max_tokens,
+        "run.max_tokens",
+        1
+      ),
+      presence_penalty: optionalBoundedField(
+        cli.presence_penalty ?? fileConfig?.run?.presence_penalty,
+        "run.presence_penalty",
+        -2,
+        2
+      ),
+      frequency_penalty: optionalBoundedField(
+        cli.frequency_penalty ?? fileConfig?.run?.frequency_penalty,
+        "run.frequency_penalty",
+        -2,
+        2
+      ),
+      seed: optionalIntField(cli.seed ?? fileConfig?.run?.seed, "run.seed", 0),
+      stop: optionalStopField(cli.stop ?? fileConfig?.run?.stop, "run.stop"),
+    },
+    configSource,
+  };
+  return resolved;
+}
+/**
+ * Reads and parses the benchmark config file, if one is available.
+ *
+ * With an explicit path the file must exist; otherwise `benchmark.config.yaml`
+ * under `paths.baseDir` is tried and a missing file is not an error.
+ *
+ * @param paths Storage locations; `baseDir` hosts the default config file.
+ * @param explicitPath Optional path requested by the caller, resolved to an absolute path.
+ * @returns The parsed config and the path it came from, or an empty object when the
+ *   default file does not exist.
+ * @throws Error when an explicitly requested file is missing or the file cannot be parsed;
+ *   other read errors are rethrown as-is.
+ */
+async function loadConfigFile(
+  paths: StoragePaths,
+  explicitPath?: string
+): Promise<{ fileConfig?: BenchmarkConfigFile; configSource?: string }> {
+  let configSource: string;
+  if (explicitPath) {
+    configSource = path.resolve(explicitPath);
+  } else {
+    configSource = path.join(paths.baseDir, "benchmark.config.yaml");
+  }
+
+  let text: string;
+  try {
+    text = await fs.readFile(configSource, "utf8");
+  } catch (readError) {
+    // Anything other than "file does not exist" is unexpected and propagates untouched.
+    if ((readError as NodeJS.ErrnoException).code !== "ENOENT") {
+      throw readError;
+    }
+    if (explicitPath) {
+      throw new Error(`Benchmark config not found: ${configSource}`);
+    }
+    return {};
+  }
+
+  return { fileConfig: parseConfigDocument(configSource, text), configSource };
+}
+/**
+ * Parses the text of a benchmark config file.
+ *
+ * Files with a `.json` extension (case-insensitive) are parsed as JSON; everything
+ * else is parsed as YAML. An empty document (parsed value `null`/`undefined`) is
+ * treated as an empty config, and any other non-mapping top level (scalar or list)
+ * is rejected instead of being silently ignored by later optional-chaining lookups.
+ *
+ * @param filePath Path of the file, used for format detection and error messages.
+ * @param raw File contents.
+ * @returns The parsed config object.
+ * @throws Error when the text cannot be parsed or the top level is not a mapping.
+ */
+function parseConfigDocument(filePath: string, raw: string): BenchmarkConfigFile {
+  const ext = path.extname(filePath).toLowerCase();
+  let parsed: unknown;
+  try {
+    parsed = ext === ".json" ? JSON.parse(raw) : YAML.parse(raw);
+  } catch (error) {
+    throw new Error(
+      `Failed to parse benchmark config ${filePath}: ${(error as Error).message}`
+    );
+  }
+  if (parsed === null || parsed === undefined) {
+    // Empty file: behave as if no settings were provided.
+    return {} as BenchmarkConfigFile;
+  }
+  if (typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error(
+      `Benchmark config ${filePath} must contain a mapping at the top level`
+    );
+  }
+  return parsed as BenchmarkConfigFile;
+}
+/**
+ * Validates the merged request defaults field by field.
+ *
+ * Timeouts, iteration count and `max_tokens` must be integers >= 1; `top_p` must lie
+ * in [0, 1]; both penalties must lie in [-2, 2]; `temperature` must be finite.
+ * `seed` and `stop` are optional and only checked when present.
+ *
+ * @param defaults Defaults after merging the config file over the built-in values.
+ * @returns A new object containing the validated values.
+ * @throws Error naming the first field (in declaration order) that fails validation.
+ */
+function validateDefaults(defaults: BenchmarkDefaults): BenchmarkDefaults {
+  // Checks run in the same order as the returned keys so the first failure reported is stable.
+  const requestTimeoutMs = intField(defaults.requestTimeoutMs, "defaults.requestTimeoutMs", 1);
+  const toolTimeoutMs = intField(defaults.toolTimeoutMs, "defaults.toolTimeoutMs", 1);
+  const maxIterations = intField(defaults.maxIterations, "defaults.maxIterations", 1);
+  const temperature = numberField(defaults.temperature, "defaults.temperature");
+  const top_p = boundedField(defaults.top_p, "defaults.top_p", 0, 1);
+  const max_tokens = intField(defaults.max_tokens, "defaults.max_tokens", 1);
+  const presence_penalty = boundedField(
+    defaults.presence_penalty,
+    "defaults.presence_penalty",
+    -2,
+    2
+  );
+  const frequency_penalty = boundedField(
+    defaults.frequency_penalty,
+    "defaults.frequency_penalty",
+    -2,
+    2
+  );
+  const seed = optionalIntField(defaults.seed, "defaults.seed", 0);
+  const stop = optionalStopField(defaults.stop, "defaults.stop");
+  return {
+    requestTimeoutMs,
+    toolTimeoutMs,
+    maxIterations,
+    temperature,
+    top_p,
+    max_tokens,
+    presence_penalty,
+    frequency_penalty,
+    seed,
+    stop,
+  };
+}
+/**
+ * Determines the execution mode for a run.
+ *
+ * An explicit mode (CLI first, then config file) wins when it is one of the two
+ * recognised values. Otherwise the mode follows the suite: the "showcase" suite
+ * (also the default suite) runs in "showcase" mode, every other suite in "diagnostic".
+ *
+ * @param cli Options supplied on the command line.
+ * @param fileConfig Parsed config file, if one was loaded.
+ * @returns "showcase" or "diagnostic".
+ */
+function resolveExecutionMode(
+  cli: BenchmarkCliOptions,
+  fileConfig?: BenchmarkConfigFile
+): BenchmarkExecutionMode {
+  const requested = cli.executionMode ?? fileConfig?.run?.executionMode;
+  switch (requested) {
+    case "showcase":
+    case "diagnostic":
+      return requested;
+  }
+  // No usable explicit mode: fall back to the suite name.
+  const suiteName = cli.suite ?? fileConfig?.run?.suite ?? "showcase";
+  if (suiteName === "showcase") {
+    return "showcase";
+  }
+  return "diagnostic";
+}
+/**
+ * Validates the settings of the selected profile.
+ *
+ * `warmupRuns` must be an integer >= 0, `measuredRuns` an integer >= 1 and
+ * `minScenarioPassRate` a number in [0, 1].
+ *
+ * @param profile Settings to check.
+ * @param profileName Profile name, used to build the field path in error messages.
+ * @returns A new object containing the validated values.
+ * @throws Error naming the first field that fails validation.
+ */
+function validateProfileSettings(
+  profile: BenchmarkProfileSettings,
+  profileName: string
+): BenchmarkProfileSettings {
+  const warmupRuns = intField(profile.warmupRuns, `profiles.${profileName}.warmupRuns`, 0);
+  const measuredRuns = intField(profile.measuredRuns, `profiles.${profileName}.measuredRuns`, 1);
+  const minScenarioPassRate = boundedField(
+    profile.minScenarioPassRate,
+    `profiles.${profileName}.minScenarioPassRate`,
+    0,
+    1
+  );
+  return { warmupRuns, measuredRuns, minScenarioPassRate };
+}
+/**
+ * Validates the merged gate thresholds.
+ *
+ * The hard smoke success rate must lie in [0, 1]; both soft thresholds must be
+ * finite numbers >= 0.
+ *
+ * @param gates Gate configuration after merging the config file over the built-in values.
+ * @returns A new object containing the validated thresholds.
+ * @throws Error naming the first threshold that fails validation.
+ */
+function validateGates(gates: BenchmarkGateConfig): BenchmarkGateConfig {
+  const smokeMinSuccessRate = boundedField(
+    gates.hard.smokeMinSuccessRate,
+    "gates.hard.smokeMinSuccessRate",
+    0,
+    1
+  );
+  const maxP95RegressionPct = numberField(
+    gates.soft.maxP95RegressionPct,
+    "gates.soft.maxP95RegressionPct",
+    0
+  );
+  const maxThroughputDropPct = numberField(
+    gates.soft.maxThroughputDropPct,
+    "gates.soft.maxThroughputDropPct",
+    0
+  );
+  return {
+    hard: { smokeMinSuccessRate },
+    soft: { maxP95RegressionPct, maxThroughputDropPct },
+  };
+}
+/**
+ * Ensures `value` is an integer that is not below `min`.
+ *
+ * @param value Value to check.
+ * @param field Field path used in the error message.
+ * @param min Smallest accepted value.
+ * @returns `value` unchanged.
+ * @throws Error when `value` is not an integer or is less than `min`.
+ */
+function intField(value: number, field: string, min: number): number {
+  const accepted = Number.isInteger(value) && !(value < min);
+  if (accepted) {
+    return value;
+  }
+  throw new Error(`${field} must be an integer >= ${min}`);
+}
+/**
+ * Ensures `value` is a finite number and, when `min` is given, not below it.
+ *
+ * @param value Value to check.
+ * @param field Field path used in the error message.
+ * @param min Optional smallest accepted value.
+ * @returns `value` unchanged.
+ * @throws Error when `value` is not finite or is less than `min`.
+ */
+function numberField(value: number, field: string, min?: number): number {
+  if (Number.isFinite(value)) {
+    const belowMinimum = typeof min === "number" && value < min;
+    if (belowMinimum) {
+      throw new Error(`${field} must be >= ${min}`);
+    }
+    return value;
+  }
+  throw new Error(`${field} must be a finite number`);
+}
+/**
+ * Ensures `value` is a finite number within the inclusive range [`min`, `max`].
+ *
+ * @param value Value to check.
+ * @param field Field path used in the error message.
+ * @param min Lower bound (inclusive).
+ * @param max Upper bound (inclusive).
+ * @returns `value` unchanged.
+ * @throws Error when `value` is not finite or lies outside the range.
+ */
+function boundedField(value: number, field: string, min: number, max: number): number {
+  const withinRange = Number.isFinite(value) && !(value < min) && !(value > max);
+  if (withinRange) {
+    return value;
+  }
+  throw new Error(`${field} must be between ${min} and ${max}`);
+}
+/**
+ * Like intField, but lets `undefined` pass through untouched.
+ *
+ * @param value Value to check, or `undefined` when the setting was not supplied.
+ * @param field Field path used in the error message.
+ * @param min Smallest accepted value.
+ * @returns `undefined` when no value was supplied, otherwise the validated integer.
+ * @throws Error when a supplied value is not an integer >= `min`.
+ */
+function optionalIntField(value: number | undefined, field: string, min: number): number | undefined {
+  return value === undefined ? undefined : intField(value, field, min);
+}
+/**
+ * Like numberField, but lets `undefined` pass through untouched.
+ *
+ * @param value Value to check, or `undefined` when the setting was not supplied.
+ * @param field Field path used in the error message.
+ * @param min Optional smallest accepted value.
+ * @returns `undefined` when no value was supplied, otherwise the validated number.
+ * @throws Error when a supplied value is not finite or is less than `min`.
+ */
+function optionalNumberField(value: number | undefined, field: string, min?: number): number | undefined {
+  return value === undefined ? undefined : numberField(value, field, min);
+}
+/**
+ * Like boundedField, but lets `undefined` pass through untouched.
+ *
+ * @param value Value to check, or `undefined` when the setting was not supplied.
+ * @param field Field path used in the error message.
+ * @param min Lower bound (inclusive).
+ * @param max Upper bound (inclusive).
+ * @returns `undefined` when no value was supplied, otherwise the validated number.
+ * @throws Error when a supplied value is not finite or lies outside the range.
+ */
+function optionalBoundedField(
+  value: number | undefined,
+  field: string,
+  min: number,
+  max: number
+): number | undefined {
+  return value === undefined ? undefined : boundedField(value, field, min, max);
+}
+/**
+ * Validates an optional stop-sequence setting.
+ *
+ * Accepts a single non-empty string or a non-empty array of non-empty strings.
+ * Sequences are returned exactly as supplied: whitespace is significant in a stop
+ * sequence (for example "\n" or " END"), so values are not trimmed and a
+ * whitespace-only sequence is valid. Only zero-length strings are rejected.
+ *
+ * @param value Stop sequence(s), or `undefined` when the setting was not supplied.
+ * @param field Field path used in error messages.
+ * @returns `undefined` when no value was supplied, the string as given, or a new
+ *   array holding the validated strings.
+ * @throws Error when the value is empty, contains an empty or non-string entry,
+ *   or is neither a string nor an array.
+ */
+function optionalStopField(value: string | string[] | undefined, field: string): string | string[] | undefined {
+  if (value === undefined) return undefined;
+  if (typeof value === "string") {
+    if (value.length === 0) {
+      throw new Error(`${field} must not be empty`);
+    }
+    return value;
+  }
+  if (Array.isArray(value)) {
+    if (value.length === 0) {
+      throw new Error(`${field} must include at least one stop sequence`);
+    }
+    // map() both validates each entry and returns a copy, so the caller's array is not shared.
+    return value.map((item, index) => {
+      // Config files are untyped at runtime, so entries may not actually be strings.
+      if (typeof item !== "string") {
+        throw new Error(`${field}[${index}] must be a string`);
+      }
+      if (item.length === 0) {
+        throw new Error(`${field}[${index}] must not be empty`);
+      }
+      return item;
+    });
+  }
+  throw new Error(`${field} must be a string or string[]`);
+}

package/src/benchmark/gates.ts ADDED Viewed

@@ -0,0 +1,164 @@
+import { promises as fs } from "fs";
+import {
+  BenchmarkGateResults,
+  BenchmarkReport,
+  EffectiveBenchmarkConfig,
+  ScenarioResult,
+} from "./types";
+/**
+ * Evaluates the hard and soft gates for a finished benchmark report.
+ *
+ * Hard gates: for the "smoke" suite the overall success rate must reach
+ * `gates.hard.smokeMinSuccessRate` (only checked when something was executed),
+ * and every non-skipped scenario must reach the profile's `minScenarioPassRate`.
+ *
+ * Soft gates (only when `run.baselinePath` is set): each non-skipped scenario that
+ * also appears in the baseline is compared against it for p95 latency regression
+ * and throughput drop. Baseline metrics that are 0 are not compared.
+ *
+ * @param report The benchmark report without its gate results.
+ * @param effective The effective configuration of the run.
+ * @returns Pass/fail flags and human-readable messages for both gate classes.
+ * @throws Error when the baseline file cannot be read or is not a valid baseline document.
+ */
+export async function evaluateGates(
+  report: Omit<BenchmarkReport, "gateResults">,
+  effective: EffectiveBenchmarkConfig
+): Promise<BenchmarkGateResults> {
+  const hardFailures: string[] = [];
+  const softFailures: string[] = [];
+
+  if (effective.run.suite === "smoke") {
+    const requiredRate = effective.gates.hard.smokeMinSuccessRate;
+    const smokeFailed = report.executed > 0 && report.successRate < requiredRate;
+    if (smokeFailed) {
+      hardFailures.push(
+        `Smoke suite success rate ${toPct(report.successRate)} is below required ${toPct(requiredRate)}.`
+      );
+    }
+  }
+
+  const requiredPassRate = effective.profileSettings.minScenarioPassRate;
+  for (const scenario of report.results) {
+    if (scenario.status === "skipped" || !(scenario.passRate < requiredPassRate)) {
+      continue;
+    }
+    hardFailures.push(
+      `Scenario '${scenario.id}' pass rate ${toPct(scenario.passRate)} is below required ${toPct(requiredPassRate)}.`
+    );
+  }
+
+  const baselinePath = effective.run.baselinePath;
+  if (baselinePath) {
+    const baseline = await loadBaseline(baselinePath);
+
+    // Index usable baseline entries by scenario id; a later duplicate id replaces an earlier one.
+    const referenceById = new Map<string, BaselineScenario>();
+    for (const entry of baseline.results) {
+      const normalized = normalizeBaselineScenario(entry);
+      if (normalized !== null) {
+        referenceById.set(normalized.id, normalized);
+      }
+    }
+
+    for (const current of report.results) {
+      const ref = current.status === "skipped" ? undefined : referenceById.get(current.id);
+      if (!ref) {
+        continue;
+      }
+
+      const allowedP95Pct = effective.gates.soft.maxP95RegressionPct;
+      const latencyRegressed =
+        ref.p95LatencyMs > 0 &&
+        current.p95LatencyMs > ref.p95LatencyMs * (1 + allowedP95Pct / 100);
+      if (latencyRegressed) {
+        softFailures.push(
+          `Scenario '${current.id}' p95 latency regressed ${pctDelta(current.p95LatencyMs, ref.p95LatencyMs)} (${current.p95LatencyMs}ms vs baseline ${ref.p95LatencyMs}ms).`
+        );
+      }
+
+      const allowedDropPct = effective.gates.soft.maxThroughputDropPct;
+      const throughputDropped =
+        ref.avgThroughputTokensPerSec > 0 &&
+        current.avgThroughputTokensPerSec <
+          ref.avgThroughputTokensPerSec * (1 - allowedDropPct / 100);
+      if (throughputDropped) {
+        softFailures.push(
+          `Scenario '${current.id}' throughput dropped ${throughputDropPct(current.avgThroughputTokensPerSec, ref.avgThroughputTokensPerSec)} (${current.avgThroughputTokensPerSec.toFixed(2)} t/s vs baseline ${ref.avgThroughputTokensPerSec.toFixed(2)} t/s).`
+        );
+      }
+    }
+  }
+
+  return {
+    hard: { passed: hardFailures.length === 0, messages: hardFailures },
+    soft: { passed: softFailures.length === 0, messages: softFailures },
+  };
+}
+/**
+ * The subset of a baseline scenario result that the soft gates compare against.
+ * Instances are produced by normalizeBaselineScenario.
+ */
+interface BaselineScenario {
+  /** Scenario identifier used to match baseline entries with current results. */
+  id: string;
+  /** Baseline p95 latency; 0 when the baseline had no finite value, which skips the latency check. */
+  p95LatencyMs: number;
+  /** Baseline average throughput; 0 when the baseline had no finite value, which skips the throughput check. */
+  avgThroughputTokensPerSec: number;
+}
+/**
+ * Loads a baseline report from disk.
+ *
+ * The file must be JSON whose top level is an object with a `results` array.
+ * The entries of that array are not validated here.
+ *
+ * @param pathLike Path of the baseline file.
+ * @returns The parsed document, typed only as far as the `results` array.
+ * @throws Error when the file cannot be read, is not valid JSON, or has no
+ *   top-level `results` array.
+ */
+async function loadBaseline(pathLike: string): Promise<{ results: unknown[] }> {
+  let text: string;
+  try {
+    text = await fs.readFile(pathLike, "utf8");
+  } catch (readError) {
+    throw new Error(
+      `Failed to read baseline file '${pathLike}': ${(readError as Error).message}`
+    );
+  }
+
+  let document: unknown;
+  try {
+    document = JSON.parse(text);
+  } catch (parseError) {
+    throw new Error(
+      `Failed to parse baseline file '${pathLike}' as JSON: ${(parseError as Error).message}`
+    );
+  }
+
+  const hasResultsArray =
+    typeof document === "object" &&
+    document !== null &&
+    Array.isArray((document as { results?: unknown[] }).results);
+  if (hasResultsArray) {
+    return document as { results: unknown[] };
+  }
+  throw new Error(
+    `Baseline file '${pathLike}' must contain a top-level 'results' array.`
+  );
+}
+/**
+ * Converts one untyped baseline entry into a BaselineScenario.
+ *
+ * @param raw A value taken from the baseline file's `results` array.
+ * @returns `null` when `raw` is not an object or has no string `id`; otherwise the
+ *   id plus both metrics, each replaced by 0 when it is missing or not a finite number.
+ */
+function normalizeBaselineScenario(raw: unknown): BaselineScenario | null {
+  if (typeof raw !== "object" || raw === null) {
+    return null;
+  }
+  const { id, p95LatencyMs, avgThroughputTokensPerSec } = raw as Partial<ScenarioResult>;
+  if (typeof id !== "string") {
+    return null;
+  }
+  // A metric that is absent or not a finite number is recorded as 0.
+  const finiteOrZero = (metric: unknown): number =>
+    typeof metric === "number" && Number.isFinite(metric) ? metric : 0;
+  return {
+    id,
+    p95LatencyMs: finiteOrZero(p95LatencyMs),
+    avgThroughputTokensPerSec: finiteOrZero(avgThroughputTokensPerSec),
+  };
+}
+/**
+ * Formats a fraction as a percentage with one decimal place.
+ *
+ * @param value Fraction to format (1 corresponds to 100%).
+ * @returns The percentage text, e.g. "50.0%" for 0.5.
+ */
+function toPct(value: number): string {
+  const scaled = value * 100;
+  return scaled.toFixed(1) + "%";
+}
+/**
+ * Formats the relative change of `current` versus `baseline` as a signed percentage.
+ *
+ * @param current Current measurement.
+ * @param baseline Baseline measurement.
+ * @returns "n/a" when the baseline is 0; otherwise the change with one decimal place,
+ *   prefixed with "+" when it is zero or positive (e.g. "+20.0%", "-20.0%").
+ */
+function pctDelta(current: number, baseline: number): string {
+  if (baseline === 0) {
+    return "n/a";
+  }
+  const changePct = ((current - baseline) / baseline) * 100;
+  let formatted = changePct.toFixed(1) + "%";
+  // Negative values already carry their own sign from toFixed().
+  if (changePct >= 0) {
+    formatted = "+" + formatted;
+  }
+  return formatted;
+}
+/**
+ * Formats how far `current` throughput fell below `baseline`, as a percentage.
+ *
+ * @param current Current throughput.
+ * @param baseline Baseline throughput.
+ * @returns "n/a" when the baseline is 0; otherwise the drop with one decimal place.
+ *   The value is negative when `current` exceeds `baseline`.
+ */
+function throughputDropPct(current: number, baseline: number): string {
+  if (baseline === 0) {
+    return "n/a";
+  }
+  const dropPct = ((baseline - current) / baseline) * 100;
+  return dropPct.toFixed(1) + "%";
+}