vieval 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +53 -52
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +258 -258
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +64 -64
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.mjs +1 -1
- package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +207 -195
- package/dist/index.mjs +147 -147
- package/dist/index.mjs.map +1 -1
- package/dist/models-CaCOUPZw.mjs.map +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +359 -359
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +3 -3
- package/dist/cli-CHFCF8UR.mjs.map +0 -1
- package/dist/env-BVYeJhGA.mjs.map +0 -1
- package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
- package/dist/registry-BSyjwZFx.mjs.map +0 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry-BK7k6X81.mjs","names":["loadEnv","loadViteEnv"],"sources":["../src/cli/config.ts","../src/core/telemetry/noop.ts","../src/core/telemetry/otel.ts","../src/dsl/registry.ts"],"sourcesContent":["import type { CliReportingConfig, ConfigHookPlugin, MatrixDefinition, MatrixLayer, TaskRunContext } from '../config'\nimport type { ModelDefinition } from '../config/models'\nimport type { RunResult, TaskExecutionContext } from '../core/runner'\nimport type { InferenceExecutor, ScheduledTask } from '../core/runner/schedule'\nimport type { VievalVitestCompatReporterReference } from './reporters/vitest-compat-reporter'\n\nimport process from 'node:process'\n\nimport { access, readFile } from 'node:fs/promises'\nimport { createRequire } from 'node:module'\nimport { dirname, extname, isAbsolute, join, resolve } from 'node:path'\nimport { pathToFileURL } from 'node:url'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { createDefineConfig, loadConfig } from 'c12'\nimport { loadEnv as loadViteEnv } from 'vite'\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\nconst require = createRequire(import.meta.url)\n\n/**\n * Benchmark identity and shared cache namespace.\n */\nexport interface CliComparisonBenchmarkConfig {\n /**\n * Benchmark identifier used in report artifacts.\n */\n id: string\n /**\n * Shared cache namespace reused across method runs.\n */\n sharedCaseNamespace: string\n}\n\n/**\n * One comparison entry loaded by `vieval compare`.\n */\nexport interface CliComparisonConfig {\n /**\n * Benchmark metadata for reporting and shared cache coordination.\n */\n benchmark: CliComparisonBenchmarkConfig\n /**\n * Optional workspace exclude glob(s), also relative to config directory.\n */\n excludesWorkspaces?: string | string[]\n /**\n * Comparison id selected by `--comparison`.\n */\n id: string\n /**\n * Optional workspace glob(s) discovered relative to config directory.\n */\n includesWorkspaces?: string | string[]\n /**\n * Optional explicit method list.\n */\n methods?: CliComparisonMethodConfig[]\n}\n\n/**\n * One explicit comparison method descriptor.\n */\nexport interface CliComparisonMethodConfig {\n /**\n * Optional explicit config file path for this workspace.\n */\n configFilePath?: string\n /**\n * Method identifier shown in compare reports.\n */\n id: string\n /**\n * Project name to execute inside workspace config.\n */\n project: string\n /**\n * Workspace path containing this method's `vieval.config.*`.\n */\n workspace: string\n}\n\n/**\n * Comparison mode config for `vieval compare`.\n */\nexport interface CliComparisonModeConfig extends CliConfigBase {\n comparisons: CliComparisonConfig[]\n projects?: never\n workspaces?: never\n}\n\n/**\n * Concurrency limits that can be declared in CLI-facing config.\n *\n * Use when:\n * - the CLI needs independent caps for workspace, project, task, attempt, or case scheduling scopes\n * - config authors want to define concurrency without wiring runtime execution yet\n *\n * Expects:\n * - each provided value to be a positive integer chosen by the caller\n *\n * Returns:\n * - one partial concurrency descriptor keyed by scheduling scope\n */\nexport interface CliConcurrencyConfig {\n /**\n * Attempt-level concurrency cap.\n */\n attempt?: number\n /**\n * Case-level concurrency cap.\n */\n case?: number\n /**\n * Project-level concurrency cap.\n */\n project?: number\n /**\n * Task-level concurrency cap.\n */\n task?: number\n /**\n * Workspace-level concurrency cap.\n */\n workspace?: number\n}\n\n/**\n * Top-level CLI config loaded from `vieval.config.*`.\n *\n * Exactly one top-level mode is allowed:\n * - `projects`\n * - `workspaces`\n * - `comparisons`\n */\nexport type CliConfig = CliComparisonModeConfig | CliProjectModeConfig | CliWorkspaceModeConfig\n\nexport type CliConfigMode = 'comparisons' | 'projects' | 'workspaces'\n\n/**\n * CLI plugin shape bound to the full CLI config object.\n */\nexport type CliConfigPlugin = ConfigHookPlugin<CliConfig>\n\n/**\n * Defines one project block for `vieval run`.\n */\nexport interface CliProjectConfig {\n /**\n * Optional project-scoped concurrency overrides.\n *\n * @default inherited from top-level or CLI execution settings\n */\n concurrency?: Omit<CliConcurrencyConfig, 'workspace'>\n /**\n * Optional eval-time matrix dimensions.\n */\n evalMatrix?: MatrixDefinition | MatrixLayer\n /**\n * Glob patterns excluded from discovery.\n *\n * @default Common exclusion globs for dependencies, build output, and VCS directories.\n */\n exclude?: string[]\n /**\n * Optional task executor.\n *\n * Use when this project should execute live inferenceExecutor requests.\n * If omitted, `vieval run` performs collection + scheduling only.\n */\n executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>\n /**\n * Glob patterns for eval file discovery.\n *\n * @default Common eval file globs for TypeScript and JavaScript module formats.\n */\n include?: string[]\n /**\n * Providers expanded by scheduler.\n *\n * @default [{ id: 'default' }]\n */\n inferenceExecutors?: InferenceExecutor[]\n /**\n * Model definitions available to project runtime execution.\n *\n * Inference executors control schedule fan-out, while models provide\n * runtime lookup metadata for model plugin helpers during task execution.\n *\n * @default inherited from top-level config models\n */\n models?: ModelDefinition[]\n /**\n * Project label used in summary output.\n */\n name: string\n /**\n * Optional project-local plugins.\n */\n plugins?: CliConfigPlugin[]\n /**\n * Optional vitest-compatible reporter modules.\n *\n * Use when:\n * - project runs should emit additional reporter callbacks using Vitest-style lifecycle names\n *\n * @default []\n */\n reporters?: VievalVitestCompatReporterReference[]\n /**\n * Project root used for include/exclude glob matching.\n *\n * @default process cwd\n */\n root?: string\n /**\n * Optional run-time matrix dimensions.\n */\n runMatrix?: MatrixDefinition | MatrixLayer\n}\n\n/**\n * Execution context exposed to project-level `executor` implementations.\n *\n * Use when:\n * - a project executor needs task-scoped models plus case reporter hooks\n * - custom scheduling logic wants the same hook shape as `TaskRunContext`\n *\n * Expects:\n * - `models` exposes configured model registrations for plugin helpers\n * - `reporterHooks` follows `TaskRunContext['reporterHooks']`\n * - `telemetry` follows `TaskRunContext['telemetry']`\n * - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`\n */\nexport interface CliProjectExecutorContext extends TaskExecutionContext {\n reporterHooks?: TaskRunContext['reporterHooks']\n runtimeConcurrency?: TaskRunContext['runtimeConcurrency']\n telemetry?: TaskRunContext['telemetry']\n}\n\n/**\n * Project mode config for `vieval run`.\n */\nexport interface CliProjectModeConfig extends CliConfigBase {\n comparisons?: never\n /**\n * Project list expanded by `vieval run`.\n *\n * @default [{ name: 'default' }]\n */\n projects?: CliProjectConfig[]\n workspaces?: never\n}\n\n/**\n * One workspace descriptor for workspace-mode configs.\n */\nexport interface CliWorkspaceConfig {\n /**\n * Workspace identifier.\n */\n id: string\n /**\n * Workspace root path.\n */\n root: string\n}\n\n/**\n * Workspace mode config placeholder for future workspace orchestration.\n */\nexport interface CliWorkspaceModeConfig extends CliConfigBase {\n comparisons?: never\n projects?: never\n workspaces: CliWorkspaceConfig[]\n}\n\n/**\n * Result of loading and normalizing a config file.\n */\nexport interface LoadedCliConfig {\n concurrency?: CliConcurrencyConfig\n configFilePath: null | string\n env: NodeJS.ProcessEnv\n projects: NormalizedCliProjectConfig[]\n reporting?: CliReportingConfig\n}\n\nexport interface LoadedRawCliConfig {\n config: CliConfig | null\n configFilePath: null | string\n}\n\n/**\n * Runtime options for config loading.\n */\nexport interface LoadVievalCliConfigOptions {\n /**\n * Explicit config file path.\n */\n configFilePath?: string\n /**\n * Starting directory for config lookup.\n *\n * @default process.cwd()\n */\n cwd?: string\n}\n\n/**\n * Normalized CLI project used by runtime orchestration.\n */\nexport interface NormalizedCliProjectConfig {\n concurrency?: Omit<CliConcurrencyConfig, 'workspace'>\n evalMatrix?: MatrixLayer\n exclude: string[]\n executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>\n include: string[]\n inferenceExecutors: InferenceExecutor[]\n models: ModelDefinition[]\n name: string\n reporters: VievalVitestCompatReporterReference[]\n root: string\n runMatrix?: MatrixLayer\n}\n\n/**\n * Top-level CLI config loaded from `vieval.config.*`.\n */\ninterface CliConfigBase {\n /**\n * Global concurrency defaults inherited by projects and tasks.\n *\n * Use when:\n * - config authors want one shared concurrency policy across workspace, project, task, attempt, and case scopes\n * - project-local overrides should start from a top-level baseline\n *\n * Expects:\n * - each provided value to be a positive integer chosen by the caller\n *\n * @default undefined\n */\n concurrency?: CliConcurrencyConfig\n /**\n * Environment variables injected into `process.env` during `vieval run`.\n *\n * Use when:\n * - eval tasks depend on runtime env values (for example inferenceExecutor API keys)\n * - config wants deterministic env values without shell-level exports\n *\n * @default {}\n */\n env?: NodeJS.ProcessEnv\n /**\n * Global model definitions inherited by projects.\n *\n * @default []\n */\n models?: ModelDefinition[]\n /**\n * Global config plugins.\n *\n * @default []\n */\n plugins?: CliConfigPlugin[]\n /**\n * Global vitest-compatible reporter modules inherited by projects.\n *\n * @default []\n */\n reporters?: VievalVitestCompatReporterReference[]\n /**\n * Optional reporting integrations shared by CLI run orchestration.\n *\n * @default undefined\n */\n reporting?: CliReportingConfig\n}\n\n/**\n * Helper used by `vieval.config.*` for better type inference.\n */\nexport const defineConfig = createDefineConfig<CliConfig>()\n\n/**\n * Detects which top-level config mode is active.\n *\n * Expects:\n * - exactly one of `projects`, `workspaces`, or `comparisons`\n *\n * Returns:\n * - active top-level mode key\n */\nexport function detectCliConfigMode(config: CliConfig): CliConfigMode {\n const declaredModes: CliConfigMode[] = []\n if (config.projects != null) {\n declaredModes.push('projects')\n }\n if (config.workspaces != null) {\n declaredModes.push('workspaces')\n }\n if (config.comparisons != null) {\n declaredModes.push('comparisons')\n }\n\n if (declaredModes.length > 1) {\n throw new Error(`Invalid vieval config: top-level keys are mutually exclusive. Found ${declaredModes.join(', ')}.`)\n }\n\n return declaredModes[0] ?? 'projects'\n}\n\n/**\n * Loads `.env*` files using Vite's env resolution behavior.\n *\n * Use when:\n * - `vieval.config.*` should mirror Vitest/Vite env loading semantics\n * - config wants to populate top-level `env` via file-based values\n *\n * Expects:\n * - `mode` to match the env file suffix (`.env.<mode>`)\n * - `envDir` to point at the directory containing `.env` files\n *\n * Returns:\n * - Key/value map compatible with `CliConfig['env']`\n */\nexport function loadEnv(mode: string, envDir: string, prefixes: string | string[] = ''): NodeJS.ProcessEnv {\n return loadViteEnv(mode, envDir, prefixes)\n}\n\n/**\n * Loads nearest `vieval.config.*` without project normalization.\n */\nexport async function loadRawVievalConfig(options: LoadVievalCliConfigOptions = {}): Promise<LoadedRawCliConfig> {\n const cwd = options.cwd ?? process.cwd()\n\n try {\n const loadedConfig = await resolveVievalConfig(cwd, options.configFilePath)\n if (loadedConfig.configFilePath == null || loadedConfig.config == null) {\n return {\n config: null,\n configFilePath: null,\n }\n }\n\n const config = await applyVievalPlugins(loadedConfig.config)\n detectCliConfigMode(config)\n\n return {\n config,\n configFilePath: loadedConfig.configFilePath,\n }\n }\n catch (error) {\n const errorMessage = errorMessageFrom(error) ?? 'Unknown config loading error.'\n const configFilePath = options.configFilePath == null\n ? 'vieval.config'\n : (isAbsolute(options.configFilePath) ? options.configFilePath : resolve(cwd, options.configFilePath))\n throw new Error(`Failed to load vieval config \"${configFilePath}\": ${errorMessage}`, { cause: error })\n }\n}\n\n/**\n * Loads nearest `vieval.config.*` and returns normalized project definitions.\n *\n * Call stack:\n *\n * {@link loadVievalCliConfig}\n * -> {@link resolveVievalConfig}\n * -> {@link normalizeConfig}\n * -> {@link NormalizedCliProjectConfig}[]\n *\n * Use when:\n * - CLI orchestration needs project includes/excludes similar to Vitest\n * - callers want config auto-discovery without manual imports in eval files\n */\nexport async function loadVievalCliConfig(options: LoadVievalCliConfigOptions = {}): Promise<LoadedCliConfig> {\n const cwd = options.cwd ?? process.cwd()\n try {\n const loadedConfig = await loadRawVievalConfig(options)\n if (loadedConfig.configFilePath == null || loadedConfig.config == null) {\n return {\n concurrency: undefined,\n configFilePath: null,\n env: {},\n projects: await normalizeConfig(null, cwd),\n reporting: undefined,\n }\n }\n\n const config = loadedConfig.config\n\n return {\n concurrency: config.concurrency,\n configFilePath: loadedConfig.configFilePath,\n env: config.env ?? {},\n projects: await normalizeConfig(config, dirname(loadedConfig.configFilePath)),\n reporting: normalizeReportingConfig(config.reporting),\n }\n }\n catch (error) {\n const errorMessage = errorMessageFrom(error) ?? 'Unknown config loading error.'\n const configFilePath = options.configFilePath == null\n ? 'vieval.config'\n : (isAbsolute(options.configFilePath) ? options.configFilePath : resolve(cwd, options.configFilePath))\n throw new Error(`Failed to load vieval config \"${configFilePath}\": ${errorMessage}`, { cause: error })\n }\n}\n\nasync function applyProjectPlugins(\n config: CliConfig | null | undefined,\n project: CliProjectConfig,\n normalizedProject: NormalizedCliProjectConfig,\n cwd: string,\n): Promise<NormalizedCliProjectConfig> {\n if (project.plugins == null || project.plugins.length === 0) {\n return normalizedProject\n }\n\n const scopedConfig = createProjectScopedConfig(config, project, normalizedProject)\n const resolvedConfig = await applyVievalPlugins(scopedConfig) as CliProjectModeConfig\n const scopedProject = scopedConfig.projects?.[0]\n if (scopedProject == null) {\n throw new Error('Project-local plugin normalization requires one scoped project.')\n }\n const resolvedProject = resolvedConfig.projects?.[0] ?? scopedProject\n\n return normalizeProjectConfig(\n {\n ...resolvedProject,\n concurrency: resolvedProject.concurrency === scopedProject.concurrency\n ? toProjectConcurrencyDefaults(resolvedConfig.concurrency)\n : resolvedProject.concurrency,\n models: resolvedProject.models === scopedProject.models\n ? resolvedConfig.models\n : resolvedProject.models,\n reporters: resolvedProject.reporters === scopedProject.reporters\n ? resolvedConfig.reporters\n : resolvedProject.reporters,\n },\n cwd,\n undefined,\n resolvedConfig.models ?? [],\n resolvedConfig.reporters ?? [],\n )\n}\n\nasync function applyVievalPlugins(config: CliConfig): Promise<CliConfig> {\n let currentConfig: CliConfig = config\n const plugins = currentConfig.plugins ?? []\n\n for (const plugin of plugins) {\n if (plugin.configVieval == null) {\n continue\n }\n\n const nextConfig = await plugin.configVieval(currentConfig)\n if (nextConfig != null) {\n currentConfig = {\n ...currentConfig,\n ...nextConfig,\n } as CliConfig\n }\n }\n\n for (const plugin of plugins) {\n await plugin.configVievalResolved?.(currentConfig)\n }\n\n return currentConfig\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: MatrixDefinition | MatrixLayer): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction createProjectScopedConfig(\n config: CliConfig | null | undefined,\n project: CliProjectConfig,\n normalizedProject: NormalizedCliProjectConfig,\n): CliProjectModeConfig {\n const concurrency = config?.concurrency == null && normalizedProject.concurrency == null\n ? undefined\n : {\n ...normalizedProject.concurrency,\n workspace: config?.concurrency?.workspace,\n }\n\n return {\n concurrency,\n env: config?.env,\n models: normalizedProject.models,\n plugins: project.plugins,\n projects: [\n {\n concurrency: normalizedProject.concurrency,\n evalMatrix: normalizedProject.evalMatrix,\n exclude: normalizedProject.exclude,\n executor: normalizedProject.executor,\n include: normalizedProject.include,\n inferenceExecutors: normalizedProject.inferenceExecutors,\n models: normalizedProject.models,\n name: normalizedProject.name,\n plugins: project.plugins,\n reporters: normalizedProject.reporters,\n root: normalizedProject.root,\n runMatrix: normalizedProject.runMatrix,\n },\n ],\n reporters: normalizedProject.reporters,\n reporting: config?.reporting,\n }\n}\n\nasync function findNearestConfigFile(startDirectory: string): Promise<null | string> {\n const supportedFileNames = [\n 'vieval.config.ts',\n 'vieval.config.mts',\n 'vieval.config.cts',\n 'vieval.config.js',\n 'vieval.config.mjs',\n 'vieval.config.cjs',\n 'vieval.config.json',\n ]\n\n let currentDirectory = resolve(startDirectory)\n\n while (true) {\n for (const fileName of supportedFileNames) {\n const candidatePath = join(currentDirectory, fileName)\n if (await isReadableFile(candidatePath)) {\n return candidatePath\n }\n }\n\n const parentDirectory = dirname(currentDirectory)\n if (parentDirectory === currentDirectory) {\n return null\n }\n currentDirectory = parentDirectory\n }\n}\n\nasync function importVievalConfigModule(filePath: string): Promise<unknown> {\n const extension = extname(filePath)\n\n if (isConfigFileExtensionUsingJsonParse(extension)) {\n const raw = await readFile(filePath, 'utf-8')\n return JSON.parse(raw) as unknown\n }\n\n if (isConfigFileExtensionUsingRequire(extension)) {\n return require(filePath) as unknown\n }\n\n return import(pathToFileURL(filePath).href)\n}\n\nfunction isConfigFileExtensionUsingJsonParse(extension: string): boolean {\n return extension === '.json'\n}\n\nfunction isConfigFileExtensionUsingRequire(extension: string): boolean {\n return extension === '.cjs' || extension === '.cts'\n}\n\nfunction isLayerMatrixDefinition(matrix: MatrixDefinition | MatrixLayer): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nasync function isReadableFile(filePath: string): Promise<boolean> {\n try {\n await access(filePath)\n return true\n }\n catch {\n return false\n }\n}\n\nfunction mergeProjectConcurrency(\n inheritedConcurrency: Omit<CliConcurrencyConfig, 'workspace'> | undefined,\n projectConcurrency: Omit<CliConcurrencyConfig, 'workspace'> | undefined,\n): Omit<CliConcurrencyConfig, 'workspace'> | undefined {\n if (inheritedConcurrency == null && projectConcurrency == null) {\n return undefined\n }\n\n return {\n attempt: projectConcurrency?.attempt ?? inheritedConcurrency?.attempt,\n case: projectConcurrency?.case ?? inheritedConcurrency?.case,\n project: projectConcurrency?.project ?? inheritedConcurrency?.project,\n task: projectConcurrency?.task ?? inheritedConcurrency?.task,\n }\n}\n\nasync function normalizeConfig(config: CliConfig | null | undefined, cwd: string): Promise<NormalizedCliProjectConfig[]> {\n if (config != null) {\n const mode = detectCliConfigMode(config)\n if (mode === 'comparisons') {\n throw new Error('vieval run requires project-mode config. Received comparison-mode config.')\n }\n }\n\n const projects = config?.workspaces == null\n ? ((config as CliProjectModeConfig | null | undefined)?.projects ?? [{ name: 'default' }])\n : config.workspaces.map(workspace => ({\n name: workspace.id,\n root: workspace.root,\n }))\n const inheritedConcurrency = toProjectConcurrencyDefaults(config?.concurrency)\n const inheritedModels = config?.models ?? []\n const inheritedReporterReferences = config?.reporters ?? []\n\n return Promise.all(projects.map(async (project) => {\n const normalizedProject = normalizeProjectConfig(\n project,\n cwd,\n inheritedConcurrency,\n inheritedModels,\n inheritedReporterReferences,\n )\n\n return applyProjectPlugins(\n config,\n project,\n normalizedProject,\n cwd,\n )\n }))\n}\n\nfunction normalizeMatrixLayerInput(matrix: MatrixDefinition | MatrixLayer | undefined): MatrixLayer | undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isLayerMatrixDefinition(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction normalizeProjectConfig(\n project: CliProjectConfig,\n cwd: string,\n inheritedConcurrency: Omit<CliConcurrencyConfig, 'workspace'> | undefined,\n inheritedModels: readonly ModelDefinition[],\n inheritedReporterReferences: readonly VievalVitestCompatReporterReference[],\n): NormalizedCliProjectConfig {\n const include = project.include ?? [\n '**/*.eval.ts',\n '**/*.eval.mts',\n '**/*.eval.cts',\n '**/*.eval.js',\n '**/*.eval.mjs',\n '**/*.eval.cjs',\n ]\n const exclude = project.exclude ?? [\n '**/node_modules/**',\n '**/dist/**',\n '**/.git/**',\n ]\n const models = project.models ?? [...inheritedModels]\n const inferenceExecutors = project.inferenceExecutors ?? [{ id: 'default' }]\n const root = project.root == null\n ? cwd\n : (isAbsolute(project.root) ? project.root : resolve(cwd, project.root))\n const reporters = project.reporters ?? [...inheritedReporterReferences]\n const concurrency = mergeProjectConcurrency(inheritedConcurrency, project.concurrency)\n\n return {\n concurrency,\n evalMatrix: normalizeMatrixLayerInput(project.evalMatrix),\n exclude,\n executor: project.executor,\n include,\n inferenceExecutors,\n models,\n name: project.name,\n reporters,\n root,\n runMatrix: normalizeMatrixLayerInput(project.runMatrix),\n }\n}\n\nfunction normalizeReportingConfig(config: CliReportingConfig | undefined): CliReportingConfig | undefined {\n if (config == null) {\n return undefined\n }\n\n return {\n openTelemetry: config.openTelemetry == null\n ? undefined\n : {\n enabled: config.openTelemetry.enabled ?? false,\n onRunEnd: config.openTelemetry.onRunEnd,\n },\n }\n}\n\nfunction resolveConfigExport(moduleValue: unknown): unknown {\n if (moduleValue == null) {\n return null\n }\n\n if (typeof moduleValue !== 'object') {\n return moduleValue\n }\n\n if ('default' in moduleValue) {\n return (moduleValue as { default: unknown }).default\n }\n\n return moduleValue\n}\n\nasync function resolveVievalConfig(\n cwd: string,\n explicitConfigFilePath: string | undefined,\n): Promise<{\n config: CliConfig | null\n configFilePath: null | string\n}> {\n const resolvedConfigFilePath = explicitConfigFilePath == null\n ? await findNearestConfigFile(cwd)\n : (isAbsolute(explicitConfigFilePath) ? explicitConfigFilePath : resolve(cwd, explicitConfigFilePath))\n\n if (explicitConfigFilePath != null && resolvedConfigFilePath != null && !await isReadableFile(resolvedConfigFilePath)) {\n throw new Error(`Config file does not exist or is not readable: ${resolvedConfigFilePath}`)\n }\n\n if (resolvedConfigFilePath == null) {\n return {\n config: null,\n configFilePath: null,\n }\n }\n\n const loaded = await loadConfig<CliConfig>({\n configFile: resolvedConfigFilePath,\n cwd,\n dotenv: false,\n envName: false,\n extend: false,\n import: importVievalConfigModule,\n packageJson: false,\n rcFile: false,\n resolveModule: resolveConfigExport,\n })\n return {\n config: loaded.config,\n configFilePath: resolvedConfigFilePath,\n }\n}\n\nfunction toProjectConcurrencyDefaults(\n concurrency: CliConcurrencyConfig | undefined,\n): Omit<CliConcurrencyConfig, 'workspace'> | undefined {\n if (concurrency == null) {\n return undefined\n }\n\n return {\n attempt: concurrency.attempt,\n case: concurrency.case,\n project: concurrency.project,\n task: concurrency.task,\n }\n}\n","import type { TelemetryRuntime } from './types'\n\n/**\n * Creates the default no-op telemetry runtime.\n *\n * Use when:\n * - OpenTelemetry is not enabled by config\n * - tests need deterministic pass-through execution\n *\n * Expects:\n * - callers still wrap run/task/case boundaries with `withSpan`\n *\n * Returns:\n * - a runtime that never emits external telemetry and never changes control flow\n */\nexport function createNoopTelemetryRuntime(): TelemetryRuntime {\n return {\n addEvent() {},\n recordException() {},\n setAttributes() {},\n async withSpan(_name, _attributes, callback) {\n return await callback()\n },\n }\n}\n","import type { TelemetryAttributes, TelemetryRuntime } from './types'\n\nimport { errorMessageFrom } from '@moeru/std'\n\n/**\n * Options used to construct the OpenTelemetry-backed telemetry runtime.\n */\nexport interface CreateOpenTelemetryRuntimeOptions {\n /**\n * Optional import adapter used by tests to avoid requiring a real OpenTelemetry SDK.\n *\n * @default dynamic import of `@opentelemetry/api`\n */\n importApi?: () => Promise<OpenTelemetryApiModule>\n}\ninterface OpenTelemetryApiModule {\n SpanStatusCode: { ERROR: number }\n trace: {\n getActiveSpan: () => OpenTelemetrySpan | undefined\n getTracer: (name: string) => OpenTelemetryTracer\n }\n}\ntype OpenTelemetryAttributes = Record<string, OpenTelemetryAttributeValue>\n\ntype OpenTelemetryAttributeScalar = boolean | number | string\n\ntype OpenTelemetryAttributeValue = OpenTelemetryAttributeScalar | readonly boolean[] | readonly number[] | readonly string[]\n\ninterface OpenTelemetrySpan {\n addEvent: (name: string, attributes?: OpenTelemetryAttributes) => void\n end: () => void\n recordException: (error: unknown) => void\n setAttributes: (attributes: OpenTelemetryAttributes) => void\n setStatus: (status: { code: number, message?: string }) => void\n}\n\ninterface OpenTelemetryTracer {\n startActiveSpan: <T>(\n name: string,\n options: { attributes: OpenTelemetryAttributes },\n callback: (span: OpenTelemetrySpan) => Promise<T>,\n ) => Promise<T>\n}\n\n/**\n * Creates an OpenTelemetry-backed runtime using active spans.\n *\n * Use when:\n * - `reporting.openTelemetry.enabled` is true\n * - the user's config has initialized an OpenTelemetry SDK or intentionally relies on the API no-op provider\n *\n * Expects:\n * - `@opentelemetry/api` is resolvable when enabled\n * - SDK lifecycle is managed by user config and `reporting.openTelemetry.onRunEnd`\n *\n * Returns:\n * - a runtime that starts active spans and forwards events to the current active span\n */\nexport function createOpenTelemetryRuntime(options: CreateOpenTelemetryRuntimeOptions = {}): TelemetryRuntime {\n const importApi = options.importApi ?? importOpenTelemetryApi\n let apiPromise: Promise<OpenTelemetryApiModule> | undefined\n let loadedApi: OpenTelemetryApiModule | undefined\n\n async function getApi(): Promise<OpenTelemetryApiModule> {\n apiPromise ??= importApi().then((api) => {\n loadedApi = api\n return api\n })\n return await apiPromise\n }\n\n return {\n addEvent(name, attributes) {\n loadedApi?.trace.getActiveSpan()?.addEvent(name, normalizeOpenTelemetryAttributes(attributes))\n },\n recordException(error) {\n loadedApi?.trace.getActiveSpan()?.recordException(error)\n },\n setAttributes(attributes) {\n loadedApi?.trace.getActiveSpan()?.setAttributes(normalizeOpenTelemetryAttributes(attributes) ?? {})\n },\n async withSpan(name, attributes, callback) {\n const api = await getApi()\n const tracer = api.trace.getTracer('vieval')\n\n return await tracer.startActiveSpan(name, { attributes: normalizeOpenTelemetryAttributes(attributes) ?? {} }, async (span) => {\n try {\n return await callback()\n }\n catch (error) {\n span.recordException(error)\n span.setStatus({ code: api.SpanStatusCode.ERROR, message: errorMessageFrom(error) ?? 'Unknown error' })\n throw error\n }\n finally {\n span.end()\n }\n })\n },\n }\n}\n\nasync function importOpenTelemetryApi(): Promise<OpenTelemetryApiModule> {\n const moduleName = '@opentelemetry/api'\n return await import(moduleName) as unknown as OpenTelemetryApiModule\n}\n\nfunction isHomogeneousOpenTelemetryAttributeArray(value: readonly unknown[]): value is readonly boolean[] | readonly number[] | readonly string[] {\n if (value.length === 0) {\n return true\n }\n\n const firstType = typeof value[0]\n if (firstType !== 'boolean' && firstType !== 'number' && firstType !== 'string') {\n return false\n }\n\n return value.every(item => typeof item === firstType)\n}\n\nfunction isOpenTelemetryAttributeScalar(value: unknown): value is OpenTelemetryAttributeScalar {\n return typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string'\n}\n\n/**\n * Normalizes JSON-compatible telemetry attributes into OpenTelemetry-safe attributes.\n *\n * Before:\n * - `{ nil: null, nested: ['a', [1, null]], scalarArray: ['a', 1, true] }`\n *\n * After:\n * - `{ nested: '[\"a\",[1,null]]', scalarArray: ['a', 1, true] }`\n */\nfunction normalizeOpenTelemetryAttributes(attributes: TelemetryAttributes | undefined): OpenTelemetryAttributes | undefined {\n if (attributes == null) {\n return undefined\n }\n\n const normalized: OpenTelemetryAttributes = {}\n\n for (const [key, value] of Object.entries(attributes)) {\n if (value == null) {\n continue\n }\n\n if (isOpenTelemetryAttributeScalar(value)) {\n normalized[key] = value\n continue\n }\n\n if (Array.isArray(value)) {\n normalized[key] = isHomogeneousOpenTelemetryAttributeArray(value)\n ? value\n : stringifyAttributeValue(value) ?? ''\n continue\n }\n\n const stringified = stringifyAttributeValue(value)\n\n if (stringified != null) {\n normalized[key] = stringified\n }\n }\n\n return normalized\n}\n\nfunction stringifyAttributeValue(value: unknown): string | undefined {\n try {\n return JSON.stringify(value)\n }\n catch {\n return String(value)\n }\n}\n","import type { EvalDefinition } from '../config'\n\nimport process from 'node:process'\n\ninterface EvalDefinitionRegistryStore {\n activeModuleHref: null | string\n registeredDefinitionsByModule: Map<string, EvalDefinition[]>\n}\n\nconst registryStoreSymbol = Symbol.for('vieval.dsl.registry.store')\n\n/**\n * Starts module-scoped eval registration collection.\n */\nexport function beginModuleRegistration(moduleHref: string): void {\n const store = getRegistryStore()\n store.activeModuleHref = moduleHref\n}\n\n/**\n * Consumes registered definitions for one module and clears stored state.\n */\nexport function consumeModuleRegistrations(moduleHref: string): EvalDefinition[] {\n const store = getRegistryStore()\n const definitions = store.registeredDefinitionsByModule.get(moduleHref) ?? []\n store.registeredDefinitionsByModule.delete(moduleHref)\n return definitions\n}\n\n/**\n * Ends module-scoped eval registration collection.\n */\nexport function endModuleRegistration(): void {\n const store = getRegistryStore()\n store.activeModuleHref = null\n}\n\n/**\n * Registers one eval definition against the currently active module.\n */\nexport function registerEvalDefinition(definition: EvalDefinition): void {\n const store = getRegistryStore()\n\n if (store.activeModuleHref == null) {\n return\n }\n\n const existing = store.registeredDefinitionsByModule.get(store.activeModuleHref) ?? []\n existing.push(definition)\n store.registeredDefinitionsByModule.set(store.activeModuleHref, existing)\n}\n\nfunction getRegistryStore(): EvalDefinitionRegistryStore {\n const processWithStore = process as NodeJS.Process & {\n [registryStoreSymbol]?: EvalDefinitionRegistryStore\n }\n\n processWithStore[registryStoreSymbol] ??= {\n activeModuleHref: null,\n registeredDefinitionsByModule: new Map<string, EvalDefinition[]>(),\n }\n\n return processWithStore[registryStoreSymbol]\n}\n"],"mappings":";;;;;;;;;AAiBA,MAAM,kCAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;AAAU,CAAC;AACjE,MAAM,wCAAwC;AAC9C,MAAM,UAAU,cAAc,OAAO,KAAK,GAAG;;;;AA4W7C,MAAa,eAAe,mBAA8B;;;;;;;;;;AAW1D,SAAgB,oBAAoB,QAAkC;CACpE,MAAM,gBAAiC,CAAC;CACxC,IAAI,OAAO,YAAY,MACrB,cAAc,KAAK,UAAU;CAE/B,IAAI,OAAO,cAAc,MACvB,cAAc,KAAK,YAAY;CAEjC,IAAI,OAAO,eAAe,MACxB,cAAc,KAAK,aAAa;CAGlC,IAAI,cAAc,SAAS,GACzB,MAAM,IAAI,MAAM,uEAAuE,cAAc,KAAK,IAAI,EAAE,EAAE;CAGpH,OAAO,cAAc,MAAM;AAC7B;;;;;;;;;;;;;;;AAgBA,SAAgBA,UAAQ,MAAc,QAAgB,WAA8B,IAAuB;CACzG,OAAOC,QAAY,MAAM,QAAQ,QAAQ;AAC3C;;;;AAKA,eAAsB,oBAAoB,UAAsC,CAAC,GAAgC;CAC/G,MAAM,MAAM,QAAQ,OAAO,QAAQ,IAAI;CAEvC,IAAI;EACF,MAAM,eAAe,MAAM,oBAAoB,KAAK,QAAQ,cAAc;EAC1E,IAAI,aAAa,kBAAkB,QAAQ,aAAa,UAAU,MAChE,OAAO;GACL,QAAQ;GACR,gBAAgB;EAClB;EAGF,MAAM,SAAS,MAAM,mBAAmB,aAAa,MAAM;EAC3D,oBAAoB,MAAM;EAE1B,OAAO;GACL;GACA,gBAAgB,aAAa;EAC/B;CACF,SACO,OAAO;EACZ,MAAM,eAAe,iBAAiB,KAAK,KAAK;EAChD,MAAM,iBAAiB,QAAQ,kBAAkB,OAC7C,kBACC,WAAW,QAAQ,cAAc,IAAI,QAAQ,iBAAiB,QAAQ,KAAK,QAAQ,cAAc;EACtG,MAAM,IAAI,MAAM,iCAAiC,eAAe,KAAK,gBAAgB,EAAE,OAAO,MAAM,CAAC;CACvG;AACF;;;;;;;;;;;;;;;AAgBA,eAAsB,oBAAoB,UAAsC,CAAC,GAA6B;CAC5G,MAAM,MAAM,QAAQ,OAAO,QAAQ,IAAI;CACvC,IAAI;EACF,MAAM,eAAe,MAAM,oBAAoB,OAAO;EACtD,IAAI,aAAa,kBAAkB,QAAQ,aAAa,UAAU,MAChE,OAAO;GACL,aAAa,KAAA;GACb,gBAAgB;GAChB,KAAK,CAAC;GACN,UAAU,MAAM,gBAAgB,MAAM,GAAG;GACzC,WAAW,KAAA;EACb;EAGF,MAAM,SAAS,aAAa;EAE5B,OAAO;GACL,aAAa,OAAO;GACpB,gBAAgB,aAAa;GAC7B,KAAK,OAAO,OAAO,CAAC;GACpB,UAAU,MAAM,gBAAgB,QAAQ,QAAQ,aAAa,cAAc,CAAC;GAC5E,WAAW,yBAAyB,OAAO,SAAS;EACtD;CACF,SACO,OAAO;EACZ,MAAM,eAAe,iBAAiB,KAAK,KAAK;EAChD,MAAM,iBAAiB,QAAQ,kBAAkB,OAC7C,kBACC,WAAW,QAAQ,cAAc,IAAI,QAAQ,iBAAiB,QAAQ,KAAK,QAAQ,cAAc;EACtG,MAAM,IAAI,MAAM,iCAAiC,eAAe,KAAK,gBAAgB,EAAE,OAAO,MAAM,CAAC;CACvG;AACF;AAEA,eAAe,oBACb,QACA,SACA,mBACA,KACqC;CACrC,IAAI,QAAQ,WAAW,QAAQ,QAAQ,QAAQ,WAAW,GACxD,OAAO;CAGT,MAAM,eAAe,0BAA0B,QAAQ,SAAS,iBAAiB;CACjF,MAAM,iBAAiB,MAAM,mBAAmB,YAAY;CAC5D,MAAM,gBAAgB,aAAa,WAAW;CAC9C,IAAI,iBAAiB,MACnB,MAAM,IAAI,MAAM,iEAAiE;CAEnF,MAAM,kBAAkB,eAAe,WAAW,MAAM;CAExD,OAAO,uBACL;EACE,GAAG;EACH,aAAa,gBAAgB,gBAAgB,cAAc,cACvD,6BAA6B,eAAe,WAAW,IACvD,gBAAgB;EACpB,QAAQ,gBAAgB,WAAW,cAAc,SAC7C,eAAe,SACf,gBAAgB;EACpB,WAAW,gBAAgB,cAAc,cAAc,YACnD,eAAe,YACf,gBAAgB;CACtB,GACA,KACA,KAAA,GACA,eAAe,UAAU,CAAC,GAC1B,eAAe,aAAa,CAAC,CAC/B;AACF;AAEA,eAAe,mBAAmB,QAAuC;CACvE,IAAI,gBAA2B;CAC/B,MAAM,UAAU,cAAc,WAAW,CAAC;CAE1C,KAAK,MAAM,UAAU,SAAS;EAC5B,IAAI,OAAO,gBAAgB,MACzB;EAGF,MAAM,aAAa,MAAM,OAAO,aAAa,aAAa;EAC1D,IAAI,cAAc,MAChB,gBAAgB;GACd,GAAG;GACH,GAAG;EACL;CAEJ;CAEA,KAAK,MAAM,UAAU,SACnB,MAAM,OAAO,uBAAuB,aAAa;CAGnD,OAAO;AACT;AAEA,SAAS,mCAAmC,QAA8C;CACxF,MAAM,aAAa,OAAO,KAAK,MAAM;CACrC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,GAAG,CAAC;CACvE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,GAAG,CAAC;CAEpE,IAAI,mBAAmB,aACrB,MAAM,IAAI,UAAU,qCAAqC;AAE7D;AAEA,SAAS,0BACP,QACA,SACA,mBACsB;CAQtB,OAAO;EACL,aARkB,QAAQ,eAAe,QAAQ,kBAAkB,eAAe,OAChF,KAAA,IACA;GACE,GAAG,kBAAkB;GACrB,WAAW,QAAQ,aAAa;EAClC;EAIF,KAAK,QAAQ;EACb,QAAQ,kBAAkB;EAC1B,SAAS,QAAQ;EACjB,UAAU,CACR;GACE,aAAa,kBAAkB;GAC/B,YAAY,kBAAkB;GAC9B,SAAS,kBAAkB;GAC3B,UAAU,kBAAkB;GAC5B,SAAS,kBAAkB;GAC3B,oBAAoB,kBAAkB;GACtC,QAAQ,kBAAkB;GAC1B,MAAM,kBAAkB;GACxB,SAAS,QAAQ;GACjB,WAAW,kBAAkB;GAC7B,MAAM,kBAAkB;GACxB,WAAW,kBAAkB;EAC/B,CACF;EACA,WAAW,kBAAkB;EAC7B,WAAW,QAAQ;CACrB;AACF;AAEA,eAAe,sBAAsB,gBAAgD;CACnF,MAAM,qBAAqB;EACzB;EACA;EACA;EACA;EACA;EACA;EACA;CACF;CAEA,IAAI,mBAAmB,QAAQ,cAAc;CAE7C,OAAO,MAAM;EACX,KAAK,MAAM,YAAY,oBAAoB;GACzC,MAAM,gBAAgB,KAAK,kBAAkB,QAAQ;GACrD,IAAI,MAAM,eAAe,aAAa,GACpC,OAAO;EAEX;EAEA,MAAM,kBAAkB,QAAQ,gBAAgB;EAChD,IAAI,oBAAoB,kBACtB,OAAO;EAET,mBAAmB;CACrB;AACF;AAEA,eAAe,yBAAyB,UAAoC;CAC1E,MAAM,YAAY,QAAQ,QAAQ;CAElC,IAAI,oCAAoC,SAAS,GAAG;EAClD,MAAM,MAAM,MAAM,SAAS,UAAU,OAAO;EAC5C,OAAO,KAAK,MAAM,GAAG;CACvB;CAEA,IAAI,kCAAkC,SAAS,GAC7C,OAAO,QAAQ,QAAQ;CAGzB,OAAO,OAAO,cAAc,QAAQ,CAAC,CAAC;AACxC;AAEA,SAAS,oCAAoC,WAA4B;CACvE,OAAO,cAAc;AACvB;AAEA,SAAS,kCAAkC,WAA4B;CACrE,OAAO,cAAc,UAAU,cAAc;AAC/C;AAEA,SAAS,wBAAwB,QAA+D;CAC9F,MAAM,aAAa,OAAO,KAAK,MAAM;CACrC,OACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,GAAG,CAAC;AAEvD;AAEA,eAAe,eAAe,UAAoC;CAChE,IAAI;EACF,MAAM,OAAO,QAAQ;EACrB,OAAO;CACT,QACM;EACJ,OAAO;CACT;AACF;AAEA,SAAS,wBACP,sBACA,oBACqD;CACrD,IAAI,wBAAwB,QAAQ,sBAAsB,MACxD;CAGF,OAAO;EACL,SAAS,oBAAoB,WAAW,sBAAsB;EAC9D,MAAM,oBAAoB,QAAQ,sBAAsB;EACxD,SAAS,oBAAoB,WAAW,sBAAsB;EAC9D,MAAM,oBAAoB,QAAQ,sBAAsB;CAC1D;AACF;AAEA,eAAe,gBAAgB,QAAsC,KAAoD;CACvH,IAAI,UAAU;MACC,oBAAoB,MAC1B,MAAM,eACX,MAAM,IAAI,MAAM,2EAA2E;CAAA;CAI/F,MAAM,WAAW,QAAQ,cAAc,OACjC,QAAoD,YAAY,CAAC,EAAE,MAAM,UAAU,CAAC,IACtF,OAAO,WAAW,KAAI,eAAc;EAClC,MAAM,UAAU;EAChB,MAAM,UAAU;CAClB,EAAE;CACN,MAAM,uBAAuB,6BAA6B,QAAQ,WAAW;CAC7E,MAAM,kBAAkB,QAAQ,UAAU,CAAC;CAC3C,MAAM,8BAA8B,QAAQ,aAAa,CAAC;CAE1D,OAAO,QAAQ,IAAI,SAAS,IAAI,OAAO,YAAY;EASjD,OAAO,oBACL,QACA,SAVwB,uBACxB,SACA,KACA,sBACA,iBACA,2BAMgB,GAChB,GACF;CACF,CAAC,CAAC;AACJ;AAEA,SAAS,0BAA0B,QAA6E;CAC9G,IAAI,UAAU,MACZ;CAGF,mCAAmC,MAAM;CAEzC,IAAI,wBAAwB,MAAM,GAChC,OAAO;CAGT,OAAO,EACL,QAAQ,OACV;AACF;AAEA,SAAS,uBACP,SACA,KACA,sBACA,iBACA,6BAC4B;CAC5B,MAAM,UAAU,QAAQ,WAAW;EACjC;EACA;EACA;EACA;EACA;EACA;CACF;CACA,MAAM,UAAU,QAAQ,WAAW;EACjC;EACA;EACA;CACF;CACA,MAAM,SAAS,QAAQ,UAAU,CAAC,GAAG,eAAe;CACpD,MAAM,qBAAqB,QAAQ,sBAAsB,CAAC,EAAE,IAAI,UAAU,CAAC;CAC3E,MAAM,OAAO,QAAQ,QAAQ,OACzB,MACC,WAAW,QAAQ,IAAI,IAAI,QAAQ,OAAO,QAAQ,KAAK,QAAQ,IAAI;CACxE,MAAM,YAAY,QAAQ,aAAa,CAAC,GAAG,2BAA2B;CAGtE,OAAO;EACL,aAHkB,wBAAwB,sBAAsB,QAAQ,WAG9D;EACV,YAAY,0BAA0B,QAAQ,UAAU;EACxD;EACA,UAAU,QAAQ;EAClB;EACA;EACA;EACA,MAAM,QAAQ;EACd;EACA;EACA,WAAW,0BAA0B,QAAQ,SAAS;CACxD;AACF;AAEA,SAAS,yBAAyB,QAAwE;CACxG,IAAI,UAAU,MACZ;CAGF,OAAO,EACL,eAAe,OAAO,iBAAiB,OACnC,KAAA,IACA;EACE,SAAS,OAAO,cAAc,WAAW;EACzC,UAAU,OAAO,cAAc;CACjC,EACN;AACF;AAEA,SAAS,oBAAoB,aAA+B;CAC1D,IAAI,eAAe,MACjB,OAAO;CAGT,IAAI,OAAO,gBAAgB,UACzB,OAAO;CAGT,IAAI,aAAa,aACf,OAAQ,YAAqC;CAG/C,OAAO;AACT;AAEA,eAAe,oBACb,KACA,wBAIC;CACD,MAAM,yBAAyB,0BAA0B,OACrD,MAAM,sBAAsB,GAAG,IAC9B,WAAW,sBAAsB,IAAI,yBAAyB,QAAQ,KAAK,sBAAsB;CAEtG,IAAI,0BAA0B,QAAQ,0BAA0B,QAAQ,CAAC,MAAM,eAAe,sBAAsB,GAClH,MAAM,IAAI,MAAM,kDAAkD,wBAAwB;CAG5F,IAAI,0BAA0B,MAC5B,OAAO;EACL,QAAQ;EACR,gBAAgB;CAClB;CAcF,OAAO;EACL,SAAQ,MAZW,WAAsB;GACzC,YAAY;GACZ;GACA,QAAQ;GACR,SAAS;GACT,QAAQ;GACR,QAAQ;GACR,aAAa;GACb,QAAQ;GACR,eAAe;EACjB,CAAC,EAAA,CAEgB;EACf,gBAAgB;CAClB;AACF;AAEA,SAAS,6BACP,aACqD;CACrD,IAAI,eAAe,MACjB;CAGF,OAAO;EACL,SAAS,YAAY;EACrB,MAAM,YAAY;EAClB,SAAS,YAAY;EACrB,MAAM,YAAY;CACpB;AACF;;;;;;;;;;;;;;;;ACt2BA,SAAgB,6BAA+C;CAC7D,OAAO;EACL,WAAW,CAAC;EACZ,kBAAkB,CAAC;EACnB,gBAAgB,CAAC;EACjB,MAAM,SAAS,OAAO,aAAa,UAAU;GAC3C,OAAO,MAAM,SAAS;EACxB;CACF;AACF;;;;;;;;;;;;;;;;;ACkCA,SAAgB,2BAA2B,UAA6C,CAAC,GAAqB;CAC5G,MAAM,YAAY,QAAQ,aAAa;CACvC,IAAI;CACJ,IAAI;CAEJ,eAAe,SAA0C;EACvD,eAAe,UAAU,CAAC,CAAC,MAAM,QAAQ;GACvC,YAAY;GACZ,OAAO;EACT,CAAC;EACD,OAAO,MAAM;CACf;CAEA,OAAO;EACL,SAAS,MAAM,YAAY;GACzB,WAAW,MAAM,cAAc,CAAC,EAAE,SAAS,MAAM,iCAAiC,UAAU,CAAC;EAC/F;EACA,gBAAgB,OAAO;GACrB,WAAW,MAAM,cAAc,CAAC,EAAE,gBAAgB,KAAK;EACzD;EACA,cAAc,YAAY;GACxB,WAAW,MAAM,cAAc,CAAC,EAAE,cAAc,iCAAiC,UAAU,KAAK,CAAC,CAAC;EACpG;EACA,MAAM,SAAS,MAAM,YAAY,UAAU;GACzC,MAAM,MAAM,MAAM,OAAO;GAGzB,OAAO,MAFQ,IAAI,MAAM,UAAU,QAEjB,CAAC,CAAC,gBAAgB,MAAM,EAAE,YAAY,iCAAiC,UAAU,KAAK,CAAC,EAAE,GAAG,OAAO,SAAS;IAC5H,IAAI;KACF,OAAO,MAAM,SAAS;IACxB,SACO,OAAO;KACZ,KAAK,gBAAgB,KAAK;KAC1B,KAAK,UAAU;MAAE,MAAM,IAAI,eAAe;MAAO,SAAS,iBAAiB,KAAK,KAAK;KAAgB,CAAC;KACtG,MAAM;IACR,UACQ;KACN,KAAK,IAAI;IACX;GACF,CAAC;EACH;CACF;AACF;AAEA,eAAe,yBAA0D;CAEvE,OAAO,MAAM,OAAO;AACtB;AAEA,SAAS,yCAAyC,OAAgG;CAChJ,IAAI,MAAM,WAAW,GACnB,OAAO;CAGT,MAAM,YAAY,OAAO,MAAM;CAC/B,IAAI,cAAc,aAAa,cAAc,YAAY,cAAc,UACrE,OAAO;CAGT,OAAO,MAAM,OAAM,SAAQ,OAAO,SAAS,SAAS;AACtD;AAEA,SAAS,+BAA+B,OAAuD;CAC7F,OAAO,OAAO,UAAU,aAAa,OAAO,UAAU,YAAY,OAAO,UAAU;AACrF;;;;;;;;;;AAWA,SAAS,iCAAiC,YAAkF;CAC1H,IAAI,cAAc,MAChB;CAGF,MAAM,aAAsC,CAAC;CAE7C,KAAK,MAAM,CAAC,KAAK,UAAU,OAAO,QAAQ,UAAU,GAAG;EACrD,IAAI,SAAS,MACX;EAGF,IAAI,+BAA+B,KAAK,GAAG;GACzC,WAAW,OAAO;GAClB;EACF;EAEA,IAAI,MAAM,QAAQ,KAAK,GAAG;GACxB,WAAW,OAAO,yCAAyC,KAAK,IAC5D,QACA,wBAAwB,KAAK,KAAK;GACtC;EACF;EAEA,MAAM,cAAc,wBAAwB,KAAK;EAEjD,IAAI,eAAe,MACjB,WAAW,OAAO;CAEtB;CAEA,OAAO;AACT;AAEA,SAAS,wBAAwB,OAAoC;CACnE,IAAI;EACF,OAAO,KAAK,UAAU,KAAK;CAC7B,QACM;EACJ,OAAO,OAAO,KAAK;CACrB;AACF;;;ACrKA,MAAM,sBAAsB,OAAO,IAAI,2BAA2B;;;;AAKlE,SAAgB,wBAAwB,YAA0B;CAChE,MAAM,QAAQ,iBAAiB;CAC/B,MAAM,mBAAmB;AAC3B;;;;AAKA,SAAgB,2BAA2B,YAAsC;CAC/E,MAAM,QAAQ,iBAAiB;CAC/B,MAAM,cAAc,MAAM,8BAA8B,IAAI,UAAU,KAAK,CAAC;CAC5E,MAAM,8BAA8B,OAAO,UAAU;CACrD,OAAO;AACT;;;;AAKA,SAAgB,wBAA8B;CAC5C,MAAM,QAAQ,iBAAiB;CAC/B,MAAM,mBAAmB;AAC3B;;;;AAKA,SAAgB,uBAAuB,YAAkC;CACvE,MAAM,QAAQ,iBAAiB;CAE/B,IAAI,MAAM,oBAAoB,MAC5B;CAGF,MAAM,WAAW,MAAM,8BAA8B,IAAI,MAAM,gBAAgB,KAAK,CAAC;CACrF,SAAS,KAAK,UAAU;CACxB,MAAM,8BAA8B,IAAI,MAAM,kBAAkB,QAAQ;AAC1E;AAEA,SAAS,mBAAgD;CACvD,MAAM,mBAAmB;CAIzB,iBAAiB,yBAAyB;EACxC,kBAAkB;EAClB,+CAA+B,IAAI,IAA8B;CACnE;CAEA,OAAO,iBAAiB;AAC1B"}
|
|
@@ -27,43 +27,21 @@ interface ToolCallContainer {
|
|
|
27
27
|
*/
|
|
28
28
|
toolCalls?: readonly ToolCall[];
|
|
29
29
|
}
|
|
30
|
-
/**
|
|
31
|
-
* Registers vieval custom matchers on Vitest `expect`.
|
|
32
|
-
*
|
|
33
|
-
* Call stack:
|
|
34
|
-
*
|
|
35
|
-
* {@link installVievalExpectMatchers}
|
|
36
|
-
* -> `expect.extend(...)`
|
|
37
|
-
* -> `expect(received).toMustInclude(...)`
|
|
38
|
-
* -> `expect(received).toScoreRubricGreaterThan(...)`
|
|
39
|
-
*
|
|
40
|
-
* Use when:
|
|
41
|
-
* - eval suites need domain assertions while preserving native Vitest ergonomics
|
|
42
|
-
* - callers want native `.not` chaining with the same matchers
|
|
43
|
-
*/
|
|
44
|
-
declare function installVievalExpectMatchers(): void;
|
|
45
30
|
interface VievalCustomMatchers {
|
|
46
|
-
/**
|
|
47
|
-
* Asserts that text includes required keywords.
|
|
48
|
-
*
|
|
49
|
-
* Example:
|
|
50
|
-
* `expect('calm answer').toMustInclude(['calm'])`
|
|
51
|
-
*/
|
|
52
|
-
toMustInclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void;
|
|
53
31
|
/**
|
|
54
32
|
* Asserts that text excludes forbidden keywords.
|
|
55
33
|
*
|
|
56
34
|
* Example:
|
|
57
35
|
* `expect('calm answer').toMustExclude(['bestmove'])`
|
|
58
36
|
*/
|
|
59
|
-
toMustExclude: (keywords: string |
|
|
37
|
+
toMustExclude: (keywords: readonly string[] | string, options?: KeywordMatcherOptions) => void;
|
|
60
38
|
/**
|
|
61
|
-
* Asserts
|
|
39
|
+
* Asserts that text includes required keywords.
|
|
62
40
|
*
|
|
63
41
|
* Example:
|
|
64
|
-
* `expect(
|
|
42
|
+
* `expect('calm answer').toMustInclude(['calm'])`
|
|
65
43
|
*/
|
|
66
|
-
|
|
44
|
+
toMustInclude: (keywords: readonly string[] | string, options?: KeywordMatcherOptions) => void;
|
|
67
45
|
/**
|
|
68
46
|
* Asserts structured output satisfies a validator.
|
|
69
47
|
*
|
|
@@ -78,10 +56,32 @@ interface VievalCustomMatchers {
|
|
|
78
56
|
* `expect({ toolCalls }).toSatisfyToolCallArgs('builtIn_sparkCommand', isSparkArgs)`
|
|
79
57
|
*/
|
|
80
58
|
toSatisfyToolCallArgs: (toolName: string, validator: (args: unknown) => boolean) => void;
|
|
59
|
+
/**
|
|
60
|
+
* Asserts rubric score is greater than a threshold.
|
|
61
|
+
*
|
|
62
|
+
* Example:
|
|
63
|
+
* `expect({ score: 0.91 }).toScoreRubricGreaterThan(0.8)`
|
|
64
|
+
*/
|
|
65
|
+
toScoreRubricGreaterThan: (threshold: number) => void;
|
|
81
66
|
}
|
|
67
|
+
/**
|
|
68
|
+
* Registers vieval custom matchers on Vitest `expect`.
|
|
69
|
+
*
|
|
70
|
+
* Call stack:
|
|
71
|
+
*
|
|
72
|
+
* {@link installVievalExpectMatchers}
|
|
73
|
+
* -> `expect.extend(...)`
|
|
74
|
+
* -> `expect(received).toMustInclude(...)`
|
|
75
|
+
* -> `expect(received).toScoreRubricGreaterThan(...)`
|
|
76
|
+
*
|
|
77
|
+
* Use when:
|
|
78
|
+
* - eval suites need domain assertions while preserving native Vitest ergonomics
|
|
79
|
+
* - callers want native `.not` chaining with the same matchers
|
|
80
|
+
*/
|
|
81
|
+
declare function installVievalExpectMatchers(): void;
|
|
82
82
|
declare module '@vitest/expect' {
|
|
83
|
-
interface Matchers<T = any> extends VievalCustomMatchers {}
|
|
84
83
|
interface Assertion<T = any> extends VievalCustomMatchers {}
|
|
84
|
+
interface Matchers<T = any> extends VievalCustomMatchers {}
|
|
85
85
|
}
|
|
86
86
|
declare module 'vitest' {
|
|
87
87
|
interface Assertion extends VievalCustomMatchers {}
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { t as installVievalExpectMatchers } from "../expect-extensions-
|
|
1
|
+
import { t as installVievalExpectMatchers } from "../expect-extensions-BKdEPt3h.mjs";
|
|
2
2
|
export { installVievalExpectMatchers };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vieval",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.0.
|
|
4
|
+
"version": "0.0.12",
|
|
5
5
|
"description": "Vitest-based evaluation framework for agents, models, and more.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Vieval Team",
|
|
@@ -77,8 +77,8 @@
|
|
|
77
77
|
"@pnpm/find-workspace-dir": "^1000.1.5",
|
|
78
78
|
"@vitest/expect": "^4.1.9",
|
|
79
79
|
"@vitest/runner": "^4.1.9",
|
|
80
|
-
"@xsai-ext/providers": "0.5.0-beta.
|
|
81
|
-
"@xsai/generate-text": "0.5.0-beta.
|
|
80
|
+
"@xsai-ext/providers": "0.5.0-beta.6",
|
|
81
|
+
"@xsai/generate-text": "0.5.0-beta.6",
|
|
82
82
|
"c12": "^4.0.0-beta.5",
|
|
83
83
|
"date-fns": "^4.4.0",
|
|
84
84
|
"es-toolkit": "1.43.0",
|