agent-skill-evals 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","names":["configObject","numberSetting","availableMetrics","metricFrom"],"sources":["../../src/assertions/_shared.ts","../../src/assertions/skill-test.ts","../../src/assertions/skill-budget.ts","../../src/skill-checks/assertions-static/_shared.ts","../../src/skill-checks/assertions-static/settings.ts","../../src/skill-checks/assertions-static/context-economy.ts","../../src/skill-checks/assertions-static/executable-helper.ts","../../src/skill-checks/assertions-static/instruction-calibration.ts","../../src/skill-checks/assertions-static/negative-coverage.ts","../../src/skill-checks/assertions-static/routing-metadata.ts","../../src/skill-checks/assertions-static/scenario-validity.ts","../../src/skill-checks/assertions-static/promptfoo.ts","../../src/assertions/promptfoo.ts"],"sourcesContent":["import { join } from \"node:path\";\nimport * as Either from \"effect/Either\";\nimport * as Effect from \"effect/Effect\";\nimport type { AssertionEntry } from \"../assertion-entries.js\";\nimport type {\n AssertionMode,\n EvidenceHandle,\n AgentSkillEvalsAssertionResult,\n WorldHandle,\n} from \"../internal-types.js\";\nimport { decodeEvidenceSnapshotEither } from \"../evidence-schema.js\";\nimport {\n EvidenceCollector,\n evidenceFromSnapshot,\n type AgentSkillEvalsProviderMetadata,\n} from \"../agent/index.js\";\nimport { makeWorldHandle } from \"../agent/world.js\";\nimport {\n getRuntimeCheck,\n RuntimeCheckCatalog,\n RuntimeCheckCatalogLive,\n} from \"../runtime-checks/catalog.js\";\nimport {\n Environment,\n FileSystem,\n NodeServicesLive,\n} from \"../internal-services.js\";\n\nexport interface PromptfooAssertContext {\n vars?: Record<string, unknown>;\n providerResponse?: {\n metadata?: unknown;\n tokenUsage?: {\n total?: number;\n prompt?: number;\n completion?: number;\n cached?: number;\n };\n };\n test?: { vars?: Record<string, unknown> };\n assertion?: { metric?: string };\n assert?: { metric?: string };\n config?: { metric?: 
string; agentSkillEvals?: unknown };\n metric?: string;\n}\n\nexport interface GradingResult {\n pass: boolean;\n score: number;\n reason: string;\n componentResults?: Array<{ pass: boolean; score: number; reason: string }>;\n metadata?: Record<string, unknown>;\n}\n\nexport async function loadMetadata(\n context: PromptfooAssertContext,\n): Promise<AgentSkillEvalsProviderMetadata | null> {\n return Effect.runPromise(loadMetadataEffect(context).pipe(Effect.provide(NodeServicesLive)));\n}\n\nexport function loadMetadataEffect(\n context: PromptfooAssertContext,\n): Effect.Effect<AgentSkillEvalsProviderMetadata | null, never, FileSystem | Environment> {\n return Effect.gen(function* () {\n const direct = context.providerResponse?.metadata;\n if (direct && typeof direct === \"object\" && \"worldPath\" in direct) {\n return direct as AgentSkillEvalsProviderMetadata;\n }\n // Fallback: AGENT_SKILL_EVALS_RUN_DIR env points at the most recent run.\n const environment = yield* Environment;\n const env = yield* environment.env;\n const runDir = env.AGENT_SKILL_EVALS_RUN_DIR;\n if (runDir) {\n const fs = yield* FileSystem;\n const parsed = yield* fs.readText(join(runDir, \"agent-skill-evals-meta.json\")).pipe(\n Effect.map((buf) => {\n try {\n return JSON.parse(buf) as AgentSkillEvalsProviderMetadata;\n } catch {\n return null;\n }\n }),\n Effect.catchAll(() => Effect.succeed(null)),\n );\n if (parsed) {\n return parsed;\n }\n }\n return null;\n });\n}\n\nexport async function loadEvidence(\n meta: AgentSkillEvalsProviderMetadata,\n): Promise<EvidenceCollector> {\n return Effect.runPromise(loadEvidenceEffect(meta).pipe(Effect.provide(NodeServicesLive)));\n}\n\nexport function loadEvidenceEffect(\n meta: AgentSkillEvalsProviderMetadata,\n): Effect.Effect<EvidenceCollector, Error, FileSystem> {\n return Effect.gen(function* () {\n const fs = yield* FileSystem;\n const buf = yield* fs.readText(meta.evidencePath).pipe(\n Effect.mapError((err) => new Error(`evidence: failed to 
read ${meta.evidencePath}: ${err instanceof Error ? err.message : String(err)}`)),\n );\n const parsed = yield* Effect.try({\n try: () => JSON.parse(buf) as unknown,\n catch: (err) =>\n new Error(`evidence: invalid JSON in ${meta.evidencePath}: ${err instanceof Error ? err.message : String(err)}`),\n });\n const decoded = decodeEvidenceSnapshotEither(parsed);\n if (Either.isLeft(decoded)) {\n return yield* Effect.fail(\n new Error(`evidence: invalid agent-skill-evals.evidence.v1 payload: ${decoded.left.message}`),\n );\n }\n const snapshot = decoded.right;\n return EvidenceCollector.fromSnapshot(snapshot);\n });\n}\n\nexport function loadWorld(\n meta: AgentSkillEvalsProviderMetadata,\n evidenceCollector: EvidenceCollector,\n): WorldHandle {\n return makeWorldHandle(meta.worldPath, (event) => evidenceCollector.addCommand(event));\n}\n\nexport async function runEntries(\n entries: AssertionEntry[],\n world: WorldHandle,\n evidenceCollector: EvidenceCollector,\n mode: AssertionMode,\n): Promise<AgentSkillEvalsAssertionResult[]> {\n return Effect.runPromise(\n runEntriesEffect(entries, world, evidenceCollector, mode).pipe(\n Effect.provide(RuntimeCheckCatalogLive),\n ),\n );\n}\n\nexport function runEntriesEffect(\n entries: AssertionEntry[],\n world: WorldHandle,\n evidenceCollector: EvidenceCollector,\n mode: AssertionMode,\n): Effect.Effect<AgentSkillEvalsAssertionResult[], never, RuntimeCheckCatalog> {\n return Effect.gen(function* () {\n const results: AgentSkillEvalsAssertionResult[] = [];\n for (const entry of entries) {\n const plugin = yield* getRuntimeCheck(entry.type);\n if (!plugin) {\n results.push({\n pass: false,\n score: 0,\n reason: `unknown effect type: ${entry.type}`,\n });\n continue;\n }\n const r = yield* plugin.verify({\n assertion: entry.args,\n world,\n evidence: evidenceFromSnapshot(evidenceCollector.toSnapshot()),\n mode,\n });\n results.push(r);\n }\n return results;\n });\n}\n\nexport function aggregate(\n results: 
AgentSkillEvalsAssertionResult[],\n emptyReason: string,\n options: { emptyPass?: boolean } = {},\n): GradingResult {\n if (results.length === 0) {\n const pass = options.emptyPass ?? true;\n return { pass, score: pass ? 1 : 0, reason: emptyReason };\n }\n const allPass = results.every((r) => r.pass);\n const failed = results.filter((r) => !r.pass).map((r) => r.reason);\n return {\n pass: allPass,\n score: allPass ? 1 : 0,\n reason: allPass\n ? `${results.length} check(s) passed`\n : failed.join(\"; \"),\n componentResults: results.map((r) => ({\n pass: r.pass,\n score: r.score,\n reason: r.reason,\n })),\n };\n}\n","import * as Either from \"effect/Either\";\nimport * as Effect from \"effect/Effect\";\nimport {\n aggregate,\n loadEvidenceEffect,\n loadMetadataEffect,\n loadWorld,\n runEntriesEffect,\n type GradingResult,\n type PromptfooAssertContext,\n} from \"./_shared.js\";\nimport { parseRuntimeTestFields } from \"../assertion-entries.js\";\nimport { writeEvidenceToEffect } from \"../agent/evidence.js\";\nimport {\n Environment,\n FileSystem,\n NodeServicesLive,\n} from \"../internal-services.js\";\nimport {\n RuntimeCheckCatalog,\n RuntimeCheckCatalogLive,\n} from \"../runtime-checks/catalog.js\";\n\nexport default async function skillTest(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n return Effect.runPromise(\n skillTestEffect(_output, context).pipe(\n Effect.provide(RuntimeCheckCatalogLive),\n Effect.provide(NodeServicesLive),\n ),\n );\n}\n\nfunction skillTestEffect(\n _output: string,\n context: PromptfooAssertContext,\n): Effect.Effect<GradingResult, never, FileSystem | Environment | RuntimeCheckCatalog> {\n return Effect.gen(function* () {\n const meta = yield* loadMetadataEffect(context);\n if (!meta) {\n return {\n pass: false,\n score: 0,\n reason: \"skill.test: provider metadata missing\",\n };\n }\n\n const preconditionResults = meta.preconditionResults ?? 
[];\n if (!meta.preconditionsPassed) {\n return aggregate(preconditionResults, \"skill.test: preconditions failed\");\n }\n\n const vars = (context.vars ?? context.test?.vars ?? {}) as Record<string, unknown>;\n const loadedEvidence = yield* Effect.either(loadEvidenceEffect(meta));\n if (Either.isLeft(loadedEvidence)) {\n const err = loadedEvidence.left;\n return { pass: false, score: 0, reason: err instanceof Error ? err.message : String(err) };\n }\n const evidenceCollector = loadedEvidence.right;\n const world = loadWorld(meta, evidenceCollector);\n\n const parsed = parseRuntimeTestFields(vars);\n const parseResults = parsed.errors\n .filter((error) => error.field !== \"preconditions\")\n .map((error) => ({\n pass: false,\n score: 0,\n reason: `runtime test field ${error.index === undefined ? error.field : `${error.field}[${error.index}]`}: ${error.reason}`,\n }));\n const shouldResults = yield* runEntriesEffect(parsed.should, world, evidenceCollector, \"should\");\n const shouldNotResults = yield* runEntriesEffect(parsed.should_not, world, evidenceCollector, \"should_not\");\n yield* writeEvidenceToEffect(evidenceCollector, meta.runDir).pipe(Effect.orDie);\n const results = [...preconditionResults, ...parseResults, ...shouldResults, ...shouldNotResults];\n return aggregate(results, \"skill.test: no Runtime Test Fields checks declared\", {\n emptyPass: false,\n });\n });\n}\n","import type { GradingResult, PromptfooAssertContext } from \"./_shared.js\";\n\ninterface TokenUsage {\n total?: number;\n prompt?: number;\n completion?: number;\n cached?: number;\n}\n\ninterface BudgetSettings {\n maxTotalTokens?: number;\n maxPromptTokens?: number;\n maxCompletionTokens?: number;\n maxCachedTokens?: number;\n}\n\nconst budgetFields = [\n [\"total\", \"maxTotalTokens\"],\n [\"prompt\", \"maxPromptTokens\"],\n [\"completion\", \"maxCompletionTokens\"],\n [\"cached\", \"maxCachedTokens\"],\n] as const;\n\nfunction configObject(value: unknown): Record<string, unknown> {\n 
return value && typeof value === \"object\" && !Array.isArray(value)\n ? value as Record<string, unknown>\n : {};\n}\n\nfunction numberSetting(config: Record<string, unknown>, key: keyof BudgetSettings): number | undefined {\n const value = config[key];\n return typeof value === \"number\" && Number.isFinite(value) ? value : undefined;\n}\n\nfunction budgetSettings(context: PromptfooAssertContext): BudgetSettings {\n const config = configObject(context.config?.agentSkillEvals);\n return {\n maxTotalTokens: numberSetting(config, \"maxTotalTokens\"),\n maxPromptTokens: numberSetting(config, \"maxPromptTokens\"),\n maxCompletionTokens: numberSetting(config, \"maxCompletionTokens\"),\n maxCachedTokens: numberSetting(config, \"maxCachedTokens\"),\n };\n}\n\nfunction tokenUsage(context: PromptfooAssertContext): TokenUsage | undefined {\n const usage = context.providerResponse?.tokenUsage;\n return usage && typeof usage === \"object\" && !Array.isArray(usage)\n ? usage as TokenUsage\n : undefined;\n}\n\nexport default async function skillBudget(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const usage = tokenUsage(context);\n if (!usage) {\n return {\n pass: false,\n score: 0,\n reason: \"skill.budget: provider tokenUsage missing\",\n };\n }\n\n const settings = budgetSettings(context);\n const configured = budgetFields.filter(([, limitKey]) => settings[limitKey] !== undefined);\n if (configured.length === 0) {\n return {\n pass: false,\n score: 0,\n reason: \"skill.budget: configure at least one token limit\",\n };\n }\n\n const components = configured.map(([usageKey, limitKey]) => {\n const actual = usage[usageKey];\n const limit = settings[limitKey] ?? 0;\n if (typeof actual !== \"number\" || !Number.isFinite(actual)) {\n return {\n pass: false,\n score: 0,\n reason: `${usageKey} tokens missing`,\n };\n }\n return {\n pass: actual <= limit,\n score: actual <= limit ? 
1 : 0,\n reason: `${usageKey} tokens ${actual} <= ${limit}`,\n };\n });\n const failed = components.filter((component) => !component.pass);\n return {\n pass: failed.length === 0,\n score: failed.length === 0 ? 1 : 0,\n reason: failed.length === 0\n ? `skill.budget: ${components.length} budget(s) passed`\n : failed.map((component) => component.reason).join(\"; \"),\n componentResults: components,\n };\n}\n","import type { StaticProviderMetadata } from \"../index.js\";\n\nexport interface PromptfooAssertContext {\n vars?: Record<string, unknown>;\n providerResponse?: { metadata?: unknown };\n test?: { vars?: Record<string, unknown> };\n assertion?: { metric?: string };\n assert?: { metric?: string };\n config?: { metric?: string; agentSkillEvals?: unknown };\n metric?: string;\n}\n\nexport interface GradingResult {\n pass: boolean;\n score: number;\n reason: string;\n componentResults?: Array<{ pass: boolean; score: number; reason: string }>;\n}\n\nexport function getStaticMeta(\n context: PromptfooAssertContext,\n): StaticProviderMetadata | null {\n const m = context.providerResponse?.metadata;\n if (m && typeof m === \"object\" && (\"skill\" in m || \"tests\" in m)) {\n return m as StaticProviderMetadata;\n }\n return null;\n}\n\nexport function pass(reason: string, components?: GradingResult[\"componentResults\"]): GradingResult {\n return { pass: true, score: 1, reason, componentResults: components };\n}\n\nexport function fail(reason: string, components?: GradingResult[\"componentResults\"]): GradingResult {\n return { pass: false, score: 0, reason, componentResults: components };\n}\n","import * as Schema from \"effect/Schema\";\nimport type { PromptfooAssertContext } from \"./_shared.js\";\n\nexport interface SkillCheckSettings {\n maxSkillLines: number;\n destructiveEffects: readonly string[];\n requireTokenBudget: boolean;\n riskyEffects: readonly string[];\n}\n\nconst DEFAULT_SETTINGS: SkillCheckSettings = {\n maxSkillLines: 200,\n destructiveEffects: 
[\"file.changes_outside_scope\", \"tool.called\"],\n requireTokenBudget: false,\n riskyEffects: [\"file.changes_outside_scope\", \"tool.called\"],\n};\n\nconst StringArraySchema = Schema.Array(Schema.String);\n\nfunction configObject(value: unknown): Record<string, unknown> {\n return value && typeof value === \"object\" && !Array.isArray(value)\n ? value as Record<string, unknown>\n : {};\n}\n\nfunction numberSetting(\n config: Record<string, unknown>,\n key: string,\n fallback: number,\n): number {\n return Schema.is(Schema.Number)(config[key]) ? config[key] : fallback;\n}\n\nfunction stringArraySetting(\n config: Record<string, unknown>,\n key: string,\n fallback: readonly string[],\n): readonly string[] {\n return Schema.is(StringArraySchema)(config[key]) ? config[key] : fallback;\n}\n\nfunction booleanSetting(\n config: Record<string, unknown>,\n key: string,\n fallback: boolean,\n): boolean {\n return typeof config[key] === \"boolean\" ? config[key] : fallback;\n}\n\nexport function skillCheckSettings(context: PromptfooAssertContext): SkillCheckSettings {\n const config = configObject(context.config?.agentSkillEvals);\n return {\n maxSkillLines: numberSetting(config, \"maxSkillLines\", DEFAULT_SETTINGS.maxSkillLines),\n destructiveEffects: stringArraySetting(\n config,\n \"destructiveEffects\",\n DEFAULT_SETTINGS.destructiveEffects,\n ),\n requireTokenBudget: booleanSetting(\n config,\n \"requireTokenBudget\",\n DEFAULT_SETTINGS.requireTokenBudget,\n ),\n riskyEffects: stringArraySetting(\n config,\n \"riskyEffects\",\n DEFAULT_SETTINGS.riskyEffects,\n ),\n };\n}\n","import { fail, getStaticMeta, pass, type GradingResult, type PromptfooAssertContext } from \"./_shared.js\";\nimport { skillCheckSettings } from \"./settings.js\";\n\n/**\n * SPEC §7.2 — context economy. Hard: referenced files must exist (already\n * surfaced via missingFiles). 
Warnings: SKILL.md size, reference depth,\n * missing TOC in long reference files.\n *\n * Implemented as warnings-only: this assertion always passes, but emits a\n * `score < 1` reason describing how SKILL.md compares to thresholds.\n */\nexport default async function contextEconomy(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const meta = getStaticMeta(context);\n if (!meta) return fail(\"context-economy: provider metadata missing\");\n const skill = meta.skill;\n if (!skill) return fail(\"context-economy: skill not parsed\");\n\n const { maxSkillLines: maxLines } = skillCheckSettings(context);\n\n const components: GradingResult[\"componentResults\"] = [];\n\n components.push({\n pass: skill.totalLines <= maxLines,\n score: skill.totalLines <= maxLines ? 1 : 0.5,\n reason: `SKILL.md ${skill.totalLines} line(s) (limit ${maxLines})`,\n });\n\n components.push({\n pass: skill.missingReferences.length === 0,\n score: skill.missingReferences.length === 0 ? 1 : 0,\n reason:\n skill.missingReferences.length === 0\n ? `${skill.references.length} reference(s) all resolved`\n : `missing references: ${skill.missingReferences.slice(0, 3).join(\", \")}`,\n });\n\n // Hard fail only on missing references; soft on size.\n const failed = components.filter((c) => !c.pass && c.reason.startsWith(\"missing references\"));\n if (failed.length === 0) {\n const oversize = components.find((c) => c.score < 1);\n return oversize\n ? { pass: true, score: 0.5, reason: oversize.reason, componentResults: components }\n : pass(`context-economy: ok`, components);\n }\n return fail(failed.map((f) => `✗ ${f.reason}`).join(\"; \"), components);\n}\n","import { fail, getStaticMeta, pass, type GradingResult, type PromptfooAssertContext } from \"./_shared.js\";\n\n/**\n * SPEC §7.4 — verifier scripts referenced by tests must exist and be\n * runnable. 
Generated tests must not reference missing fixtures.\n */\nexport default async function executableHelper(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const meta = getStaticMeta(context);\n if (!meta) return fail(\"executable-helper: provider metadata missing\");\n const tests = meta.tests;\n if (!tests) return fail(\"executable-helper: tests not parsed\");\n\n const components: GradingResult[\"componentResults\"] = [];\n\n components.push({\n pass: tests.missingVerifierScripts.length === 0,\n score: tests.missingVerifierScripts.length === 0 ? 1 : 0,\n reason: tests.missingVerifierScripts.length === 0\n ? `${tests.verifierScripts.length} verifier script(s) all present`\n : `missing verifier scripts: ${tests.missingVerifierScripts.slice(0, 3).join(\", \")}`,\n });\n\n components.push({\n pass: tests.nonExecutableVerifierScripts.length === 0,\n score: tests.nonExecutableVerifierScripts.length === 0 ? 1 : 0,\n reason: tests.nonExecutableVerifierScripts.length === 0\n ? `${tests.verifierScripts.length} verifier script(s) executable`\n : `non-executable verifier scripts: ${tests.nonExecutableVerifierScripts.slice(0, 3).join(\", \")}`,\n });\n\n components.push({\n pass: tests.missingFixturePaths.length === 0,\n score: tests.missingFixturePaths.length === 0 ? 1 : 0,\n reason: tests.missingFixturePaths.length === 0\n ? 
`${tests.fixturePaths.length} fixture(s) all present`\n : `missing fixtures: ${tests.missingFixturePaths.slice(0, 3).join(\", \")}`,\n });\n\n const failed = components.filter((c) => !c.pass);\n if (failed.length === 0) return pass(\"executable-helper: ok\", components);\n return fail(failed.map((f) => `✗ ${f.reason}`).join(\"; \"), components);\n}\n","import { fail, getStaticMeta, pass, type GradingResult, type PromptfooAssertContext } from \"./_shared.js\";\nimport { skillCheckSettings } from \"./settings.js\";\n\n/**\n * SPEC §7.3 — for skills whose test pack uses destructive effects, the\n * SKILL.md must contain confirmation/clarification language and at least\n * one negative test must declare forbidden effects.\n *\n * `assert.config.agentSkillEvals.destructiveEffects` overrides the default list.\n */\nconst CONFIRMATION_RE =\n /\\b(confirm|ask first|do not.*without|before.*push|require.*approval|do not.*destructive)\\b/i;\nconst PLAN_BEFORE_ACT_RE =\n /\\b(plan first|plan before|read.*before.*write|validate.*before|dry.run)\\b/i;\n\nexport default async function instructionCalibration(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const meta = getStaticMeta(context);\n if (!meta) return fail(\"skill.instructions: provider metadata missing\");\n const skill = meta.skill;\n const tests = meta.tests;\n if (!skill) return fail(\"skill.instructions: skill not parsed\");\n\n const destructive = new Set<string>(skillCheckSettings(context).destructiveEffects);\n\n const usesDestructive = tests\n ? tests.tests.some((t) => t.effectTypes.some((e) => destructive.has(e)))\n : false;\n\n if (!usesDestructive) {\n return pass(\"skill.instructions: no destructive effects in test pack\");\n }\n\n const components: GradingResult[\"componentResults\"] = [];\n const hasConfirm = CONFIRMATION_RE.test(skill.body) || PLAN_BEFORE_ACT_RE.test(skill.body);\n components.push({\n pass: hasConfirm,\n score: hasConfirm ? 
1 : 0,\n reason: hasConfirm\n ? \"SKILL.md describes confirmation / plan-before-act\"\n : \"SKILL.md uses destructive effects but lacks confirmation / plan-before-act language\",\n });\n\n const declaresForbidden = tests\n ? tests.tests.some((t) => Array.isArray(t.vars.should_not) && t.vars.should_not.length > 0)\n : false;\n components.push({\n pass: declaresForbidden,\n score: declaresForbidden ? 1 : 0,\n reason: declaresForbidden\n ? \"test pack declares forbidden effects (should_not)\"\n : \"no should_not declared in any test, despite destructive effects\",\n });\n\n const failed = components.filter((c) => !c.pass);\n if (failed.length === 0) return pass(\"skill.instructions: ok\", components);\n return fail(failed.map((f) => `✗ ${f.reason}`).join(\"; \"), components);\n}\n","import { fail, getStaticMeta, pass, type GradingResult, type PromptfooAssertContext } from \"./_shared.js\";\nimport { skillCheckSettings } from \"./settings.js\";\n\n/**\n * SPEC §7.6 — risky skills (those whose test pack uses any risky effect)\n * must include at least one negative test.\n *\n * `assert.config.agentSkillEvals.riskyEffects` overrides the default risky-effect list.\n */\nexport default async function negativeCoverage(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const meta = getStaticMeta(context);\n if (!meta) return fail(\"skill.tests: provider metadata missing\");\n const tests = meta.tests;\n if (!tests) return fail(\"skill.tests: tests not parsed\");\n\n const risky = new Set<string>(skillCheckSettings(context).riskyEffects);\n\n const usesRisky = tests.tests.some((t) => t.effectTypes.some((e) => risky.has(e)));\n if (!usesRisky) {\n return pass(\"skill.tests: no risky effects, negative test not required\");\n }\n\n const negatives = tests.tests.filter((t) => t.isNegative);\n if (negatives.length === 0) {\n return fail(\n `skill.tests: skill uses risky effects (${[...risky].join(\", \")}) but no negative test (kind: negative) is 
declared`,\n );\n }\n return pass(`skill.tests: ${negatives.length} negative test(s)`);\n}\n","import { fail, getStaticMeta, pass, type GradingResult, type PromptfooAssertContext } from \"./_shared.js\";\n\nconst GENERIC_PHRASES = [\n /\\bhelp(s|ing)?\\b/i,\n /\\bgithub workflows?\\b/i,\n /\\bvarious\\b/i,\n /\\bany kind of\\b/i,\n];\n\nconst WHEN_TO_USE_RE = /\\b(use when|use this|when (?:the )?(?:user|you))\\b/i;\nconst WHEN_NOT_RE = /\\bdo not use|do not invoke|don'?t use|not for|avoid using\\b/i;\n\n/**\n * SPEC §7.1 — routing metadata hard checks.\n */\nexport default async function routingMetadata(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const meta = getStaticMeta(context);\n if (!meta) return fail(\"routing-metadata: provider metadata missing\");\n const skill = meta.skill;\n if (!skill) return fail(\"routing-metadata: skill not parsed (vars.skillPath missing?)\");\n\n const fm = skill.frontmatter;\n const components: GradingResult[\"componentResults\"] = [];\n\n const name = typeof fm.name === \"string\" ? fm.name.trim() : \"\";\n components.push({\n pass: name.length > 0,\n score: name.length > 0 ? 1 : 0,\n reason: name ? `name: ${name}` : \"missing `name` frontmatter\",\n });\n\n const desc = typeof fm.description === \"string\" ? fm.description.trim() : \"\";\n components.push({\n pass: desc.length > 0,\n score: desc.length > 0 ? 1 : 0,\n reason: desc ? `description present (${desc.length} chars)` : \"missing `description` frontmatter\",\n });\n\n const sayWhen = WHEN_TO_USE_RE.test(desc);\n components.push({\n pass: sayWhen,\n score: sayWhen ? 1 : 0,\n reason: sayWhen ? \"description says when to use\" : \"description does not say when to use (e.g. 'Use when …')\",\n });\n\n const sayWhenNot = WHEN_NOT_RE.test(desc);\n components.push({\n pass: sayWhenNot,\n score: sayWhenNot ? 1 : 0,\n reason: sayWhenNot ? \"description says when not to use\" : \"description does not say when not to use (e.g. 
'Do not use for …')\",\n });\n\n const generic = GENERIC_PHRASES.some((re) => re.test(desc)) && desc.length < 80;\n components.push({\n pass: !generic,\n score: generic ? 0 : 1,\n reason: generic ? \"description is too generic\" : \"description is specific enough\",\n });\n\n const failed = components.filter((c) => !c.pass);\n if (failed.length === 0) {\n return pass(`routing-metadata: ${components.length}/${components.length} checks ok`, components);\n }\n return fail(\n failed.map((f) => `✗ ${f.reason}`).join(\"; \"),\n components,\n );\n}\n","import { fail, getStaticMeta, pass, type GradingResult, type PromptfooAssertContext } from \"./_shared.js\";\nimport { skillCheckSettings } from \"./settings.js\";\n\n/**\n * SPEC §7.6 — every runtime test must have prompt + fixture (or fixtureless)\n * + at least one of should/should_not. Unsupported effect types fail\n * static validation.\n */\nexport default async function scenarioValidity(\n _output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const meta = getStaticMeta(context);\n if (!meta) return fail(\"skill.tests: provider metadata missing\");\n const tests = meta.tests;\n if (!tests) return fail(\"skill.tests: tests not parsed (vars.testsGlob missing?)\");\n const settings = skillCheckSettings(context);\n\n const components: GradingResult[\"componentResults\"] = [];\n\n if (tests.parseErrors.length > 0) {\n components.push({\n pass: false,\n score: 0,\n reason: `parse errors: ${tests.parseErrors.map((e) => `${e.filePath}: ${e.error}`).join(\"; \")}`,\n });\n }\n\n if (tests.matchedFiles.length === 0) {\n components.push({\n pass: false,\n score: 0,\n reason: \"testsGlob matched no test files\",\n });\n }\n\n for (const t of tests.tests) {\n const issues: string[] = [];\n for (const error of t.entryErrors) {\n const at = error.index === undefined ? 
error.field : `${error.field}[${error.index}]`;\n issues.push(`${at}: ${error.reason}`);\n }\n if (typeof t.vars.prompt !== \"string\" || t.vars.prompt.length === 0) {\n issues.push(\"missing vars.prompt\");\n }\n if (!t.hasFixture) {\n issues.push(\"missing vars.fixture (or vars.fixtureless: true)\");\n }\n const hasCheck =\n (Array.isArray(t.vars.should) && t.vars.should.length > 0) ||\n (Array.isArray(t.vars.should_not) && t.vars.should_not.length > 0);\n if (!hasCheck) issues.push(\"no should / should_not\");\n if (settings.requireTokenBudget && !t.hasTokenBudget) {\n issues.push(\"missing skill.budget assertion\");\n }\n components.push({\n pass: issues.length === 0,\n score: issues.length === 0 ? 1 : 0,\n reason: `${t.description ?? \"(no description)\"}: ${issues.length === 0 ? \"ok\" : issues.join(\", \")}`,\n });\n }\n\n if (meta.unresolvedEffectTypes.length > 0) {\n components.push({\n pass: false,\n score: 0,\n reason: `unsupported effect types: ${meta.unresolvedEffectTypes.join(\", \")}`,\n });\n }\n\n if (meta.missingFiles.length > 0) {\n components.push({\n pass: false,\n score: 0,\n reason: `missing referenced files: ${meta.missingFiles.slice(0, 5).join(\", \")}`,\n });\n }\n\n const failed = components.filter((c) => !c.pass);\n if (failed.length === 0) {\n return pass(`skill.tests: ${tests.tests.length} test(s) ok`, components);\n }\n return fail(\n failed.map((f) => `✗ ${f.reason}`).join(\"; \"),\n components,\n );\n}\n","import * as Effect from \"effect/Effect\";\nimport contextEconomy from \"./context-economy.js\";\nimport executableHelper from \"./executable-helper.js\";\nimport instructionCalibration from \"./instruction-calibration.js\";\nimport negativeCoverage from \"./negative-coverage.js\";\nimport routingMetadata from \"./routing-metadata.js\";\nimport scenarioValidity from \"./scenario-validity.js\";\nimport type { GradingResult, PromptfooAssertContext } from \"./_shared.js\";\n\ntype AssertionFn = (\n output: string,\n context: 
PromptfooAssertContext,\n) => Promise<GradingResult>;\n\nconst metricChecks: Record<string, readonly AssertionFn[]> = {\n \"skill.activation\": [routingMetadata],\n \"skill.budgets\": [scenarioValidity],\n \"skill.context\": [contextEconomy],\n \"skill.instructions\": [instructionCalibration],\n \"skill.tests\": [scenarioValidity, negativeCoverage],\n \"skill.verifiers\": [executableHelper],\n};\n\nmetricChecks[\"skill.checks\"] = [\n routingMetadata,\n contextEconomy,\n instructionCalibration,\n scenarioValidity,\n negativeCoverage,\n executableHelper,\n];\n\nconst availableMetrics = Object.keys(metricChecks).sort().join(\", \");\n\nfunction metricFrom(context: PromptfooAssertContext): string | undefined {\n const candidates = [\n context.assertion?.metric,\n context.assert?.metric,\n context.config?.metric,\n context.metric,\n ];\n return candidates.find((metric): metric is string => typeof metric === \"string\");\n}\n\nasync function runChecks(\n output: string,\n context: PromptfooAssertContext,\n checks: readonly AssertionFn[],\n): Promise<GradingResult> {\n return Effect.runPromise(runChecksEffect(output, context, checks));\n}\n\nfunction runChecksEffect(\n output: string,\n context: PromptfooAssertContext,\n checks: readonly AssertionFn[],\n): Effect.Effect<GradingResult> {\n return Effect.gen(function* () {\n const results = yield* Effect.forEach(\n checks,\n (check) => Effect.promise(() => check(output, context)),\n { concurrency: \"unbounded\" },\n );\n const failed = results.filter((result) => !result.pass);\n const soft = results.filter((result) => result.pass && result.score < 1);\n return {\n pass: failed.length === 0,\n score: failed.length === 0\n ? Math.min(...results.map((result) => result.score))\n : 0,\n reason: failed.length === 0\n ? soft.length > 0\n ? 
soft.map((result) => result.reason).join(\"; \")\n : `skill checks: ${results.length} check(s) passed`\n : failed.map((result) => result.reason).join(\"; \"),\n componentResults: results,\n };\n });\n}\n\nexport async function agentSkillEvalsStaticAssertions(\n output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n return Effect.runPromise(agentSkillEvalsStaticAssertionsEffect(output, context));\n}\n\nfunction agentSkillEvalsStaticAssertionsEffect(\n output: string,\n context: PromptfooAssertContext,\n): Effect.Effect<GradingResult> {\n return Effect.gen(function* () {\n const metric = metricFrom(context);\n const checks = metric ? metricChecks[metric] : undefined;\n if (!checks) {\n return {\n pass: false,\n score: 0,\n reason: `agent-skill-evals skill checks: unknown metric \"${metric ?? \"missing\"}\". Available metrics: ${availableMetrics}`,\n };\n }\n return yield* runChecksEffect(output, context, checks);\n });\n}\n\nexport default agentSkillEvalsStaticAssertions;\n","import skillTest from \"./skill-test.js\";\nimport skillBudget from \"./skill-budget.js\";\nimport { agentSkillEvalsStaticAssertions } from \"../skill-checks/assertions-static/promptfoo.js\";\nimport type { GradingResult, PromptfooAssertContext } from \"./_shared.js\";\n\ntype AssertionFn = (\n output: string,\n context: PromptfooAssertContext,\n) => Promise<GradingResult>;\n\nconst routes: Record<string, AssertionFn> = {\n \"skill.budget\": skillBudget,\n \"skill.test\": skillTest,\n};\nconst staticMetrics = [\n \"skill.checks\",\n \"skill.activation\",\n \"skill.budgets\",\n \"skill.context\",\n \"skill.instructions\",\n \"skill.tests\",\n \"skill.verifiers\",\n];\nconst availableMetrics = [...Object.keys(routes), ...staticMetrics].sort().join(\", \");\n\nexport { skillTest };\n\nfunction metricFrom(context: PromptfooAssertContext): string | undefined {\n const candidates = [\n context.assertion?.metric,\n context.assert?.metric,\n context.config?.metric,\n 
context.metric,\n ];\n return candidates.find((metric): metric is string => typeof metric === \"string\");\n}\n\nexport default async function agentSkillEvalsAssertions(\n output: string,\n context: PromptfooAssertContext,\n): Promise<GradingResult> {\n const metric = metricFrom(context);\n const assertion = metric ? routes[metric] : undefined;\n if (assertion) {\n return assertion(output, context);\n }\n if (metric && staticMetrics.includes(metric)) {\n return agentSkillEvalsStaticAssertions(output, context);\n }\n return {\n pass: false,\n score: 0,\n reason: `agent-skill-evals assertions: unknown metric \"${metric ?? \"missing\"}\". Available metrics: ${availableMetrics}`,\n };\n}\n"],"mappings":";;;;;;;AA4DA,SAAgB,mBACd,SACwF;AACxF,QAAO,OAAO,IAAI,aAAa;EAC/B,MAAM,SAAS,QAAQ,kBAAkB;AACzC,MAAI,UAAU,OAAO,WAAW,YAAY,eAAe,OACzD,QAAO;EAKT,MAAM,UAAS,QADI,OADQ,aACI,KACZ;AACnB,MAAI,QAAQ;GAEV,MAAM,SAAS,QAAO,OADJ,YACO,SAAS,KAAK,QAAQ,8BAA8B,CAAC,CAAC,KAC7E,OAAO,KAAK,QAAQ;AAClB,QAAI;AACF,YAAO,KAAK,MAAM,IAAI;YAChB;AACN,YAAO;;KAET,EACF,OAAO,eAAe,OAAO,QAAQ,KAAK,CAAC,CAC5C;AACD,OAAI,OACF,QAAO;;AAGX,SAAO;GACL;;AASJ,SAAgB,mBACd,MACqD;AACrD,QAAO,OAAO,IAAI,aAAa;EAE/B,MAAM,MAAM,QAAO,OADD,YACI,SAAS,KAAK,aAAa,CAAC,KAChD,OAAO,UAAU,wBAAQ,IAAI,MAAM,4BAA4B,KAAK,aAAa,IAAI,eAAe,QAAQ,IAAI,UAAU,OAAO,IAAI,GAAG,CAAC,CAC1I;EAMD,MAAM,UAAU,6BAA6B,OALvB,OAAO,IAAI;GAC/B,WAAW,KAAK,MAAM,IAAI;GAC1B,QAAQ,wBACN,IAAI,MAAM,6BAA6B,KAAK,aAAa,IAAI,eAAe,QAAQ,IAAI,UAAU,OAAO,IAAI,GAAG;GACnH,CAAC,CACkD;AACpD,MAAI,OAAO,OAAO,QAAQ,CACxB,QAAO,OAAO,OAAO,qBACnB,IAAI,MAAM,4DAA4D,QAAQ,KAAK,UAAU,CAC9F;EAEH,MAAM,WAAW,QAAQ;AACzB,SAAO,kBAAkB,aAAa,SAAS;GAC7C;;AAGJ,SAAgB,UACd,MACA,mBACa;AACb,QAAO,gBAAgB,KAAK,YAAY,UAAU,kBAAkB,WAAW,MAAM,CAAC;;AAgBxF,SAAgB,iBACd,SACA,OACA,mBACA,MAC6E;AAC7E,QAAO,OAAO,IAAI,aAAa;EAC/B,MAAM,UAA4C,EAAE;AACpD,OAAK,MAAM,SAAS,SAAS;GAC3B,MAAM,SAAS,OAAO,gBAAgB,MAAM,KAAK;AACjD,OAAI,CAAC,QAAQ;AACX,YAAQ,KAAK;KACX,MAAM;KACN,OAAO;KACP,QAAQ,wBAAwB,MAAM;KACvC,CAAC;AACF;;GAEF,MAAM,IAAI,OAAO,OAAO,OAAO;IAC7B,WAAW,MAAM;I
ACjB;IACA,UAAU,qBAAqB,kBAAkB,YAAY,CAAC;IAC9D;IACD,CAAC;AACF,WAAQ,KAAK,EAAE;;AAEjB,SAAO;GACL;;AAGJ,SAAgB,UACd,SACA,aACA,UAAmC,EAAE,EACtB;AACf,KAAI,QAAQ,WAAW,GAAG;EACxB,MAAM,OAAO,QAAQ,aAAa;AAClC,SAAO;GAAE;GAAM,OAAO,OAAO,IAAI;GAAG,QAAQ;GAAa;;CAE3D,MAAM,UAAU,QAAQ,OAAO,MAAM,EAAE,KAAK;CAC5C,MAAM,SAAS,QAAQ,QAAQ,MAAM,CAAC,EAAE,KAAK,CAAC,KAAK,MAAM,EAAE,OAAO;AAClE,QAAO;EACL,MAAM;EACN,OAAO,UAAU,IAAI;EACrB,QAAQ,UACJ,GAAG,QAAQ,OAAO,oBAClB,OAAO,KAAK,KAAK;EACrB,kBAAkB,QAAQ,KAAK,OAAO;GACpC,MAAM,EAAE;GACR,OAAO,EAAE;GACT,QAAQ,EAAE;GACX,EAAE;EACJ;;;;AC3KH,eAA8B,UAC5B,SACA,SACwB;AACxB,QAAO,OAAO,WACZ,gBAAgB,SAAS,QAAQ,CAAC,KAChC,OAAO,QAAQ,wBAAwB,EACvC,OAAO,QAAQ,iBAAiB,CACjC,CACF;;AAGH,SAAS,gBACP,SACA,SACqF;AACrF,QAAO,OAAO,IAAI,aAAa;EAC7B,MAAM,OAAO,OAAO,mBAAmB,QAAQ;AAC/C,MAAI,CAAC,KACH,QAAO;GACL,MAAM;GACN,OAAO;GACP,QAAQ;GACT;EAGH,MAAM,sBAAsB,KAAK,uBAAuB,EAAE;AAC1D,MAAI,CAAC,KAAK,oBACR,QAAO,UAAU,qBAAqB,mCAAmC;EAG3E,MAAM,OAAQ,QAAQ,QAAQ,QAAQ,MAAM,QAAQ,EAAE;EACtD,MAAM,iBAAiB,OAAO,OAAO,OAAO,mBAAmB,KAAK,CAAC;AACrE,MAAI,OAAO,OAAO,eAAe,EAAE;GACjC,MAAM,MAAM,eAAe;AAC3B,UAAO;IAAE,MAAM;IAAO,OAAO;IAAG,QAAQ,eAAe,QAAQ,IAAI,UAAU,OAAO,IAAI;IAAE;;EAE5F,MAAM,oBAAoB,eAAe;EACzC,MAAM,QAAQ,UAAU,MAAM,kBAAkB;EAEhD,MAAM,SAAS,uBAAuB,KAAK;EAC3C,MAAM,eAAe,OAAO,OACzB,QAAQ,UAAU,MAAM,UAAU,gBAAgB,CAClD,KAAK,WAAW;GACf,MAAM;GACN,OAAO;GACP,QAAQ,sBAAsB,MAAM,UAAU,KAAA,IAAY,MAAM,QAAQ,GAAG,MAAM,MAAM,GAAG,MAAM,MAAM,GAAG,IAAI,MAAM;GACpH,EAAE;EACL,MAAM,gBAAgB,OAAO,iBAAiB,OAAO,QAAQ,OAAO,mBAAmB,SAAS;EAChG,MAAM,mBAAmB,OAAO,iBAAiB,OAAO,YAAY,OAAO,mBAAmB,aAAa;AAC3G,SAAO,sBAAsB,mBAAmB,KAAK,OAAO,CAAC,KAAK,OAAO,MAAM;AAE/E,SAAO,UAAU;GADA,GAAG;GAAqB,GAAG;GAAc,GAAG;GAAe,GAAG;GACvD,EAAE,sDAAsD,EAC9E,WAAW,OACZ,CAAC;GACF;;;;AC9DJ,MAAM,eAAe;CACnB,CAAC,SAAS,iBAAiB;CAC3B,CAAC,UAAU,kBAAkB;CAC7B,CAAC,cAAc,sBAAsB;CACrC,CAAC,UAAU,kBAAkB;CAC9B;AAED,SAASA,eAAa,OAAyC;AAC7D,QAAO,SAAS,OAAO,UAAU,YAAY,CAAC,MAAM,QAAQ,MAAM,GAC9D,QACA,EAAE;;AAGR,SAASC,gBAAc,QAAiC,KAA+C;CACrG,MAAM,QAAQ,OAAO;AACrB,QAAO,OAAO,UAAU,YAAY,OAAO,SAAS,MAAM,GAAG,QAAQ,KAAA;;AAGvE,SAAS,eAAe,
SAAiD;CACvE,MAAM,SAASD,eAAa,QAAQ,QAAQ,gBAAgB;AAC5D,QAAO;EACL,gBAAgBC,gBAAc,QAAQ,iBAAiB;EACvD,iBAAiBA,gBAAc,QAAQ,kBAAkB;EACzD,qBAAqBA,gBAAc,QAAQ,sBAAsB;EACjE,iBAAiBA,gBAAc,QAAQ,kBAAkB;EAC1D;;AAGH,SAAS,WAAW,SAAyD;CAC3E,MAAM,QAAQ,QAAQ,kBAAkB;AACxC,QAAO,SAAS,OAAO,UAAU,YAAY,CAAC,MAAM,QAAQ,MAAM,GAC9D,QACA,KAAA;;AAGN,eAA8B,YAC5B,SACA,SACwB;CACxB,MAAM,QAAQ,WAAW,QAAQ;AACjC,KAAI,CAAC,MACH,QAAO;EACL,MAAM;EACN,OAAO;EACP,QAAQ;EACT;CAGH,MAAM,WAAW,eAAe,QAAQ;CACxC,MAAM,aAAa,aAAa,QAAQ,GAAG,cAAc,SAAS,cAAc,KAAA,EAAU;AAC1F,KAAI,WAAW,WAAW,EACxB,QAAO;EACL,MAAM;EACN,OAAO;EACP,QAAQ;EACT;CAGH,MAAM,aAAa,WAAW,KAAK,CAAC,UAAU,cAAc;EAC1D,MAAM,SAAS,MAAM;EACrB,MAAM,QAAQ,SAAS,aAAa;AACpC,MAAI,OAAO,WAAW,YAAY,CAAC,OAAO,SAAS,OAAO,CACxD,QAAO;GACL,MAAM;GACN,OAAO;GACP,QAAQ,GAAG,SAAS;GACrB;AAEH,SAAO;GACL,MAAM,UAAU;GAChB,OAAO,UAAU,QAAQ,IAAI;GAC7B,QAAQ,GAAG,SAAS,UAAU,OAAO,MAAM;GAC5C;GACD;CACF,MAAM,SAAS,WAAW,QAAQ,cAAc,CAAC,UAAU,KAAK;AAChE,QAAO;EACL,MAAM,OAAO,WAAW;EACxB,OAAO,OAAO,WAAW,IAAI,IAAI;EACjC,QAAQ,OAAO,WAAW,IACtB,iBAAiB,WAAW,OAAO,qBACnC,OAAO,KAAK,cAAc,UAAU,OAAO,CAAC,KAAK,KAAK;EAC1D,kBAAkB;EACnB;;;;AC/EH,SAAgB,cACd,SAC+B;CAC/B,MAAM,IAAI,QAAQ,kBAAkB;AACpC,KAAI,KAAK,OAAO,MAAM,aAAa,WAAW,KAAK,WAAW,GAC5D,QAAO;AAET,QAAO;;AAGT,SAAgB,KAAK,QAAgB,YAA+D;AAClG,QAAO;EAAE,MAAM;EAAM,OAAO;EAAG;EAAQ,kBAAkB;EAAY;;AAGvE,SAAgB,KAAK,QAAgB,YAA+D;AAClG,QAAO;EAAE,MAAM;EAAO,OAAO;EAAG;EAAQ,kBAAkB;EAAY;;;;ACxBxE,MAAM,mBAAuC;CAC3C,eAAe;CACf,oBAAoB,CAAC,8BAA8B,cAAc;CACjE,oBAAoB;CACpB,cAAc,CAAC,8BAA8B,cAAc;CAC5D;AAED,MAAM,oBAAoB,OAAO,MAAM,OAAO,OAAO;AAErD,SAAS,aAAa,OAAyC;AAC7D,QAAO,SAAS,OAAO,UAAU,YAAY,CAAC,MAAM,QAAQ,MAAM,GAC9D,QACA,EAAE;;AAGR,SAAS,cACP,QACA,KACA,UACQ;AACR,QAAO,OAAO,GAAG,OAAO,OAAO,CAAC,OAAO,KAAK,GAAG,OAAO,OAAO;;AAG/D,SAAS,mBACP,QACA,KACA,UACmB;AACnB,QAAO,OAAO,GAAG,kBAAkB,CAAC,OAAO,KAAK,GAAG,OAAO,OAAO;;AAGnE,SAAS,eACP,QACA,KACA,UACS;AACT,QAAO,OAAO,OAAO,SAAS,YAAY,OAAO,OAAO;;AAG1D,SAAgB,mBAAmB,SAAqD;CACtF,MAAM,SAAS,aAAa,QAAQ,QAAQ,gBAAgB;AAC5D,QAAO;EACL,eAAe,cAAc,QAAQ,iBAAiB,iBAAiB,cAAc;EACrF,oBAAoB,mBAClB,QACA,sBACA,iBAA
iB,mBAClB;EACD,oBAAoB,eAClB,QACA,sBACA,iBAAiB,mBAClB;EACD,cAAc,mBACZ,QACA,gBACA,iBAAiB,aAClB;EACF;;;;;;;;;;;;ACzDH,eAA8B,eAC5B,SACA,SACwB;CACxB,MAAM,OAAO,cAAc,QAAQ;AACnC,KAAI,CAAC,KAAM,QAAO,KAAK,6CAA6C;CACpE,MAAM,QAAQ,KAAK;AACnB,KAAI,CAAC,MAAO,QAAO,KAAK,oCAAoC;CAE5D,MAAM,EAAE,eAAe,aAAa,mBAAmB,QAAQ;CAE/D,MAAM,aAAgD,EAAE;AAExD,YAAW,KAAK;EACd,MAAM,MAAM,cAAc;EAC1B,OAAO,MAAM,cAAc,WAAW,IAAI;EAC1C,QAAQ,YAAY,MAAM,WAAW,kBAAkB,SAAS;EACjE,CAAC;AAEF,YAAW,KAAK;EACd,MAAM,MAAM,kBAAkB,WAAW;EACzC,OAAO,MAAM,kBAAkB,WAAW,IAAI,IAAI;EAClD,QACE,MAAM,kBAAkB,WAAW,IAC/B,GAAG,MAAM,WAAW,OAAO,8BAC3B,uBAAuB,MAAM,kBAAkB,MAAM,GAAG,EAAE,CAAC,KAAK,KAAK;EAC5E,CAAC;CAGF,MAAM,SAAS,WAAW,QAAQ,MAAM,CAAC,EAAE,QAAQ,EAAE,OAAO,WAAW,qBAAqB,CAAC;AAC7F,KAAI,OAAO,WAAW,GAAG;EACvB,MAAM,WAAW,WAAW,MAAM,MAAM,EAAE,QAAQ,EAAE;AACpD,SAAO,WACH;GAAE,MAAM;GAAM,OAAO;GAAK,QAAQ,SAAS;GAAQ,kBAAkB;GAAY,GACjF,KAAK,uBAAuB,WAAW;;AAE7C,QAAO,KAAK,OAAO,KAAK,MAAM,KAAK,EAAE,SAAS,CAAC,KAAK,KAAK,EAAE,WAAW;;;;;;;;ACzCxE,eAA8B,iBAC5B,SACA,SACwB;CACxB,MAAM,OAAO,cAAc,QAAQ;AACnC,KAAI,CAAC,KAAM,QAAO,KAAK,+CAA+C;CACtE,MAAM,QAAQ,KAAK;AACnB,KAAI,CAAC,MAAO,QAAO,KAAK,sCAAsC;CAE9D,MAAM,aAAgD,EAAE;AAExD,YAAW,KAAK;EACd,MAAM,MAAM,uBAAuB,WAAW;EAC9C,OAAO,MAAM,uBAAuB,WAAW,IAAI,IAAI;EACvD,QAAQ,MAAM,uBAAuB,WAAW,IAC5C,GAAG,MAAM,gBAAgB,OAAO,mCAChC,6BAA6B,MAAM,uBAAuB,MAAM,GAAG,EAAE,CAAC,KAAK,KAAK;EACrF,CAAC;AAEF,YAAW,KAAK;EACd,MAAM,MAAM,6BAA6B,WAAW;EACpD,OAAO,MAAM,6BAA6B,WAAW,IAAI,IAAI;EAC7D,QAAQ,MAAM,6BAA6B,WAAW,IAClD,GAAG,MAAM,gBAAgB,OAAO,kCAChC,oCAAoC,MAAM,6BAA6B,MAAM,GAAG,EAAE,CAAC,KAAK,KAAK;EAClG,CAAC;AAEF,YAAW,KAAK;EACd,MAAM,MAAM,oBAAoB,WAAW;EAC3C,OAAO,MAAM,oBAAoB,WAAW,IAAI,IAAI;EACpD,QAAQ,MAAM,oBAAoB,WAAW,IACzC,GAAG,MAAM,aAAa,OAAO,2BAC7B,qBAAqB,MAAM,oBAAoB,MAAM,GAAG,EAAE,CAAC,KAAK,KAAK;EAC1E,CAAC;CAEF,MAAM,SAAS,WAAW,QAAQ,MAAM,CAAC,EAAE,KAAK;AAChD,KAAI,OAAO,WAAW,EAAG,QAAO,KAAK,yBAAyB,WAAW;AACzE,QAAO,KAAK,OAAO,KAAK,MAAM,KAAK,EAAE,SAAS,CAAC,KAAK,KAAK,EAAE,WAAW;;;;;;;;;;;ACjCxE,MAAM,kBACJ;AACF,MAAM,qBACJ;AAEF,eAA8B,uBAC5B,SACA,SACwB;CACxB,MAAM,OAAO,cAAc,QAAQ;A
ACnC,KAAI,CAAC,KAAM,QAAO,KAAK,gDAAgD;CACvE,MAAM,QAAQ,KAAK;CACnB,MAAM,QAAQ,KAAK;AACnB,KAAI,CAAC,MAAO,QAAO,KAAK,uCAAuC;CAE/D,MAAM,cAAc,IAAI,IAAY,mBAAmB,QAAQ,CAAC,mBAAmB;AAMnF,KAAI,EAJoB,QACpB,MAAM,MAAM,MAAM,MAAM,EAAE,YAAY,MAAM,MAAM,YAAY,IAAI,EAAE,CAAC,CAAC,GACtE,OAGF,QAAO,KAAK,0DAA0D;CAGxE,MAAM,aAAgD,EAAE;CACxD,MAAM,aAAa,gBAAgB,KAAK,MAAM,KAAK,IAAI,mBAAmB,KAAK,MAAM,KAAK;AAC1F,YAAW,KAAK;EACd,MAAM;EACN,OAAO,aAAa,IAAI;EACxB,QAAQ,aACJ,sDACA;EACL,CAAC;CAEF,MAAM,oBAAoB,QACtB,MAAM,MAAM,MAAM,MAAM,MAAM,QAAQ,EAAE,KAAK,WAAW,IAAI,EAAE,KAAK,WAAW,SAAS,EAAE,GACzF;AACJ,YAAW,KAAK;EACd,MAAM;EACN,OAAO,oBAAoB,IAAI;EAC/B,QAAQ,oBACJ,sDACA;EACL,CAAC;CAEF,MAAM,SAAS,WAAW,QAAQ,MAAM,CAAC,EAAE,KAAK;AAChD,KAAI,OAAO,WAAW,EAAG,QAAO,KAAK,0BAA0B,WAAW;AAC1E,QAAO,KAAK,OAAO,KAAK,MAAM,KAAK,EAAE,SAAS,CAAC,KAAK,KAAK,EAAE,WAAW;;;;;;;;;;ACjDxE,eAA8B,iBAC5B,SACA,SACwB;CACxB,MAAM,OAAO,cAAc,QAAQ;AACnC,KAAI,CAAC,KAAM,QAAO,KAAK,yCAAyC;CAChE,MAAM,QAAQ,KAAK;AACnB,KAAI,CAAC,MAAO,QAAO,KAAK,gCAAgC;CAExD,MAAM,QAAQ,IAAI,IAAY,mBAAmB,QAAQ,CAAC,aAAa;AAGvE,KAAI,CADc,MAAM,MAAM,MAAM,MAAM,EAAE,YAAY,MAAM,MAAM,MAAM,IAAI,EAAE,CAAC,CACnE,CACZ,QAAO,KAAK,4DAA4D;CAG1E,MAAM,YAAY,MAAM,MAAM,QAAQ,MAAM,EAAE,WAAW;AACzD,KAAI,UAAU,WAAW,EACvB,QAAO,KACL,0CAA0C,CAAC,GAAG,MAAM,CAAC,KAAK,KAAK,CAAC,qDACjE;AAEH,QAAO,KAAK,gBAAgB,UAAU,OAAO,mBAAmB;;;;AC7BlE,MAAM,kBAAkB;CACtB;CACA;CACA;CACA;CACD;AAED,MAAM,iBAAiB;AACvB,MAAM,cAAc;;;;AAKpB,eAA8B,gBAC5B,SACA,SACwB;CACxB,MAAM,OAAO,cAAc,QAAQ;AACnC,KAAI,CAAC,KAAM,QAAO,KAAK,8CAA8C;CACrE,MAAM,QAAQ,KAAK;AACnB,KAAI,CAAC,MAAO,QAAO,KAAK,+DAA+D;CAEvF,MAAM,KAAK,MAAM;CACjB,MAAM,aAAgD,EAAE;CAExD,MAAM,OAAO,OAAO,GAAG,SAAS,WAAW,GAAG,KAAK,MAAM,GAAG;AAC5D,YAAW,KAAK;EACd,MAAM,KAAK,SAAS;EACpB,OAAO,KAAK,SAAS,IAAI,IAAI;EAC7B,QAAQ,OAAO,SAAS,SAAS;EAClC,CAAC;CAEF,MAAM,OAAO,OAAO,GAAG,gBAAgB,WAAW,GAAG,YAAY,MAAM,GAAG;AAC1E,YAAW,KAAK;EACd,MAAM,KAAK,SAAS;EACpB,OAAO,KAAK,SAAS,IAAI,IAAI;EAC7B,QAAQ,OAAO,wBAAwB,KAAK,OAAO,WAAW;EAC/D,CAAC;CAEF,MAAM,UAAU,eAAe,KAAK,KAAK;AACzC,YAAW,KAAK;EACd,MAAM;EACN,OAAO,UAAU,IAAI;EACrB,QAAQ,UAAU,iCAAiC;EACpD,
CAAC;CAEF,MAAM,aAAa,YAAY,KAAK,KAAK;AACzC,YAAW,KAAK;EACd,MAAM;EACN,OAAO,aAAa,IAAI;EACxB,QAAQ,aAAa,qCAAqC;EAC3D,CAAC;CAEF,MAAM,UAAU,gBAAgB,MAAM,OAAO,GAAG,KAAK,KAAK,CAAC,IAAI,KAAK,SAAS;AAC7E,YAAW,KAAK;EACd,MAAM,CAAC;EACP,OAAO,UAAU,IAAI;EACrB,QAAQ,UAAU,+BAA+B;EAClD,CAAC;CAEF,MAAM,SAAS,WAAW,QAAQ,MAAM,CAAC,EAAE,KAAK;AAChD,KAAI,OAAO,WAAW,EACpB,QAAO,KAAK,qBAAqB,WAAW,OAAO,GAAG,WAAW,OAAO,aAAa,WAAW;AAElG,QAAO,KACL,OAAO,KAAK,MAAM,KAAK,EAAE,SAAS,CAAC,KAAK,KAAK,EAC7C,WACD;;;;;;;;;AC7DH,eAA8B,iBAC5B,SACA,SACwB;CACxB,MAAM,OAAO,cAAc,QAAQ;AACnC,KAAI,CAAC,KAAM,QAAO,KAAK,yCAAyC;CAChE,MAAM,QAAQ,KAAK;AACnB,KAAI,CAAC,MAAO,QAAO,KAAK,0DAA0D;CAClF,MAAM,WAAW,mBAAmB,QAAQ;CAE5C,MAAM,aAAgD,EAAE;AAExD,KAAI,MAAM,YAAY,SAAS,EAC7B,YAAW,KAAK;EACd,MAAM;EACN,OAAO;EACP,QAAQ,iBAAiB,MAAM,YAAY,KAAK,MAAM,GAAG,EAAE,SAAS,IAAI,EAAE,QAAQ,CAAC,KAAK,KAAK;EAC9F,CAAC;AAGJ,KAAI,MAAM,aAAa,WAAW,EAChC,YAAW,KAAK;EACd,MAAM;EACN,OAAO;EACP,QAAQ;EACT,CAAC;AAGJ,MAAK,MAAM,KAAK,MAAM,OAAO;EAC3B,MAAM,SAAmB,EAAE;AAC3B,OAAK,MAAM,SAAS,EAAE,aAAa;GACjC,MAAM,KAAK,MAAM,UAAU,KAAA,IAAY,MAAM,QAAQ,GAAG,MAAM,MAAM,GAAG,MAAM,MAAM;AACnF,UAAO,KAAK,GAAG,GAAG,IAAI,MAAM,SAAS;;AAEvC,MAAI,OAAO,EAAE,KAAK,WAAW,YAAY,EAAE,KAAK,OAAO,WAAW,EAChE,QAAO,KAAK,sBAAsB;AAEpC,MAAI,CAAC,EAAE,WACL,QAAO,KAAK,mDAAmD;AAKjE,MAAI,EAFD,MAAM,QAAQ,EAAE,KAAK,OAAO,IAAI,EAAE,KAAK,OAAO,SAAS,KACvD,MAAM,QAAQ,EAAE,KAAK,WAAW,IAAI,EAAE,KAAK,WAAW,SAAS,GACnD,QAAO,KAAK,yBAAyB;AACpD,MAAI,SAAS,sBAAsB,CAAC,EAAE,eACpC,QAAO,KAAK,iCAAiC;AAE/C,aAAW,KAAK;GACd,MAAM,OAAO,WAAW;GACxB,OAAO,OAAO,WAAW,IAAI,IAAI;GACjC,QAAQ,GAAG,EAAE,eAAe,mBAAmB,IAAI,OAAO,WAAW,IAAI,OAAO,OAAO,KAAK,KAAK;GAClG,CAAC;;AAGJ,KAAI,KAAK,sBAAsB,SAAS,EACtC,YAAW,KAAK;EACd,MAAM;EACN,OAAO;EACP,QAAQ,6BAA6B,KAAK,sBAAsB,KAAK,KAAK;EAC3E,CAAC;AAGJ,KAAI,KAAK,aAAa,SAAS,EAC7B,YAAW,KAAK;EACd,MAAM;EACN,OAAO;EACP,QAAQ,6BAA6B,KAAK,aAAa,MAAM,GAAG,EAAE,CAAC,KAAK,KAAK;EAC9E,CAAC;CAGJ,MAAM,SAAS,WAAW,QAAQ,MAAM,CAAC,EAAE,KAAK;AAChD,KAAI,OAAO,WAAW,EACpB,QAAO,KAAK,gBAAgB,MAAM,MAAM,OAAO,cAAc,WAAW;AAE1E,QAAO,KACL,OAAO,KAAK,MAAM,KAAK,EAAE,SAAS,CAAC,KAAK,KA
AK,EAC7C,WACD;;;;ACvEH,MAAM,eAAuD;CAC3D,oBAAoB,CAAC,gBAAgB;CACrC,iBAAiB,CAAC,iBAAiB;CACnC,iBAAiB,CAAC,eAAe;CACjC,sBAAsB,CAAC,uBAAuB;CAC9C,eAAe,CAAC,kBAAkB,iBAAiB;CACnD,mBAAmB,CAAC,iBAAiB;CACtC;AAED,aAAa,kBAAkB;CAC7B;CACA;CACA;CACA;CACA;CACA;CACD;AAED,MAAMC,qBAAmB,OAAO,KAAK,aAAa,CAAC,MAAM,CAAC,KAAK,KAAK;AAEpE,SAASC,aAAW,SAAqD;AAOvE,QAAO;EALL,QAAQ,WAAW;EACnB,QAAQ,QAAQ;EAChB,QAAQ,QAAQ;EAChB,QAAQ;EAEO,CAAC,MAAM,WAA6B,OAAO,WAAW,SAAS;;AAWlF,SAAS,gBACP,QACA,SACA,QAC8B;AAC9B,QAAO,OAAO,IAAI,aAAa;EAC/B,MAAM,UAAU,OAAO,OAAO,QAC5B,SACC,UAAU,OAAO,cAAc,MAAM,QAAQ,QAAQ,CAAC,EACvD,EAAE,aAAa,aAAa,CAC7B;EACD,MAAM,SAAS,QAAQ,QAAQ,WAAW,CAAC,OAAO,KAAK;EACvD,MAAM,OAAO,QAAQ,QAAQ,WAAW,OAAO,QAAQ,OAAO,QAAQ,EAAE;AACxE,SAAO;GACL,MAAM,OAAO,WAAW;GACxB,OAAO,OAAO,WAAW,IACrB,KAAK,IAAI,GAAG,QAAQ,KAAK,WAAW,OAAO,MAAM,CAAC,GAClD;GACJ,QAAQ,OAAO,WAAW,IACtB,KAAK,SAAS,IACZ,KAAK,KAAK,WAAW,OAAO,OAAO,CAAC,KAAK,KAAK,GAC9C,iBAAiB,QAAQ,OAAO,oBAClC,OAAO,KAAK,WAAW,OAAO,OAAO,CAAC,KAAK,KAAK;GACpD,kBAAkB;GACnB;GACC;;AAGJ,eAAsB,gCACpB,QACA,SACwB;AACxB,QAAO,OAAO,WAAW,sCAAsC,QAAQ,QAAQ,CAAC;;AAGlF,SAAS,sCACP,QACA,SAC8B;AAC9B,QAAO,OAAO,IAAI,aAAa;EAC/B,MAAM,SAASA,aAAW,QAAQ;EAClC,MAAM,SAAS,SAAS,aAAa,UAAU,KAAA;AAC/C,MAAI,CAAC,OACH,QAAO;GACL,MAAM;GACN,OAAO;GACP,QAAQ,mDAAmD,UAAU,UAAU,wBAAwBD;GACxG;AAEH,SAAO,OAAO,gBAAgB,QAAQ,SAAS,OAAO;GACpD;;;;AC5FJ,MAAM,SAAsC;CAC1C,gBAAgB;CAChB,cAAc;CACf;AACD,MAAM,gBAAgB;CACpB;CACA;CACA;CACA;CACA;CACA;CACA;CACD;AACD,MAAM,mBAAmB,CAAC,GAAG,OAAO,KAAK,OAAO,EAAE,GAAG,cAAc,CAAC,MAAM,CAAC,KAAK,KAAK;AAIrF,SAAS,WAAW,SAAqD;AAOvE,QAAO;EALL,QAAQ,WAAW;EACnB,QAAQ,QAAQ;EAChB,QAAQ,QAAQ;EAChB,QAAQ;EAEO,CAAC,MAAM,WAA6B,OAAO,WAAW,SAAS;;AAGlF,eAA8B,0BAC5B,QACA,SACwB;CACxB,MAAM,SAAS,WAAW,QAAQ;CAClC,MAAM,YAAY,SAAS,OAAO,UAAU,KAAA;AAC5C,KAAI,UACF,QAAO,UAAU,QAAQ,QAAQ;AAEnC,KAAI,UAAU,cAAc,SAAS,OAAO,CAC1C,QAAO,gCAAgC,QAAQ,QAAQ;AAEzD,QAAO;EACL,MAAM;EACN,OAAO;EACP,QAAQ,iDAAiD,UAAU,UAAU,wBAAwB;EACtG"}
@@ -0,0 +1,90 @@
1
+ import { a as FileEvent, c as Usage, i as EvidenceSnapshot, n as EvidenceHandle, o as SkillLoadEvent, r as CommandEvent, s as ToolCallEvent, t as AgentSkillEvalsAssertionResult } from "./internal-services-DbsekQ_K.mjs";
2
+ //#region src/agent/evidence.d.ts
3
/**
 * Optional configuration split into three independent sections:
 * `mcpResource`, `mcpTool` and `nativeArgs`. Every field is optional.
 *
 * NOTE(review): this declaration only exposes names and types. How each
 * list is matched is defined in src/agent/evidence.ts — confirm there.
 */
interface SkillEvidenceConfig {
  /** Settings keyed by MCP resource URIs (two read-only string lists). */
  mcpResource?: {
    uriArgPaths?: ReadonlyArray<string>;
    uriPatterns?: ReadonlyArray<string>;
  };
  /** Settings keyed by MCP tool name patterns. */
  mcpTool?: {
    toolPatterns?: ReadonlyArray<string>;
  };
  /** Settings derived from native command-line arguments. */
  nativeArgs?: {
    whenArgs?: ReadonlyArray<string>;
    whenAnyArgs?: ReadonlyArray<string>;
    skillPathFlags?: ReadonlyArray<string>;
    provider?: string;
    source?: string;
  };
}
19
/**
 * Declared surface of the evidence collector: `add*` / `set*` mutators, a
 * snapshot export, a writer and a static factory.
 *
 * NOTE(review): only signatures are visible in this declaration file; the
 * behaviour of each member lives in src/agent/evidence.ts.
 */
declare class EvidenceCollector {
  /** Builds a collector from an existing snapshot. */
  static fromSnapshot(snapshot: EvidenceSnapshot): EvidenceCollector;

  private readonly skillEvidenceConfig;
  private snapshot;

  constructor(skillEvidenceConfig?: SkillEvidenceConfig);

  // Event recording.
  addCommand(e: CommandEvent): void;
  addFileWrite(e: FileEvent): void;
  addToolCall(e: ToolCallEvent): void;
  addSkillLoad(e: SkillLoadEvent): void;

  // Usage, output and run metadata.
  // presumably setUsage replaces and addUsage accumulates — TODO confirm
  setUsage(u: Usage): void;
  addUsage(u: Usage): void;
  setOutput(output: string): void;
  setRun(run: EvidenceSnapshot["run"]): void;

  // Export.
  toSnapshot(): EvidenceSnapshot;
  /** Resolves to a string; presumably the written file path — TODO confirm. */
  writeTo(runDir: string): Promise<string>;
}
35
/**
 * Converts an `EvidenceSnapshot` into an `EvidenceHandle`.
 *
 * @param s - The snapshot to wrap.
 * @returns The handle for that snapshot.
 */
declare function evidenceFromSnapshot(s: EvidenceSnapshot): EvidenceHandle;
36
+ //#endregion
37
+ //#region src/agent/index.d.ts
38
/**
 * Options accepted by `AgentSkillEvalsProvider`; every field is optional.
 *
 * NOTE(review): defaults and exact semantics are not visible in this
 * declaration — confirm against src/agent/index.ts.
 */
interface ProviderConfig {
  adapter?: string;
  command?: string;
  args?: ReadonlyArray<string>;
  /** presumably milliseconds, going by the name — TODO confirm */
  timeoutMs?: number;
  baseDir?: string;
  isolatedHome?: boolean;
  skillEvidence?: SkillEvidenceConfig;
}
47
/**
 * Context object passed as the optional second argument of
 * `AgentSkillEvalsProvider.callApi`. Both top-level keys are optional.
 */
interface PromptfooContext {
  /** Free-form variables. */
  vars?: Record<string, unknown>;
  /** Per-test data: its own variables plus free-form metadata. */
  test?: { vars?: Record<string, unknown>; metadata?: Record<string, unknown> };
}
54
/**
 * Value that `AgentSkillEvalsProvider.callApi` resolves to. Only `output`
 * is required.
 */
interface ProviderResponse {
  output: string;
  metadata?: Record<string, unknown>;
  cost?: number;
  /** Token counters; each one is optional. */
  tokenUsage?: { total?: number; prompt?: number; completion?: number; cached?: number };
  error?: string;
}
66
/**
 * Metadata record describing one provider run.
 *
 * NOTE(review): the field meanings below are read from the names only;
 * confirm against src/agent/index.ts.
 */
interface AgentSkillEvalsProviderMetadata {
  // Locations (all required strings).
  runDir: string;
  worldPath: string;
  evidencePath: string;
  fixture: string;

  // Optional labels.
  skill?: string;
  kind?: string;

  // Precondition outcome.
  preconditionResults: Array<AgentSkillEvalsAssertionResult>;
  preconditionsPassed: boolean;

  /** presumably milliseconds, going by the name — TODO confirm */
  durationMs: number;
}
77
/**
 * Provider class declaration: holds a `ProviderConfig`, exposes an `id`
 * accessor function and an async `callApi` entry point.
 *
 * NOTE(review): only signatures are visible here; runtime behaviour is in
 * src/agent/index.ts.
 */
declare class AgentSkillEvalsProvider {
  config: ProviderConfig;
  private readonly configError?;
  /** Returns the provider's identifier string. */
  id: () => string;

  constructor(options?: { config?: ProviderConfig; id?: string });

  /**
   * @param prompt - Prompt text.
   * @param context - Optional Promptfoo context.
   * @returns A promise of the provider response.
   */
  callApi(prompt: string, context?: PromptfooContext): Promise<ProviderResponse>;

  private callApiEffect;
}
88
+ //#endregion
89
+ export { evidenceFromSnapshot as i, AgentSkillEvalsProviderMetadata as n, EvidenceCollector as r, AgentSkillEvalsProvider as t };
90
+ //# sourceMappingURL=index-4l7TCFny.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index-4l7TCFny.d.mts","names":[],"sources":["../src/agent/evidence.ts","../src/agent/index.ts"],"mappings":";;UAmBiB,mBAAA;EACf,WAAA;IACE,WAAA;IACA,WAAA;EAAA;EAEF,OAAA;IACE,YAAA;EAAA;EAEF,UAAA;IACE,QAAA;IACA,WAAA;IACA,cAAA;IACA,QAAA;IACA,MAAA;EAAA;AAAA;AAAA,cAcS,iBAAA;EAAA,iBACM,mBAAA;EAAA,QACT,QAAA;cAeI,mBAAA,GAAqB,mBAAA;EAIjC,UAAA,CAAW,CAAA,EAAG,YAAA;EAId,YAAA,CAAa,CAAA,EAAG,SAAA;EAIhB,WAAA,CAAY,CAAA,EAAG,aAAA;EAMf,YAAA,CAAa,CAAA,EAAG,cAAA;EAWhB,QAAA,CAAS,CAAA,EAAG,KAAA;EAIZ,QAAA,CAAS,CAAA,EAAG,KAAA;EAIZ,SAAA,CAAU,MAAA;EAIV,MAAA,CAAO,GAAA,EAAK,gBAAA;EAIZ,UAAA,CAAA,GAAc,gBAAA;EAcR,OAAA,CAAQ,MAAA,WAAiB,OAAA;EAAA,OAIxB,YAAA,CAAa,QAAA,EAAU,gBAAA,GAAmB,iBAAA;AAAA;AAAA,iBA+GnC,oBAAA,CAAqB,CAAA,EAAG,gBAAA,GAAmB,cAAA;;;UC3MjD,cAAA;EACR,OAAA;EACA,OAAA;EACA,IAAA;EACA,SAAA;EACA,OAAA;EACA,YAAA;EACA,aAAA,GAAgB,mBAAA;AAAA;AAAA,UAgCR,gBAAA;EACR,IAAA,GAAO,MAAA;EACP,IAAA;IAAS,IAAA,GAAO,MAAA;IAAyB,QAAA,GAAW,MAAA;EAAA;AAAA;AAAA,UAG5C,gBAAA;EACR,MAAA;EACA,QAAA,GAAW,MAAA;EACX,IAAA;EACA,UAAA;IAAe,KAAA;IAAgB,MAAA;IAAiB,UAAA;IAAqB,MAAA;EAAA;EACrE,KAAA;AAAA;AAAA,UAiBe,+BAAA;EACf,MAAA;EACA,SAAA;EACA,YAAA;EACA,OAAA;EACA,KAAA;EACA,IAAA;EACA,mBAAA,EAAqB,8BAAA;EACrB,mBAAA;EACA,UAAA;AAAA;AAAA,cA+TI,uBAAA;EACJ,MAAA,EAAQ,cAAA;EAAA,iBACS,WAAA;EACjB,EAAA;cAEY,OAAA;IAAW,MAAA,GAAS,cAAA;IAAgB,EAAA;EAAA;EAY1C,OAAA,CACJ,MAAA,UACA,OAAA,GAAS,gBAAA,GACR,OAAA,CAAQ,gBAAA;EAAA,QAMH,aAAA;AAAA"}
@@ -0,0 +1,226 @@
1
+ import * as Either from "effect/Either";
2
+ import * as Effect from "effect/Effect";
3
+ import * as Layer from "effect/Layer";
4
+ import * as Schema from "effect/Schema";
5
+ import * as Context from "effect/Context";
6
+ import { constants } from "node:fs";
7
+ import * as PlatformFileSystem from "@effect/platform/FileSystem";
8
+ import * as NodeFileSystem from "@effect/platform-node/NodeFileSystem";
9
+ import { parse } from "yaml";
10
+ //#region src/runtime-checks/check-set.ts
11
/** Every runtime check type name declared by this module. */
const RUNTIME_CHECK_TYPES = [
  "verifier.succeeds",
  "verifier.fails",
  "file.exists",
  "file.created",
  "file.contains",
  "file.not_modified",
  "file.changes_outside_scope",
  "code.pattern_exists",
  "code.no_pattern",
  "tool.called",
  "tool.not_called",
  "skill.loaded"
];

/**
 * Check types that `parseAssertionEntries` refuses under `should_not`
 * ("must be declared under should, not should_not").
 */
const DOUBLE_NEGATIVE_CHECK_TYPES = [
  "code.no_pattern",
  "file.not_modified",
  "tool.not_called"
];

// The bundle previously evaluated a bare `new Set(RUNTIME_CHECK_TYPES);`
// here and discarded the result. That allocation had no observer, so it
// has been dropped.

/** Membership lookup over `DOUBLE_NEGATIVE_CHECK_TYPES`. */
const doubleNegativeCheckTypeSet = new Set(DOUBLE_NEGATIVE_CHECK_TYPES);
32
+ //#endregion
33
+ //#region src/assertion-entries.ts
34
/** Accepts any array; the elements are validated individually later. */
const RuntimeFieldArraySchema = Schema.Array(Schema.Unknown);

/** Accepts non-null, non-array objects only. */
const EntryObjectSchema = Schema.Unknown.pipe(
  Schema.filter(
    (value) => value !== null && typeof value === "object" && !Array.isArray(value),
    { identifier: "AssertionEntryObject" },
  ),
);

/** Accepts strings with at least one character. */
const NonEmptyTypeSchema = Schema.String.pipe(
  Schema.filter((value) => value.length > 0, { identifier: "NonEmptyType" }),
);
37
/**
 * Normalizes the Promptfoo `vars.preconditions`, `vars.should` and
 * `vars.should_not` lists into `{ type, args }` entries.
 *
 * Each list element may be written as:
 * - `"file.exists"`
 * - `{ type: "file.exists", path: "app.js" }`
 * - `{ "file.exists": { path: "app.js" } }`
 *
 * All three fields may be absent. Problems found in any field are gathered
 * in `errors`, in the order preconditions, should, should_not.
 *
 * @param vars - Object carrying the three optional fields.
 * @returns The parsed entries per field plus the combined error list.
 */
function parseRuntimeTestFields(vars) {
  // Every field is parsed the same way; only the key and the mode differ.
  const parseField = (field, mode) =>
    parseAssertionEntries(vars[field], field, { allowMissing: true, mode });

  const preconditions = parseField("preconditions", "precondition");
  const should = parseField("should", "should");
  const shouldNot = parseField("should_not", "should_not");

  return {
    preconditions: preconditions.entries,
    should: should.entries,
    should_not: shouldNot.entries,
    errors: preconditions.errors.concat(should.errors, shouldNot.errors)
  };
}
69
/**
 * Parses one raw assertion list into entries and errors.
 *
 * @param raw - Value of the field; expected to be an array.
 * @param field - Field name, copied into every error record.
 * @param options - `allowMissing` silences the error for `null`/`undefined`;
 *   `mode: "should_not"` rejects the double-negative check types.
 * @returns `{ entries, errors }`; both arrays may be empty.
 */
function parseAssertionEntries(raw, field, options = {}) {
  const entries = [];
  const errors = [];
  const notAnArray = () => ({
    field,
    reason: "must be an array of assertion entries"
  });

  // Missing field: an error only when the caller requires the field.
  if (raw == null) {
    if (!options.allowMissing) errors.push(notAnArray());
    return { entries, errors };
  }

  const decoded = Schema.decodeUnknownEither(RuntimeFieldArraySchema)(raw);
  if (Either.isLeft(decoded)) {
    return { entries, errors: [notAnArray()] };
  }

  const rejectDoubleNegatives = options.mode === "should_not";
  decoded.right.forEach((rawEntry, index) => {
    const parsed = parseAssertionEntry(rawEntry, field, index);
    if ("error" in parsed) {
      errors.push(parsed.error);
      return;
    }
    const { entry } = parsed;
    // A negated check under should_not would read as a double negative.
    if (rejectDoubleNegatives && doubleNegativeCheckTypeSet.has(entry.type)) {
      errors.push({
        field,
        index,
        reason: `"${entry.type}" must be declared under should, not should_not`
      });
      return;
    }
    entries.push(entry);
  });

  return { entries, errors };
}
110
/**
 * Parses a single assertion entry in one of the three supported forms:
 * a bare type string, an object with a `type` key, or a one-key shorthand
 * object whose key is the type and whose value holds the arguments.
 *
 * @param entry - Raw list element.
 * @param field - Name of the field the element came from (for errors).
 * @param index - Position of the element in its list (for errors).
 * @returns `{ entry: { type, args } }` on success, otherwise
 *   `{ error: { field, index, reason } }`.
 */
function parseAssertionEntry(entry, field, index) {
  const fail = (reason) => ({ error: { field, index, reason } });

  // Form 1: "file.exists"
  if (typeof entry === "string") {
    if (Either.isLeft(Schema.decodeUnknownEither(NonEmptyTypeSchema)(entry))) {
      return fail("string entry must not be empty");
    }
    return { entry: { type: entry, args: {} } };
  }

  const decodedObject = Schema.decodeUnknownEither(EntryObjectSchema)(entry);
  if (Either.isLeft(decodedObject)) {
    return fail("entry must be a string, { type: ... }, or shorthand object");
  }
  const candidate = decodedObject.right;

  // Form 2: { type: "file.exists", ...args }. The whole object, including
  // `type`, is passed through as args.
  if ("type" in candidate) {
    const type = Schema.decodeUnknownEither(NonEmptyTypeSchema)(candidate.type);
    if (Either.isLeft(type)) return fail("`type` must be a non-empty string");
    return { entry: { type: type.right, args: candidate } };
  }

  // Form 3: { "file.exists": { ...args } }
  const keys = Object.keys(candidate);
  if (keys.length !== 1) {
    return fail("shorthand assertion object must have exactly one key");
  }
  const type = keys[0];
  // The other two forms reject an empty type; hold the shorthand key to the
  // same rule instead of emitting an entry with type "".
  if (type.length === 0) {
    return fail("shorthand assertion key must be a non-empty string");
  }
  // A null or undefined value means "no arguments".
  const args = candidate[type] ?? {};
  // Arrays report typeof "object" but are not argument objects, matching
  // the definition EntryObjectSchema uses for the entry itself.
  if (typeof args !== "object" || Array.isArray(args)) {
    return fail(`shorthand assertion "${type}" value must be an object`);
  }
  return { entry: { type, args } };
}
159
+ //#endregion
160
+ //#region src/internal-services.ts
161
// Service tags. They stay as `var` class expressions, exactly as the bundler
// emitted them, so binding hoisting is unchanged.

/** Tag for the file-system service. */
var FileSystem = class extends Context.Tag("agent-skill-evals/promptfoo/FileSystem")() {};

/** Tag for the process-environment service. */
var Environment = class extends Context.Tag("agent-skill-evals/promptfoo/Environment")() {};

/** Tag for the YAML parser service. */
var YamlParser = class extends Context.Tag("agent-skill-evals/promptfoo/YamlParser")() {};
164
/**
 * Adapts a stat result carrying `mode` and `type` into the shape used by
 * the FileSystem service: the mode plus two predicate functions.
 *
 * @param info - Object with a numeric `mode` and a string `type`.
 * @returns `{ mode, isDirectory(), isFile() }`; the predicates read
 *   `info.type` each time they are called.
 */
function toFileInfo(info) {
  const hasType = (expected) => () => info.type === expected;
  const { mode } = info;
  return {
    mode,
    isDirectory: hasType("Directory"),
    isFile: hasType("File")
  };
}
171
/**
 * Reports whether any execute bit is set in a file mode.
 *
 * @param mode - Numeric permission bits.
 * @returns `true` when at least one of the three execute bits is set.
 */
function executableFromMode(mode) {
  // 0o111 === 73: owner, group and other execute bits.
  const anyExecuteBit = 0o111;
  const executeBits = mode & anyExecuteBit;
  return executeBits !== 0;
}
174
/**
 * Layer that implements the `FileSystem` tag on top of the platform
 * file system, with `NodeFileSystem.layer` provided underneath.
 */
const NodeFileSystemLive = Layer.effect(
  FileSystem,
  Effect.gen(function* () {
    const platformFs = yield* PlatformFileSystem.FileSystem;

    // stat, converted to the service's file-info shape.
    const statFile = (path) => platformFs.stat(path).pipe(Effect.map(toFileInfo));

    // undefined mode -> undefined flag; otherwise whether the bit is set.
    const flagFrom = (mode, bit) => (mode === void 0 ? void 0 : (mode & bit) !== 0);

    const access = (path, mode) => {
      const readable = flagFrom(mode, constants.R_OK);
      const writable = flagFrom(mode, constants.W_OK);
      const executable = mode === void 0 ? false : (mode & constants.X_OK) !== 0;
      const baseAccess = platformFs.access(path, { ok: true, readable, writable });
      if (!executable) return baseAccess;
      // The platform access call is given only readable/writable, so X_OK is
      // checked here from the execute bits of the stat mode.
      return baseAccess.pipe(
        Effect.zipRight(statFile(path)),
        Effect.flatMap((info) =>
          executableFromMode(info.mode)
            ? Effect.void
            : Effect.fail(new Error(`Path is not executable: ${path}`)),
        ),
      );
    };

    // Lists a directory and stats each child to attach the type predicates.
    const readDirectory = (path) =>
      platformFs.readDirectory(path).pipe(
        Effect.flatMap((names) =>
          Effect.forEach(names, (name) =>
            platformFs.stat(`${path}/${name}`).pipe(
              Effect.map((info) => ({
                name,
                isDirectory: () => info.type === "Directory",
                isFile: () => info.type === "File"
              })),
            ),
          ),
        ),
      );

    return {
      access,
      copyDirectory: (source, destination) => platformFs.copy(source, destination),
      makeDirectory: (path) => platformFs.makeDirectory(path, { recursive: true }),
      makeTempDirectory: (prefix) => platformFs.makeTempDirectory({ prefix }),
      readFile: (path) => platformFs.readFile(path).pipe(Effect.map(Buffer.from)),
      readText: (path) => platformFs.readFileString(path),
      readDirectory,
      stat: statFile,
      writeText: (path, contents) => platformFs.writeFileString(path, contents)
    };
  }),
).pipe(Layer.provide(NodeFileSystem.layer));
204
/** Environment service: current working directory and a copy of `process.env`. */
const NodeEnvironmentLive = Layer.succeed(Environment, {
  cwd: Effect.sync(() => process.cwd()),
  // A shallow copy, so callers never hold the live process.env object.
  env: Effect.sync(() => Object.assign({}, process.env))
});

/** YAML parser service; a thrown parse error becomes the effect's failure value as-is. */
const YamlParserLive = Layer.succeed(YamlParser, {
  parse: (input) =>
    Effect.try({
      try: () => parse(input),
      catch: (error) => error
    })
});

/** All three Node-backed services merged into one layer. */
const NodeServicesLive = Layer.mergeAll(
  NodeFileSystemLive,
  NodeEnvironmentLive,
  YamlParserLive,
);
213
/**
 * Effect that yields `true` when `stat` on the path succeeds and `false`
 * when it fails for any reason.
 *
 * @param path - Path to probe.
 * @returns An effect requiring the `FileSystem` service.
 */
function pathExists(path) {
  return Effect.gen(function* () {
    const fileSystem = yield* FileSystem;
    const probe = fileSystem.stat(path).pipe(
      Effect.as(true),
      Effect.catchAll(() => Effect.succeed(false)),
    );
    return yield* probe;
  });
}
218
/**
 * Effect that yields `true` when the service's `access` check with `X_OK`
 * succeeds and `false` when it fails for any reason.
 *
 * @param path - Path to probe.
 * @returns An effect requiring the `FileSystem` service.
 */
function pathExecutable(path) {
  return Effect.gen(function* () {
    const fileSystem = yield* FileSystem;
    const probe = fileSystem.access(path, constants.X_OK).pipe(
      Effect.as(true),
      Effect.catchAll(() => Effect.succeed(false)),
    );
    return yield* probe;
  });
}
223
+ //#endregion
224
+ export { pathExecutable as a, parseRuntimeTestFields as c, YamlParser as i, RUNTIME_CHECK_TYPES as l, FileSystem as n, pathExists as o, NodeServicesLive as r, parseAssertionEntries as s, Environment as t };
225
+
226
+ //# sourceMappingURL=internal-services-5-mRgNls.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"internal-services-5-mRgNls.mjs","names":["parseYaml"],"sources":["../src/runtime-checks/check-set.ts","../src/assertion-entries.ts","../src/internal-services.ts"],"sourcesContent":["export const RUNTIME_CHECK_TYPES = [\n \"verifier.succeeds\",\n \"verifier.fails\",\n \"file.exists\",\n \"file.created\",\n \"file.contains\",\n \"file.not_modified\",\n \"file.changes_outside_scope\",\n \"code.pattern_exists\",\n \"code.no_pattern\",\n \"tool.called\",\n \"tool.not_called\",\n \"skill.loaded\",\n] as const;\n\nexport type RuntimeCheckType = (typeof RUNTIME_CHECK_TYPES)[number];\n\nexport const DOUBLE_NEGATIVE_CHECK_TYPES = [\n \"code.no_pattern\",\n \"file.not_modified\",\n \"tool.not_called\",\n] as const satisfies readonly RuntimeCheckType[];\n\nexport const runtimeCheckTypeSet = new Set<string>(RUNTIME_CHECK_TYPES);\nexport const doubleNegativeCheckTypeSet = new Set<string>(DOUBLE_NEGATIVE_CHECK_TYPES);\n","import * as Either from \"effect/Either\";\nimport * as Schema from \"effect/Schema\";\nimport { doubleNegativeCheckTypeSet } from \"./runtime-checks/check-set.js\";\nimport type { AssertionMode } from \"./internal-types.js\";\n\nexport interface AssertionEntry {\n type: string;\n args: unknown;\n}\n\nexport interface AssertionEntryError {\n field: string;\n index?: number;\n reason: string;\n}\n\nexport interface ParsedAssertionEntries {\n entries: AssertionEntry[];\n errors: AssertionEntryError[];\n}\n\nexport interface RuntimeTestFieldEntries {\n preconditions: AssertionEntry[];\n should: AssertionEntry[];\n should_not: AssertionEntry[];\n errors: AssertionEntryError[];\n}\n\ninterface TypedEntry {\n type?: unknown;\n [key: string]: unknown;\n}\n\nconst RuntimeFieldArraySchema = Schema.Array(Schema.Unknown);\nconst EntryObjectSchema = Schema.Unknown.pipe(\n Schema.filter(\n (value): value is Record<string, unknown> =>\n typeof value === \"object\" && value !== null && !Array.isArray(value),\n { identifier: \"AssertionEntryObject\" },\n 
),\n);\nconst NonEmptyTypeSchema = Schema.String.pipe(\n Schema.filter((value) => value.length > 0, { identifier: \"NonEmptyType\" }),\n);\n\n/**\n * Normalizes Promptfoo `vars.preconditions | should | should_not` entries.\n *\n * Supported forms:\n * - `\"file.exists\"`\n * - `{ type: \"file.exists\", path: \"app.js\" }`\n * - `{ \"file.exists\": { path: \"app.js\" } }`\n */\nexport function parseRuntimeTestFields(\n vars: Record<string, unknown>,\n): RuntimeTestFieldEntries {\n const preconditions = parseAssertionEntries(vars.preconditions, \"preconditions\", {\n allowMissing: true,\n mode: \"precondition\",\n });\n const should = parseAssertionEntries(vars.should, \"should\", {\n allowMissing: true,\n mode: \"should\",\n });\n const shouldNot = parseAssertionEntries(vars.should_not, \"should_not\", {\n allowMissing: true,\n mode: \"should_not\",\n });\n return {\n preconditions: preconditions.entries,\n should: should.entries,\n should_not: shouldNot.entries,\n errors: [...preconditions.errors, ...should.errors, ...shouldNot.errors],\n };\n}\n\nexport function parseAssertionEntries(\n raw: unknown,\n field: string,\n options: { allowMissing?: boolean; mode?: AssertionMode } = {},\n): ParsedAssertionEntries {\n const entries: AssertionEntry[] = [];\n const errors: AssertionEntryError[] = [];\n\n if (raw === undefined || raw === null) {\n if (!options.allowMissing) {\n errors.push({ field, reason: \"must be an array of assertion entries\" });\n }\n return { entries, errors };\n }\n\n const rawEntries = Schema.decodeUnknownEither(RuntimeFieldArraySchema)(raw);\n if (Either.isLeft(rawEntries)) {\n return {\n entries,\n errors: [{ field, reason: \"must be an array of assertion entries\" }],\n };\n }\n\n rawEntries.right.forEach((entry, index) => {\n const parsed = parseAssertionEntry(entry, field, index);\n if (\"error\" in parsed) {\n errors.push(parsed.error);\n } else {\n if (\n options.mode === \"should_not\" &&\n 
doubleNegativeCheckTypeSet.has(parsed.entry.type)\n ) {\n errors.push({\n field,\n index,\n reason: `\"${parsed.entry.type}\" must be declared under should, not should_not`,\n });\n return;\n }\n entries.push(parsed.entry);\n }\n });\n\n return { entries, errors };\n}\n\nfunction parseAssertionEntry(\n entry: unknown,\n field: string,\n index: number,\n): { entry: AssertionEntry } | { error: AssertionEntryError } {\n if (typeof entry === \"string\") {\n if (Either.isLeft(Schema.decodeUnknownEither(NonEmptyTypeSchema)(entry))) {\n return { error: { field, index, reason: \"string entry must not be empty\" } };\n }\n return { entry: { type: entry, args: {} } };\n }\n\n const decodedObject = Schema.decodeUnknownEither(EntryObjectSchema)(entry);\n if (Either.isLeft(decodedObject)) {\n return {\n error: {\n field,\n index,\n reason: \"entry must be a string, { type: ... }, or shorthand object\",\n },\n };\n }\n\n const candidate = decodedObject.right as TypedEntry;\n if (\"type\" in candidate) {\n const type = Schema.decodeUnknownEither(NonEmptyTypeSchema)(candidate.type);\n if (Either.isLeft(type)) {\n return {\n error: { field, index, reason: \"`type` must be a non-empty string\" },\n };\n }\n return { entry: { type: type.right, args: candidate } };\n }\n\n const keys = Object.keys(candidate);\n if (keys.length !== 1) {\n return {\n error: {\n field,\n index,\n reason: \"shorthand assertion object must have exactly one key\",\n },\n };\n }\n\n const type = keys[0]!;\n const args = candidate[type] ?? 
{};\n if (args !== null && typeof args !== \"object\") {\n return {\n error: {\n field,\n index,\n reason: `shorthand assertion \"${type}\" value must be an object`,\n },\n };\n }\n return { entry: { type, args } };\n}\n","import { constants } from \"node:fs\";\nimport * as PlatformFileSystem from \"@effect/platform/FileSystem\";\nimport * as NodeFileSystem from \"@effect/platform-node/NodeFileSystem\";\nimport * as Context from \"effect/Context\";\nimport * as Effect from \"effect/Effect\";\nimport * as Layer from \"effect/Layer\";\nimport { parse as parseYaml } from \"yaml\";\n\ninterface DirectoryEntry {\n name: string;\n isDirectory(): boolean;\n isFile(): boolean;\n}\n\ninterface FileInfo {\n mode: number;\n isDirectory(): boolean;\n isFile(): boolean;\n}\n\ninterface FileSystemService {\n access(path: string, mode?: number): Effect.Effect<void, unknown>;\n copyDirectory(source: string, destination: string): Effect.Effect<void, unknown>;\n makeDirectory(path: string): Effect.Effect<void, unknown>;\n makeTempDirectory(prefix: string): Effect.Effect<string, unknown>;\n readFile(path: string): Effect.Effect<Buffer, unknown>;\n readText(path: string): Effect.Effect<string, unknown>;\n readDirectory(path: string): Effect.Effect<DirectoryEntry[], unknown>;\n stat(path: string): Effect.Effect<FileInfo, unknown>;\n writeText(path: string, contents: string): Effect.Effect<void, unknown>;\n}\n\ninterface EnvironmentService {\n cwd: Effect.Effect<string>;\n env: Effect.Effect<NodeJS.ProcessEnv>;\n}\n\ninterface YamlService {\n parse(input: string): Effect.Effect<unknown, unknown>;\n}\n\nexport class FileSystem extends Context.Tag(\"agent-skill-evals/promptfoo/FileSystem\")<\n FileSystem,\n FileSystemService\n>() {}\n\nexport class Environment extends Context.Tag(\"agent-skill-evals/promptfoo/Environment\")<\n Environment,\n EnvironmentService\n>() {}\n\nexport class YamlParser extends Context.Tag(\"agent-skill-evals/promptfoo/YamlParser\")<\n YamlParser,\n 
YamlService\n>() {}\n\nfunction toFileInfo(info: PlatformFileSystem.File.Info): FileInfo {\n return {\n mode: info.mode,\n isDirectory: () => info.type === \"Directory\",\n isFile: () => info.type === \"File\",\n };\n}\n\nfunction executableFromMode(mode: number): boolean {\n return (mode & 0o111) !== 0;\n}\n\nconst PlatformBackedFileSystemLive = Layer.effect(\n FileSystem,\n Effect.gen(function* () {\n const fs = yield* PlatformFileSystem.FileSystem;\n\n const statFile = (path: string) => fs.stat(path).pipe(Effect.map(toFileInfo));\n\n return {\n access: (path: string, mode?: number) => {\n const readable = mode === undefined ? undefined : (mode & constants.R_OK) !== 0;\n const writable = mode === undefined ? undefined : (mode & constants.W_OK) !== 0;\n const executable = mode === undefined ? false : (mode & constants.X_OK) !== 0;\n const baseAccess = fs.access(path, {\n ok: true,\n readable,\n writable,\n });\n if (!executable) return baseAccess;\n return baseAccess.pipe(\n Effect.zipRight(statFile(path)),\n Effect.flatMap((info) =>\n executableFromMode(info.mode)\n ? 
Effect.void\n : Effect.fail(new Error(`Path is not executable: ${path}`)),\n ),\n );\n },\n copyDirectory: (source: string, destination: string) =>\n fs.copy(source, destination),\n makeDirectory: (path: string) => fs.makeDirectory(path, { recursive: true }),\n makeTempDirectory: (prefix: string) => fs.makeTempDirectory({ prefix }),\n readFile: (path: string) => fs.readFile(path).pipe(Effect.map(Buffer.from)),\n readText: (path: string) => fs.readFileString(path),\n readDirectory: (path: string) =>\n fs.readDirectory(path).pipe(\n Effect.flatMap((names) =>\n Effect.forEach(names, (name) =>\n fs.stat(`${path}/${name}`).pipe(\n Effect.map((info): DirectoryEntry => ({\n name,\n isDirectory: () => info.type === \"Directory\",\n isFile: () => info.type === \"File\",\n })),\n ),\n ),\n ),\n ),\n stat: statFile,\n writeText: (path: string, contents: string) => fs.writeFileString(path, contents),\n };\n }),\n);\n\nexport const NodeFileSystemLive = PlatformBackedFileSystemLive.pipe(\n Layer.provide(NodeFileSystem.layer),\n);\n\nexport const NodeEnvironmentLive = Layer.succeed(Environment, {\n cwd: Effect.sync(() => process.cwd()),\n env: Effect.sync(() => ({ ...process.env })),\n});\n\nexport const YamlParserLive = Layer.succeed(YamlParser, {\n parse: (input) =>\n Effect.try({\n try: () => parseYaml(input),\n catch: (error) => error,\n }),\n});\n\nexport const NodeServicesLive = Layer.mergeAll(\n NodeFileSystemLive,\n NodeEnvironmentLive,\n YamlParserLive,\n);\n\nexport function pathExists(path: string): Effect.Effect<boolean, never, FileSystem> {\n return Effect.gen(function* () {\n const fs = yield* FileSystem;\n return yield* fs.stat(path).pipe(\n Effect.as(true),\n Effect.catchAll(() => Effect.succeed(false)),\n );\n });\n}\n\nexport function pathExecutable(path: string): Effect.Effect<boolean, never, FileSystem> {\n return Effect.gen(function* () {\n const fs = yield* FileSystem;\n return yield* fs.access(path, constants.X_OK).pipe(\n Effect.as(true),\n 
Effect.catchAll(() => Effect.succeed(false)),\n );\n });\n}\n"],"mappings":";;;;;;;;;;AAAA,MAAa,sBAAsB;CACjC;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;AAID,MAAa,8BAA8B;CACzC;CACA;CACA;CACD;AAEkC,IAAI,IAAY,oBAAoB;AACvE,MAAa,6BAA6B,IAAI,IAAY,4BAA4B;;;ACStF,MAAM,0BAA0B,OAAO,MAAM,OAAO,QAAQ;AAC5D,MAAM,oBAAoB,OAAO,QAAQ,KACvC,OAAO,QACJ,UACC,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,MAAM,EACtE,EAAE,YAAY,wBAAwB,CACvC,CACF;AACD,MAAM,qBAAqB,OAAO,OAAO,KACvC,OAAO,QAAQ,UAAU,MAAM,SAAS,GAAG,EAAE,YAAY,gBAAgB,CAAC,CAC3E;;;;;;;;;AAUD,SAAgB,uBACd,MACyB;CACzB,MAAM,gBAAgB,sBAAsB,KAAK,eAAe,iBAAiB;EAC/E,cAAc;EACd,MAAM;EACP,CAAC;CACF,MAAM,SAAS,sBAAsB,KAAK,QAAQ,UAAU;EAC1D,cAAc;EACd,MAAM;EACP,CAAC;CACF,MAAM,YAAY,sBAAsB,KAAK,YAAY,cAAc;EACrE,cAAc;EACd,MAAM;EACP,CAAC;AACF,QAAO;EACL,eAAe,cAAc;EAC7B,QAAQ,OAAO;EACf,YAAY,UAAU;EACtB,QAAQ;GAAC,GAAG,cAAc;GAAQ,GAAG,OAAO;GAAQ,GAAG,UAAU;GAAO;EACzE;;AAGH,SAAgB,sBACd,KACA,OACA,UAA4D,EAAE,EACtC;CACxB,MAAM,UAA4B,EAAE;CACpC,MAAM,SAAgC,EAAE;AAExC,KAAI,QAAQ,KAAA,KAAa,QAAQ,MAAM;AACrC,MAAI,CAAC,QAAQ,aACX,QAAO,KAAK;GAAE;GAAO,QAAQ;GAAyC,CAAC;AAEzE,SAAO;GAAE;GAAS;GAAQ;;CAG5B,MAAM,aAAa,OAAO,oBAAoB,wBAAwB,CAAC,IAAI;AAC3E,KAAI,OAAO,OAAO,WAAW,CAC3B,QAAO;EACL;EACA,QAAQ,CAAC;GAAE;GAAO,QAAQ;GAAyC,CAAC;EACrE;AAGH,YAAW,MAAM,SAAS,OAAO,UAAU;EACzC,MAAM,SAAS,oBAAoB,OAAO,OAAO,MAAM;AACvD,MAAI,WAAW,OACb,QAAO,KAAK,OAAO,MAAM;OACpB;AACL,OACE,QAAQ,SAAS,gBACjB,2BAA2B,IAAI,OAAO,MAAM,KAAK,EACjD;AACA,WAAO,KAAK;KACV;KACA;KACA,QAAQ,IAAI,OAAO,MAAM,KAAK;KAC/B,CAAC;AACF;;AAEF,WAAQ,KAAK,OAAO,MAAM;;GAE5B;AAEF,QAAO;EAAE;EAAS;EAAQ;;AAG5B,SAAS,oBACP,OACA,OACA,OAC4D;AAC5D,KAAI,OAAO,UAAU,UAAU;AAC7B,MAAI,OAAO,OAAO,OAAO,oBAAoB,mBAAmB,CAAC,MAAM,CAAC,CACtE,QAAO,EAAE,OAAO;GAAE;GAAO;GAAO,QAAQ;GAAkC,EAAE;AAE9E,SAAO,EAAE,OAAO;GAAE,MAAM;GAAO,MAAM,EAAE;GAAE,EAAE;;CAG7C,MAAM,gBAAgB,OAAO,oBAAoB,kBAAkB,CAAC,MAAM;AAC1E,KAAI,OAAO,OAAO,cAAc,CAC9B,QAAO,EACL,OAAO;EACL;EACA;EACA,QAAQ;EACT,EACF;CAGH,MAAM,YAAY,cAAc;AAChC,KAAI,UAAU,WAAW;EACvB,MAAM,OAAO,OAAO,oBAAoB,mBAAmB,CAAC,UAAU,KAAK;AAC3E,MAAI,OAAO,OAAO,K
AAK,CACrB,QAAO,EACL,OAAO;GAAE;GAAO;GAAO,QAAQ;GAAqC,EACrE;AAEH,SAAO,EAAE,OAAO;GAAE,MAAM,KAAK;GAAO,MAAM;GAAW,EAAE;;CAGzD,MAAM,OAAO,OAAO,KAAK,UAAU;AACnC,KAAI,KAAK,WAAW,EAClB,QAAO,EACL,OAAO;EACL;EACA;EACA,QAAQ;EACT,EACF;CAGH,MAAM,OAAO,KAAK;CAClB,MAAM,OAAO,UAAU,SAAS,EAAE;AAClC,KAAI,SAAS,QAAQ,OAAO,SAAS,SACnC,QAAO,EACL,OAAO;EACL;EACA;EACA,QAAQ,wBAAwB,KAAK;EACtC,EACF;AAEH,QAAO,EAAE,OAAO;EAAE;EAAM;EAAM,EAAE;;;;ACzIlC,IAAa,aAAb,cAAgC,QAAQ,IAAI,yCAAyC,EAGlF,CAAC;AAEJ,IAAa,cAAb,cAAiC,QAAQ,IAAI,0CAA0C,EAGpF,CAAC;AAEJ,IAAa,aAAb,cAAgC,QAAQ,IAAI,yCAAyC,EAGlF,CAAC;AAEJ,SAAS,WAAW,MAA8C;AAChE,QAAO;EACL,MAAM,KAAK;EACX,mBAAmB,KAAK,SAAS;EACjC,cAAc,KAAK,SAAS;EAC7B;;AAGH,SAAS,mBAAmB,MAAuB;AACjD,SAAQ,OAAO,QAAW;;AAwD5B,MAAa,qBArDwB,MAAM,OACzC,YACA,OAAO,IAAI,aAAa;CACtB,MAAM,KAAK,OAAO,mBAAmB;CAErC,MAAM,YAAY,SAAiB,GAAG,KAAK,KAAK,CAAC,KAAK,OAAO,IAAI,WAAW,CAAC;AAE7E,QAAO;EACL,SAAS,MAAc,SAAkB;GACvC,MAAM,WAAW,SAAS,KAAA,IAAY,KAAA,KAAa,OAAO,UAAU,UAAU;GAC9E,MAAM,WAAW,SAAS,KAAA,IAAY,KAAA,KAAa,OAAO,UAAU,UAAU;GAC9E,MAAM,aAAa,SAAS,KAAA,IAAY,SAAS,OAAO,UAAU,UAAU;GAC5E,MAAM,aAAa,GAAG,OAAO,MAAM;IACjC,IAAI;IACJ;IACA;IACD,CAAC;AACF,OAAI,CAAC,WAAY,QAAO;AACxB,UAAO,WAAW,KAChB,OAAO,SAAS,SAAS,KAAK,CAAC,EAC/B,OAAO,SAAS,SACd,mBAAmB,KAAK,KAAK,GACzB,OAAO,OACP,OAAO,qBAAK,IAAI,MAAM,2BAA2B,OAAO,CAAC,CAC9D,CACF;;EAEH,gBAAgB,QAAgB,gBAC9B,GAAG,KAAK,QAAQ,YAAY;EAC9B,gBAAgB,SAAiB,GAAG,cAAc,MAAM,EAAE,WAAW,MAAM,CAAC;EAC5E,oBAAoB,WAAmB,GAAG,kBAAkB,EAAE,QAAQ,CAAC;EACvE,WAAW,SAAiB,GAAG,SAAS,KAAK,CAAC,KAAK,OAAO,IAAI,OAAO,KAAK,CAAC;EAC3E,WAAW,SAAiB,GAAG,eAAe,KAAK;EACnD,gBAAgB,SACd,GAAG,cAAc,KAAK,CAAC,KACrB,OAAO,SAAS,UACd,OAAO,QAAQ,QAAQ,SACrB,GAAG,KAAK,GAAG,KAAK,GAAG,OAAO,CAAC,KACzB,OAAO,KAAK,UAA0B;GACpC;GACA,mBAAmB,KAAK,SAAS;GACjC,cAAc,KAAK,SAAS;GAC7B,EAAE,CACJ,CACF,CACF,CACF;EACH,MAAM;EACN,YAAY,MAAc,aAAqB,GAAG,gBAAgB,MAAM,SAAS;EAClF;EACD,CAG8B,CAA6B,KAC7D,MAAM,QAAQ,eAAe,MAAM,CACpC;AAED,MAAa,sBAAsB,MAAM,QAAQ,aAAa;CAC5D,KAAK,OAAO,WAAW,QAAQ,KAAK,CAAC;CACrC,KAAK,OAAO,YAAY,EAAE,GAAG,QAAQ,KAAK,EAAE;CAC7C,CAAC;AAEF,MAAa,iBAAiB,MAAM,QAAQ,YAAY
,EACtD,QAAQ,UACN,OAAO,IAAI;CACT,WAAWA,MAAU,MAAM;CAC3B,QAAQ,UAAU;CACnB,CAAC,EACL,CAAC;AAEF,MAAa,mBAAmB,MAAM,SACpC,oBACA,qBACA,eACD;AAED,SAAgB,WAAW,MAAyD;AAClF,QAAO,OAAO,IAAI,aAAa;AAE7B,SAAO,QAAO,OADI,YACD,KAAK,KAAK,CAAC,KAC1B,OAAO,GAAG,KAAK,EACf,OAAO,eAAe,OAAO,QAAQ,MAAM,CAAC,CAC7C;GACD;;AAGJ,SAAgB,eAAe,MAAyD;AACtF,QAAO,OAAO,IAAI,aAAa;AAE7B,SAAO,QAAO,OADI,YACD,OAAO,MAAM,UAAU,KAAK,CAAC,KAC5C,OAAO,GAAG,KAAK,EACf,OAAO,eAAe,OAAO,QAAQ,MAAM,CAAC,CAC7C;GACD"}
@@ -0,0 +1,76 @@
1
+ //#region src/evidence-types.d.ts
2
+ declare const EVIDENCE_SCHEMA_VERSION = "agent-skill-evals.evidence.v1"; // Literal version tag; EvidenceSnapshot.schemaVersion is typed as `typeof` this constant.
3
+ interface CommandEvent { // One command record; EvidenceSnapshot.commands and EvidenceHandle.commands() carry arrays of these.
4
+ command: string; // NOTE(review): presumably the program that was invoked — confirm against the producer
5
+ args: string[]; // argument list, each entry a string
6
+ exitCode: number; // required
7
+ signal?: string; // optional; NOTE(review): presumably the terminating signal name — confirm
8
+ stdout?: string; // optional; NOTE(review): presumably captured standard output — confirm
9
+ stderr?: string; // optional; NOTE(review): presumably captured standard error — confirm
10
+ startedAt: number; // NOTE(review): unit and epoch are not visible here — confirm
11
+ durationMs: number; // NOTE(review): name suggests milliseconds — confirm
12
+ }
13
+ interface FileEvent { // One file-operation record; EvidenceSnapshot.filesWritten and EvidenceHandle.filesWritten() carry arrays of these.
14
+ path: string; // NOTE(review): absolute vs. relative base is not visible here — confirm
15
+ op: "create" | "modify" | "delete"; // exactly one of these three literal operation kinds
16
+ }
17
+ interface ToolCallEvent { // One tool-call record; EvidenceSnapshot.toolCalls and EvidenceHandle.toolCalls() carry arrays of these.
18
+ tool: string; // required tool identifier
19
+ provider?: string; // optional
20
+ server?: string; // optional; NOTE(review): presumably the MCP server name — confirm
21
+ args?: unknown; // optional and untyped: narrow before use
22
+ result?: unknown; // optional and untyped: narrow before use
23
+ startedAt: number; // NOTE(review): unit and epoch are not visible here — confirm
24
+ durationMs: number; // NOTE(review): name suggests milliseconds — confirm
25
+ }
26
+ interface SkillLoadEvent { // One skill-load record; EvidenceSnapshot.skillsLoaded and EvidenceHandle.skillsLoaded() carry arrays of these.
27
+ skill: string; // required skill identifier
28
+ delivery: "native" | "mcp"; // exactly one of these two literal delivery kinds
29
+ provider?: string; // optional
30
+ server?: string; // optional; NOTE(review): presumably only meaningful for "mcp" delivery — confirm
31
+ source?: string; // optional
32
+ startedAt: number; // NOTE(review): unit and epoch are not visible here — confirm
33
+ }
34
+ interface Usage { // Token counters; every field is optional, so an empty object is a valid Usage.
35
+ inputTokens?: number;
36
+ outputTokens?: number;
37
+ totalTokens?: number; // NOTE(review): not visible here whether this always equals input + output — confirm
38
+ cacheReadTokens?: number;
39
+ cacheWriteTokens?: number;
40
+ }
41
+ interface RunSummary { // Run-level summary; stored as EvidenceSnapshot.run.
42
+ runDir: string; // NOTE(review): presumably the directory holding this run's artifacts — confirm
43
+ worldPath: string; // NOTE(review): presumably the working copy the agent operated on — confirm
44
+ fixture: string; // NOTE(review): name vs. path is not visible here — confirm
45
+ durationMs?: number; // optional; NOTE(review): name suggests milliseconds — confirm
46
+ }
47
+ interface EvidenceSnapshot { // Full evidence record: version tag, output text, run summary, four event lists, usage, optional extensions.
48
+ schemaVersion: typeof EVIDENCE_SCHEMA_VERSION; // must be the literal "agent-skill-evals.evidence.v1"
49
+ output: string; // NOTE(review): presumably the agent's final text output — confirm
50
+ run: RunSummary;
51
+ commands: CommandEvent[];
52
+ filesWritten: FileEvent[];
53
+ toolCalls: ToolCallEvent[];
54
+ skillsLoaded: SkillLoadEvent[];
55
+ usage: Usage; // required, though every Usage field is itself optional
56
+ extensions?: Record<string, unknown>; // optional string-keyed bag of untyped values
57
+ }
58
+ //#endregion
59
+ //#region src/internal-types.d.ts
60
+ interface AgentSkillEvalsAssertionResult { // Outcome of one assertion; may nest further results of the same shape.
61
+ pass: boolean;
62
+ score: number; // NOTE(review): valid range is not visible here — confirm
63
+ reason: string;
64
+ componentResults?: AgentSkillEvalsAssertionResult[]; // optional recursive list of sub-results
65
+ evidence?: unknown; // optional and untyped: narrow before use
66
+ }
67
+ interface EvidenceHandle { // Accessor-style view over evidence: one zero-argument method per event list, plus usage.
68
+ commands(): readonly CommandEvent[]; // readonly: callers must not mutate the returned array
69
+ filesWritten(): readonly FileEvent[]; // readonly: callers must not mutate the returned array
70
+ toolCalls(): readonly ToolCallEvent[]; // readonly: callers must not mutate the returned array
71
+ skillsLoaded(): readonly SkillLoadEvent[]; // readonly: callers must not mutate the returned array
72
+ usage(): Usage;
73
+ }
74
+ //#endregion
75
+ export { FileEvent as a, Usage as c, EvidenceSnapshot as i, EvidenceHandle as n, SkillLoadEvent as o, CommandEvent as r, ToolCallEvent as s, AgentSkillEvalsAssertionResult as t };
76
+ //# sourceMappingURL=internal-services-DbsekQ_K.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"internal-services-DbsekQ_K.d.mts","names":[],"sources":["../src/evidence-types.ts","../src/internal-types.ts"],"mappings":";cAAa,uBAAA;AAAA,UAEI,YAAA;EACf,OAAA;EACA,IAAA;EACA,QAAA;EACA,MAAA;EACA,MAAA;EACA,MAAA;EACA,SAAA;EACA,UAAA;AAAA;AAAA,UAGe,SAAA;EACf,IAAA;EACA,EAAA;AAAA;AAAA,UAGe,aAAA;EACf,IAAA;EACA,QAAA;EACA,MAAA;EACA,IAAA;EACA,MAAA;EACA,SAAA;EACA,UAAA;AAAA;AAAA,UAGe,cAAA;EACf,KAAA;EACA,QAAA;EACA,QAAA;EACA,MAAA;EACA,MAAA;EACA,SAAA;AAAA;AAAA,UAGe,KAAA;EACf,WAAA;EACA,YAAA;EACA,WAAA;EACA,eAAA;EACA,gBAAA;AAAA;AAAA,UAGe,UAAA;EACf,MAAA;EACA,SAAA;EACA,OAAA;EACA,UAAA;AAAA;AAAA,UAGe,gBAAA;EACf,aAAA,SAAsB,uBAAA;EACtB,MAAA;EACA,GAAA,EAAK,UAAA;EACL,QAAA,EAAU,YAAA;EACV,YAAA,EAAc,SAAA;EACd,SAAA,EAAW,aAAA;EACX,YAAA,EAAc,cAAA;EACd,KAAA,EAAO,KAAA;EACP,UAAA,GAAa,MAAA;AAAA;;;UClDE,8BAAA;EACf,IAAA;EACA,KAAA;EACA,MAAA;EACA,gBAAA,GAAmB,8BAAA;EACnB,QAAA;AAAA;AAAA,UAee,cAAA;EACf,QAAA,aAAqB,YAAA;EACrB,YAAA,aAAyB,SAAA;EACzB,SAAA,aAAsB,aAAA;EACtB,YAAA,aAAyB,cAAA;EACzB,KAAA,IAAS,KAAA;AAAA"}