npm - stepproof - Versions diffs - 0.2.0 - Mend

stepproof 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/LICENSE +21 -0
package/README.md +276 -0
package/dist/adapters/anthropic.d.ts +8 -0
package/dist/adapters/anthropic.d.ts.map +1 -0
package/dist/adapters/anthropic.js +26 -0
package/dist/adapters/anthropic.js.map +1 -0
package/dist/adapters/base.d.ts +4 -0
package/dist/adapters/base.d.ts.map +1 -0
package/dist/adapters/base.js +2 -0
package/dist/adapters/base.js.map +1 -0
package/dist/adapters/index.d.ts +4 -0
package/dist/adapters/index.d.ts.map +1 -0
package/dist/adapters/index.js +13 -0
package/dist/adapters/index.js.map +1 -0
package/dist/adapters/openai.d.ts +8 -0
package/dist/adapters/openai.d.ts.map +1 -0
package/dist/adapters/openai.js +25 -0
package/dist/adapters/openai.js.map +1 -0
package/dist/assertions/engine.d.ts +6 -0
package/dist/assertions/engine.d.ts.map +1 -0
package/dist/assertions/engine.js +124 -0
package/dist/assertions/engine.js.map +1 -0
package/dist/cli.d.ts +3 -0
package/dist/cli.d.ts.map +1 -0
package/dist/cli.js +126 -0
package/dist/cli.js.map +1 -0
package/dist/commands/init.d.ts +2 -0
package/dist/commands/init.d.ts.map +1 -0
package/dist/commands/init.js +39 -0
package/dist/commands/init.js.map +1 -0
package/dist/core/scenario-parser.d.ts +4 -0
package/dist/core/scenario-parser.d.ts.map +1 -0
package/dist/core/scenario-parser.js +92 -0
package/dist/core/scenario-parser.js.map +1 -0
package/dist/core/scenario-runner.d.ts +11 -0
package/dist/core/scenario-runner.d.ts.map +1 -0
package/dist/core/scenario-runner.js +85 -0
package/dist/core/scenario-runner.js.map +1 -0
package/dist/core/types.d.ts +71 -0
package/dist/core/types.d.ts.map +1 -0
package/dist/core/types.js +2 -0
package/dist/core/types.js.map +1 -0
package/dist/reporters/json-reporter.d.ts +4 -0
package/dist/reporters/json-reporter.d.ts.map +1 -0
package/dist/reporters/json-reporter.js +9 -0
package/dist/reporters/json-reporter.js.map +1 -0
package/dist/reporters/junit-reporter.d.ts +3 -0
package/dist/reporters/junit-reporter.d.ts.map +1 -0
package/dist/reporters/junit-reporter.js +34 -0
package/dist/reporters/junit-reporter.js.map +1 -0
package/dist/reporters/sarif-reporter.d.ts +3 -0
package/dist/reporters/sarif-reporter.d.ts.map +1 -0
package/dist/reporters/sarif-reporter.js +47 -0
package/dist/reporters/sarif-reporter.js.map +1 -0
package/dist/reporters/terminal-reporter.d.ts +4 -0
package/dist/reporters/terminal-reporter.d.ts.map +1 -0
package/dist/reporters/terminal-reporter.js +73 -0
package/dist/reporters/terminal-reporter.js.map +1 -0
package/package.json +62 -0
package/schemas/scenario.schema.json +119 -0

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 StanislavBG
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,276 @@
+# stepproof
+[![Part of Preflight](https://img.shields.io/badge/suite-Preflight-blue)](https://github.com/StanislavBG/agent-gate)
+![Tests](https://img.shields.io/badge/tests-passing-brightgreen)
+![License](https://img.shields.io/badge/license-MIT-green)
+**Regression testing for multi-step AI workflows. Not observability.**
+---
+You upgraded to `gpt-4o-mini`. Your LangSmith traces look fine. Three days later a customer reports your extraction step stopped working. You found out from a Slack message, not a test.
+stepproof is what you run before you deploy.
+```bash
+npm install -g stepproof
+```
+---
+## 30-second quickstart
+Write a scenario:
+```yaml
+# classify.yaml
+name: "Intent classification"
+iterations: 10
+steps:
+  - id: classify
+    provider: anthropic
+    model: claude-sonnet-4-6
+    prompt: "Classify the intent of this message: {{input}}"
+    variables:
+      input: "I need to cancel my subscription"
+    min_pass_rate: 0.90
+    assertions:
+      - type: contains
+        value: "cancel"
+      - type: json_schema
+        schema: ./schemas/intent.json
+  - id: respond
+    provider: openai
+    model: gpt-4o
+    prompt: "Given intent '{{classify.output}}', write a helpful reply to: {{input}}"
+    min_pass_rate: 0.80
+    assertions:
+      - type: llm_judge
+        prompt: "Is this response helpful and on-topic? Answer yes/no."
+        pass_on: "yes"
+```
+Run it:
+```
+stepproof run classify.yaml
+```
+Output:
+```
+stepproof v0.2.0 — running "Intent classification" (10 iterations)
+  step: classify
+    ✓ 9/10 passed (90.0%) — threshold: 90% ✓
+  step: respond
+    ✓ 8/10 passed (80.0%) — threshold: 80% ✓
+All steps passed. Exit 0.
+```
+Now break it — swap to a cheaper model and the `classify` step's pass rate drops below its threshold. The run fails:
+```
+  step: classify
+    ✗ 5/10 passed (50.0%) — threshold: 90% ✗
+1 step failed. Exit 1.
+```
+---
+## Commands
+### `stepproof run <scenario>`
+Run a scenario file or directory of scenarios.
+```bash
+stepproof run classify.yaml
+stepproof run scenarios/
+stepproof run scenarios/ --format sarif --output results.sarif
+stepproof run scenarios/ --format junit --output results.xml
+```
+Flags:
+- `--format <format>` — output format: `terminal` (default), `sarif`, `junit`
+- `--output <file>` — write output to file instead of stdout
+### `stepproof init [dir]`
+Scaffold a starter scenario in the target directory. Defaults to `./scenarios/`.
+```bash
+stepproof init
+# Creates: ./scenarios/first-test.yaml
+stepproof init my-tests
+# Creates: ./my-tests/first-test.yaml
+```
+The generated `first-test.yaml` is a working example you can edit and run immediately.
+---
+## Environment Variables
+| Variable | Required | Purpose |
+|----------|----------|---------|
+| `ANTHROPIC_API_KEY` | For Anthropic steps | Authenticates calls to Claude models |
+| `OPENAI_API_KEY` | For OpenAI steps | Authenticates calls to GPT models |
+Only the keys for the providers you use in your scenarios are required.
+---
+## CI integration
+```yaml
+# .github/workflows/ai-regression.yml
+name: AI regression tests
+on: [push, pull_request]
+jobs:
+  stepproof:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: npm install -g stepproof
+      - run: stepproof run scenarios/classify.yaml
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+```
+Exit code 1 on regression. PR blocked. Done.
+---
+## Assertions
+| Type | What it checks |
+|------|---------------|
+| `contains` | Output includes this string (case-insensitive) |
+| `not_contains` | Output does not include this string (case-insensitive) |
+| `regex` | Output matches this pattern (case-insensitive) |
+| `json_schema` | Output is valid JSON matching this schema |
+| `llm_judge` | A second LLM call evaluates the output (boolean verdict); passes when the judge's reply starts with `pass_on` (default `yes`) |
+---
+## Structured reports (v0.2.0)
+stepproof outputs machine-readable SARIF 2.1.0 and JUnit XML for CI pipeline integration.
+### SARIF — GitHub Advanced Security / GitLab / Azure DevOps
+```bash
+# Write SARIF to stdout
+stepproof run classify.yaml --format sarif
+# Write SARIF to file
+stepproof run classify.yaml --format sarif --output results.sarif
+```
+Integrate with GitHub Advanced Security:
+```yaml
+# .github/workflows/ai-regression.yml
+- name: Run stepproof
+  run: stepproof run scenarios/ --format sarif --output results.sarif
+- name: Upload to GitHub Security tab
+  uses: github/codeql-action/upload-sarif@v3
+  with:
+    sarif_file: results.sarif
+  if: always()
+```
+### JUnit XML — Jenkins / CircleCI / TeamCity
+```bash
+stepproof run classify.yaml --format junit
+stepproof run classify.yaml --format junit --output results.xml
+```
+```yaml
+# .github/workflows/ai-regression.yml
+- name: Run stepproof
+  run: stepproof run scenarios/ --format junit --output test-results.xml
+- name: Publish test results
+  uses: actions/upload-artifact@v4
+  with:
+    name: test-results
+    path: test-results.xml
+  if: always()
+```
+Default output (no `--format` flag) is unchanged — human-readable terminal output.
+> **Migration note (v0.2.x → v0.3.0):** `--report` still works but is deprecated and will print a warning. Switch to `--format` at your next convenience. `--report` will be removed at v1.0.0.
+---
+## How this is different from LangSmith / Braintrust / Langfuse
+| | stepproof | LangSmith / Braintrust |
+|--|-----------|------------------------|
+| When it runs | Before deploy (CI) | After deploy (production) |
+| What it answers | "Is my pipeline still correct?" | "What did my pipeline do?" |
+| Output | Pass/fail with exit code | Traces and dashboards |
+| Use case | Regression testing | Observability |
+They tell you what happened. We tell you whether to deploy.
+These are different jobs. Use both.
+---
+## Scenarios
+See [`/examples`](./examples) for copy-paste ready scenarios:
+- [`simple-chain.yaml`](./examples/simple-chain.yaml) — basic prompt → response → assertion
+- [`tool-calling.yaml`](./examples/tool-calling.yaml) — verify tool selection and output
+- [`multi-turn.yaml`](./examples/multi-turn.yaml) — conversation with memory, verify consistency
+---
+## Roadmap
+- **v0.2.0** (current): YAML scenarios, N iterations, 5 assertion types, exit code 1 on failure, OpenAI + Anthropic, SARIF 2.1.0 + JUnit XML reporters, `stepproof init` scaffolding
+- **v0.3.0** (next): Baseline comparison (fail on regression from last run), GitHub Actions native action, provider comparison mode — run the same scenario against two models and diff the results
+- **Cloud dashboard** (month 3–6): Persistent history, trend charts, team workspaces — never in the CLI
+---
+## Contributing
+Issues and PRs welcome. See [CONTRIBUTING.md](./CONTRIBUTING.md) for dev setup and guidelines. The tool is and will remain free. Cloud features are the business model, not the CLI.
+---
+## Part of the Preflight suite
+stepproof is one tool in the **Preflight** AI Agent DevOps suite — local-first CLIs covering the full lifecycle from pre-deploy validation to production observability:
+| Tool | Purpose | Install |
+|------|---------|---------|
+| **stepproof** | Behavioral regression testing | `npm install -g stepproof` |
+| **agent-comply** | EU AI Act compliance scanning | `npm install -g agent-comply` |
+| **agent-gate** | Unified pre-deploy CI gate | `npm install -g agent-gate` |
+| **agent-shift** | Config versioning + environment promotion | `npm install -g agent-shift` |
+| **agent-trace** | Local observability — OTel traces in SQLite | `npm install -g agent-trace` |
+Install the full suite:
+```bash
+npm install -g agent-gate stepproof agent-comply agent-shift agent-trace
+```
+---
+*stepproof — because "I checked manually before the deploy" is not a test.*

package/dist/adapters/anthropic.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { ProviderAdapter } from './base.js';
+/**
+ * Provider adapter that sends prompts to an Anthropic model.
+ * The constructor throws if the ANTHROPIC_API_KEY environment variable is not set.
+ */
+export declare class AnthropicAdapter implements ProviderAdapter {
+    private client;
+    private model;
+    /** @param model Model identifier sent with every request. */
+    constructor(model: string);
+    /**
+     * Sends `prompt` as a single user message, with `system` as the system prompt when provided.
+     * @returns The model's text reply; may be an empty string.
+     */
+    call(prompt: string, system?: string): Promise<string>;
+}
+//# sourceMappingURL=anthropic.d.ts.map

package/dist/adapters/anthropic.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/adapters/anthropic.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,KAAK,CAAS;gBAEV,KAAK,EAAE,MAAM;IAQnB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAe7D"}

package/dist/adapters/anthropic.js ADDED Viewed

@@ -0,0 +1,26 @@
+import Anthropic from '@anthropic-ai/sdk';
+export class AnthropicAdapter {
+    client;
+    model;
+    constructor(model) {
+        this.model = model;
+        if (!process.env.ANTHROPIC_API_KEY) {
+            throw new Error('ANTHROPIC_API_KEY environment variable is required for Anthropic provider');
+        }
+        this.client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
+    }
+    async call(prompt, system) {
+        const response = await this.client.messages.create({
+            model: this.model,
+            max_tokens: 1024,
+            ...(system && { system }),
+            messages: [{ role: 'user', content: prompt }],
+        });
+        const content = response.content[0];
+        if (content?.type === 'text') {
+            return content.text;
+        }
+        return '';
+    }
+}
+//# sourceMappingURL=anthropic.js.map

package/dist/adapters/anthropic.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"anthropic.js","sourceRoot":"","sources":["../../src/adapters/anthropic.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAG1C,MAAM,OAAO,gBAAgB;IACnB,MAAM,CAAY;IAClB,KAAK,CAAS;IAEtB,YAAY,KAAa;QACvB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,2EAA2E,CAAC,CAAC;QAC/F,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,MAAc,EAAE,MAAe;QACxC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YACjD,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI;YAChB,GAAG,CAAC,MAAM,IAAI,EAAE,MAAM,EAAE,CAAC;YACzB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;SAC9C,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,IAAI,OAAO,EAAE,IAAI,KAAK,MAAM,EAAE,CAAC;YAC7B,OAAO,OAAO,CAAC,IAAI,CAAC;QACtB,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;CACF"}

package/dist/adapters/base.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+/** Common contract implemented by the provider adapters (OpenAI, Anthropic). */
+export interface ProviderAdapter {
+    /**
+     * Sends a prompt to the provider's model.
+     * @param prompt User prompt text.
+     * @param system Optional system prompt.
+     * @returns The model's text reply.
+     */
+    call(prompt: string, system?: string): Promise<string>;
+}
+//# sourceMappingURL=base.d.ts.map

package/dist/adapters/base.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/adapters/base.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,eAAe;IAC9B,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACxD"}

package/dist/adapters/base.js ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export {};
2	+ //# sourceMappingURL=base.js.map

package/dist/adapters/base.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"base.js","sourceRoot":"","sources":["../../src/adapters/base.ts"],"names":[],"mappings":""}

package/dist/adapters/index.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import type { ProviderAdapter } from './base.js';
+/**
+ * Creates the adapter for the named provider.
+ * @param provider Provider name; `openai` and `anthropic` are supported.
+ * @param model Model identifier handed to the adapter.
+ * @throws Error if `provider` is not one of the supported names.
+ */
+export declare function getAdapter(provider: string, model: string): ProviderAdapter;
+export type { ProviderAdapter };
+//# sourceMappingURL=index.d.ts.map

package/dist/adapters/index.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,eAAe,CAS3E;AAED,YAAY,EAAE,eAAe,EAAE,CAAC"}

package/dist/adapters/index.js ADDED Viewed

@@ -0,0 +1,13 @@
+import { OpenAIAdapter } from './openai.js';
+import { AnthropicAdapter } from './anthropic.js';
+export function getAdapter(provider, model) {
+    switch (provider) {
+        case 'openai':
+            return new OpenAIAdapter(model);
+        case 'anthropic':
+            return new AnthropicAdapter(model);
+        default:
+            throw new Error(`Unknown provider: "${provider}". Supported providers: openai, anthropic`);
+    }
+}
+//# sourceMappingURL=index.js.map

package/dist/adapters/index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/adapters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAGlD,MAAM,UAAU,UAAU,CAAC,QAAgB,EAAE,KAAa;IACxD,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ;YACX,OAAO,IAAI,aAAa,CAAC,KAAK,CAAC,CAAC;QAClC,KAAK,WAAW;YACd,OAAO,IAAI,gBAAgB,CAAC,KAAK,CAAC,CAAC;QACrC;YACE,MAAM,IAAI,KAAK,CAAC,sBAAsB,QAAQ,2CAA2C,CAAC,CAAC;IAC/F,CAAC;AACH,CAAC"}

package/dist/adapters/openai.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import type { ProviderAdapter } from './base.js';
+/**
+ * Provider adapter that sends prompts to an OpenAI chat model.
+ * The constructor throws if the OPENAI_API_KEY environment variable is not set.
+ */
+export declare class OpenAIAdapter implements ProviderAdapter {
+    private client;
+    private model;
+    /** @param model Model identifier sent with every request. */
+    constructor(model: string);
+    /**
+     * Sends `prompt` as the user message, preceded by `system` as a system message when provided.
+     * @returns The content of the first choice's message, or an empty string when there is none.
+     */
+    call(prompt: string, system?: string): Promise<string>;
+}
+//# sourceMappingURL=openai.d.ts.map

package/dist/adapters/openai.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../src/adapters/openai.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,qBAAa,aAAc,YAAW,eAAe;IACnD,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAS;gBAEV,KAAK,EAAE,MAAM;IAQnB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAe7D"}

package/dist/adapters/openai.js ADDED Viewed

@@ -0,0 +1,25 @@
+import OpenAI from 'openai';
+export class OpenAIAdapter {
+    client;
+    model;
+    constructor(model) {
+        this.model = model;
+        if (!process.env.OPENAI_API_KEY) {
+            throw new Error('OPENAI_API_KEY environment variable is required for OpenAI provider');
+        }
+        this.client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+    }
+    async call(prompt, system) {
+        const messages = [];
+        if (system) {
+            messages.push({ role: 'system', content: system });
+        }
+        messages.push({ role: 'user', content: prompt });
+        const response = await this.client.chat.completions.create({
+            model: this.model,
+            messages,
+        });
+        return response.choices[0]?.message?.content ?? '';
+    }
+}
+//# sourceMappingURL=openai.js.map

package/dist/adapters/openai.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"openai.js","sourceRoot":"","sources":["../../src/adapters/openai.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,QAAQ,CAAC;AAG5B,MAAM,OAAO,aAAa;IAChB,MAAM,CAAS;IACf,KAAK,CAAS;IAEtB,YAAY,KAAa;QACvB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;YAChC,MAAM,IAAI,KAAK,CAAC,qEAAqE,CAAC,CAAC;QACzF,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,MAAc,EAAE,MAAe;QACxC,MAAM,QAAQ,GAA6C,EAAE,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC;YACX,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QAEjD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;YACzD,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,QAAQ;SACT,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;IACrD,CAAC;CACF"}

package/dist/assertions/engine.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import type { Assertion, AssertionResult } from '../core/types.js';
+/**
+ * Runs every assertion against an output string.
+ * @param output Text to check.
+ * @param assertions Assertions to evaluate, in order.
+ * @param scenarioDir Directory against which `json_schema` schema paths are resolved.
+ * @returns One result per assertion, plus `allPassed`, which is true only when every assertion passed.
+ */
+export declare function runAssertions(output: string, assertions: Assertion[], scenarioDir: string): Promise<{
+    results: AssertionResult[];
+    allPassed: boolean;
+}>;
+//# sourceMappingURL=engine.d.ts.map

package/dist/assertions/engine.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/assertions/engine.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAInE,wBAAsB,aAAa,CACjC,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,SAAS,EAAE,EACvB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC;IAAE,OAAO,EAAE,eAAe,EAAE,CAAC;IAAC,SAAS,EAAE,OAAO,CAAA;CAAE,CAAC,CAU7D"}

package/dist/assertions/engine.js ADDED Viewed

@@ -0,0 +1,124 @@
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+import { Ajv as AjvClass } from 'ajv';
+import { getAdapter } from '../adapters/index.js';
+const ajv = new AjvClass({ allErrors: true });
+export async function runAssertions(output, assertions, scenarioDir) {
+    const results = [];
+    for (const assertion of assertions) {
+        const result = await runAssertion(output, assertion, scenarioDir);
+        results.push(result);
+    }
+    const allPassed = results.every((r) => r.passed);
+    return { results, allPassed };
+}
+async function runAssertion(output, assertion, scenarioDir) {
+    const { type } = assertion;
+    switch (type) {
+        case 'contains': {
+            if (!assertion.value) {
+                return fail(type, 'Missing required field "value"');
+            }
+            const passed = output.toLowerCase().includes(assertion.value.toLowerCase());
+            return passed
+                ? pass(type)
+                : fail(type, `Expected output to contain: "${assertion.value}"`);
+        }
+        case 'not_contains': {
+            if (!assertion.value) {
+                return fail(type, 'Missing required field "value"');
+            }
+            const passed = !output.toLowerCase().includes(assertion.value.toLowerCase());
+            return passed
+                ? pass(type)
+                : fail(type, `Expected output NOT to contain: "${assertion.value}"`);
+        }
+        case 'regex': {
+            if (!assertion.value) {
+                return fail(type, 'Missing required field "value" (regex pattern)');
+            }
+            let regex;
+            try {
+                regex = new RegExp(assertion.value, 'i');
+            }
+            catch (e) {
+                return fail(type, `Invalid regex pattern: "${assertion.value}"`);
+            }
+            const passed = regex.test(output);
+            return passed
+                ? pass(type)
+                : fail(type, `Output did not match pattern: ${assertion.value}`);
+        }
+        case 'json_schema': {
+            if (!assertion.schema) {
+                return fail(type, 'Missing required field "schema" (path to JSON schema file)');
+            }
+            let parsed;
+            try {
+                parsed = JSON.parse(output);
+            }
+            catch {
+                return fail(type, `Output is not valid JSON`);
+            }
+            const schemaPath = path.resolve(scenarioDir, assertion.schema);
+            let schema;
+            try {
+                schema = JSON.parse(fs.readFileSync(schemaPath, 'utf-8'));
+            }
+            catch (e) {
+                return fail(type, `Cannot read schema file: ${assertion.schema}`);
+            }
+            let validate;
+            try {
+                validate = ajv.compile(schema);
+            }
+            catch (e) {
+                return fail(type, `Invalid JSON schema: ${e.message}`);
+            }
+            const valid = validate(parsed);
+            if (valid) {
+                return pass(type);
+            }
+            const errors = ajv.errorsText(validate.errors, { separator: '; ' });
+            return fail(type, `Schema validation failed: ${errors}`);
+        }
+        case 'llm_judge': {
+            if (!assertion.prompt) {
+                return fail(type, 'Missing required field "prompt" for llm_judge assertion');
+            }
+            const passOn = (assertion.pass_on ?? 'yes').toLowerCase().trim();
+            const provider = assertion.provider ?? 'anthropic';
+            const model = assertion.model ?? (provider === 'anthropic' ? 'claude-haiku-4-5-20251001' : 'gpt-4o-mini');
+            let adapter;
+            try {
+                adapter = getAdapter(provider, model);
+            }
+            catch (e) {
+                return fail(type, `Cannot create LLM judge adapter: ${e.message}`);
+            }
+            const judgePrompt = `${assertion.prompt}\n\nText to evaluate:\n---\n${output}\n---\n\nAnswer with a single word.`;
+            let judgeResponse;
+            try {
+                judgeResponse = await adapter.call(judgePrompt);
+            }
+            catch (e) {
+                return fail(type, `LLM judge API call failed: ${e.message}`);
+            }
+            const normalizedResponse = judgeResponse.trim().toLowerCase();
+            const passed = normalizedResponse.startsWith(passOn);
+            return passed
+                ? pass(type)
+                : fail(type, `LLM judge responded "${judgeResponse.trim()}" (expected to start with: "${passOn}")`);
+        }
+        default: {
+            return fail(type, `Unknown assertion type: "${type}". Valid types: contains, not_contains, regex, json_schema, llm_judge`);
+        }
+    }
+}
+function pass(type) {
+    return { type, passed: true };
+}
+function fail(type, message) {
+    return { type, passed: false, message };
+}
+//# sourceMappingURL=engine.js.map

package/dist/assertions/engine.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"engine.js","sourceRoot":"","sources":["../../src/assertions/engine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,GAAG,IAAI,QAAQ,EAAE,MAAM,KAAK,CAAC;AAEtC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAGlD,MAAM,GAAG,GAAG,IAAI,QAAQ,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,MAAc,EACd,UAAuB,EACvB,WAAmB;IAEnB,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;QAClE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IACjD,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AAChC,CAAC;AAED,KAAK,UAAU,YAAY,CACzB,MAAc,EACd,SAAoB,EACpB,WAAmB;IAEnB,MAAM,EAAE,IAAI,EAAE,GAAG,SAAS,CAAC;IAE3B,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;gBACrB,OAAO,IAAI,CAAC,IAAI,EAAE,gCAAgC,CAAC,CAAC;YACtD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;YAC5E,OAAO,MAAM;gBACX,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACZ,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,gCAAgC,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;QACrE,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;gBACrB,OAAO,IAAI,CAAC,IAAI,EAAE,gCAAgC,CAAC,CAAC;YACtD,CAAC;YACD,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;YAC7E,OAAO,MAAM;gBACX,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACZ,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,oCAAoC,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;QACzE,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;gBACrB,OAAO,IAAI,CAAC,IAAI,EAAE,gDAAgD,CAAC,CAAC;YACtE,CAAC;YACD,IAAI,KAAa,CAAC;YAClB,IAAI,CAAC;gBACH,KAAK,GAAG,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAC3C,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC,IAAI,EAAE,2BAA2B,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC;YACnE,CAAC;YACD,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,C
AAC,CAAC;YAClC,OAAO,MAAM;gBACX,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACZ,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,iCAAiC,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC;QACrE,CAAC;QAED,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAC,IAAI,EAAE,4DAA4D,CAAC,CAAC;YAClF,CAAC;YAED,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAC9B,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC,IAAI,EAAE,0BAA0B,CAAC,CAAC;YAChD,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;YAC/D,IAAI,MAAc,CAAC;YACnB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC;YAC5D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC,IAAI,EAAE,4BAA4B,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;YACpE,CAAC;YAED,IAAI,QAA0B,CAAC;YAC/B,IAAI,CAAC;gBACH,QAAQ,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACjC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC,IAAI,EAAE,wBAAyB,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YACpE,CAAC;YAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,IAAI,KAAK,EAAE,CAAC;gBACV,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC;YACpB,CAAC;YACD,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC,IAAI,EAAE,6BAA6B,MAAM,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAC,IAAI,EAAE,yDAAyD,CAAC,CAAC;YAC/E,CAAC;YAED,MAAM,MAAM,GAAG,CAAC,SAAS,CAAC,OAAO,IAAI,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;YACjE,MAAM,QAAQ,GAAG,SAAS,CAAC,QAAQ,IAAI,WAAW,CAAC;YACnD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,CAAC,QAAQ,KAAK,WAAW,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;YAE1G,IAAI,OAAO,CAAC;YACZ,IAAI,CAAC;gBACH,OAAO,GAAG,UAAU,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;YACxC,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC,IAAI,EAAE,oCAAqC,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YAChF,CAAC;YAED,MAAM,WAAW,GAAG,GAAG,SAAS,CAAC,MAAM,+BAA+B,MAAM,qCAAqC,CAAC;YAElH,IAAI,aAAqB,CAAC;YAC1B,IAAI,CAAC;gBACH,aAAa,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAClD,CAA
C;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC,IAAI,EAAE,8BAA+B,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;YAC1E,CAAC;YAED,MAAM,kBAAkB,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC9D,MAAM,MAAM,GAAG,kBAAkB,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;YAErD,OAAO,MAAM;gBACX,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACZ,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,wBAAwB,aAAa,CAAC,IAAI,EAAE,+BAA+B,MAAM,IAAI,CAAC,CAAC;QACxG,CAAC;QAED,OAAO,CAAC,CAAC,CAAC;YACR,OAAO,IAAI,CAAC,IAAc,EAAE,4BAA4B,IAAI,uEAAuE,CAAC,CAAC;QACvI,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,IAAI,CAAC,IAAY;IACxB,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;AAChC,CAAC;AAED,SAAS,IAAI,CAAC,IAAY,EAAE,OAAe;IACzC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;AAC1C,CAAC"}

package/dist/cli.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+#!/usr/bin/env node
+export {};
+//# sourceMappingURL=cli.d.ts.map

package/dist/cli.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}