npm - safestar - Versions diffs - 1.0.0 - Mend

safestar 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/evaluator.js ADDED Viewed

@@ -0,0 +1,48 @@
+export function evaluate(scenario, currentRuns, baselineRuns) {
+    const report = {
+        scenario: scenario.name,
+        status: 'PASS',
+        length: { baseline: 0, current: 0, deltaPercent: 0 },
+        variance: { score: 0 },
+        violations: []
+    };
+    const currentLengths = currentRuns.map(r => r.length);
+    const avgCurrent = currentLengths.reduce((a, b) => a + b, 0) / currentLengths.length;
+    const squareDiffs = currentLengths.map(v => Math.pow(v - avgCurrent, 2));
+    const variance = Math.sqrt(squareDiffs.reduce((a, b) => a + b, 0) / currentLengths.length);
+    report.length.current = Math.round(avgCurrent);
+    report.variance.score = parseFloat(variance.toFixed(2));
+    if (baselineRuns) {
+        const baseLengths = baselineRuns.map(r => r.length);
+        const avgBase = baseLengths.reduce((a, b) => a + b, 0) / baseLengths.length;
+        report.length.baseline = Math.round(avgBase);
+        if (avgBase > 0) {
+            report.length.deltaPercent = Math.round(((avgCurrent - avgBase) / avgBase) * 100);
+        }
+        if (Math.abs(report.length.deltaPercent) > 50) {
+            report.status = 'WARN';
+        }
+    }
+    const violations = {};
+    currentRuns.forEach(run => {
+        if (scenario.checks?.max_length && run.length > scenario.checks.max_length) {
+            violations['max_length'] = (violations['max_length'] || 0) + 1;
+        }
+        scenario.checks?.must_contain?.forEach(word => {
+            if (!run.output.toLowerCase().includes(word.toLowerCase())) {
+                violations[`must_contain: "${word}"`] = (violations[`must_contain: "${word}"`] || 0) + 1;
+            }
+        });
+        scenario.checks?.must_not_contain?.forEach(word => {
+            if (run.output.toLowerCase().includes(word.toLowerCase())) {
+                violations[`must_not_contain: "${word}"`] = (violations[`must_not_contain: "${word}"`] || 0) + 1;
+            }
+        });
+    });
+    Object.entries(violations).forEach(([check, count]) => {
+        report.violations.push({ check, count });
+        if (count > 0)
+            report.status = 'FAIL';
+    });
+    return report;
+}

package/dist/index.js ADDED Viewed

@@ -0,0 +1,94 @@
+#!/usr/bin/env node
+import { Command } from 'commander';
+import chalk from 'chalk';
+import { ensureDirs, loadScenario, saveRuns, saveBaseline, loadBaseline, loadLatestRun } from './utils.js';
+import { runScenario } from './runner.js';
+import { evaluate } from './evaluator.js';
+const program = new Command();
+program
+    .name('safestar')
+    .description('Snapshot and diff AI behavior')
+    .version('1.0.0');
+// COMMAND: RUN
+program.command('run <scenarioPath>')
+    .description('Execute a scenario and save runs locally')
+    .action(async (scenarioPath) => {
+    try {
+        ensureDirs();
+        const scenario = loadScenario(scenarioPath);
+        const results = await runScenario(scenario);
+        const savedPath = saveRuns(scenario.name, results);
+        console.log(chalk.green(`✓ Runs completed. Saved to ${savedPath}`));
+        // Auto-run diff logic to show immediate feedback
+        const baseline = loadBaseline(scenario.name);
+        const report = evaluate(scenario, results, baseline);
+        printReport(report);
+    }
+    catch (e) {
+        console.error(chalk.red('Error:'), e.message);
+    }
+});
+// COMMAND: BASELINE
+program.command('baseline <scenarioName>')
+    .description('Promote the latest run to be the new baseline')
+    .action((scenarioName) => {
+    try {
+        const latest = loadLatestRun(scenarioName);
+        if (!latest) {
+            console.log(chalk.red('No runs found. Run "safestar run <scenario>" first.'));
+            return;
+        }
+        saveBaseline(scenarioName, latest);
+        console.log(chalk.green(`✓ Baseline updated for ${scenarioName}`));
+    }
+    catch (e) {
+        console.error(chalk.red('Error:'), e.message);
+    }
+});
+// COMMAND: DIFF
+program.command('diff <scenarioPath>')
+    .description('Compare latest runs against baseline')
+    .action((scenarioPath) => {
+    try {
+        const scenario = loadScenario(scenarioPath);
+        const current = loadLatestRun(scenario.name);
+        const baseline = loadBaseline(scenario.name);
+        if (!current) {
+            console.error(chalk.red('No current runs found.'));
+            return;
+        }
+        const report = evaluate(scenario, current, baseline);
+        printReport(report);
+    }
+    catch (e) {
+        console.error(chalk.red('Error:'), e.message);
+    }
+});
+// Helper to pretty print the report
+function printReport(report) {
+    console.log(chalk.bold('\n--- SAFESTAR REPORT ---'));
+    if (report.status === 'FAIL')
+        console.log(`Status: ${chalk.red.bold('FAIL')}`);
+    else if (report.status === 'WARN')
+        console.log(`Status: ${chalk.yellow.bold('WARN')}`);
+    else
+        console.log(`Status: ${chalk.green.bold('PASS')}`);
+    console.log(`\nMetrics:`);
+    console.log(`  Avg Length: ${report.length.current} chars`);
+    if (report.length.baseline > 0) {
+        const color = report.length.deltaPercent > 0 ? chalk.yellow : chalk.blue;
+        console.log(`  Drift:      ${color(report.length.deltaPercent + '%')} vs baseline`);
+    }
+    console.log(`  Variance:   ${report.variance.score} (std dev)`);
+    if (report.violations.length > 0) {
+        console.log(chalk.red(`\nViolations:`));
+        report.violations.forEach((v) => {
+            console.log(`  - ${v.check}: failed in ${v.count} runs`);
+        });
+    }
+    else {
+        console.log(chalk.green(`\nNo heuristic violations.`));
+    }
+    console.log('-----------------------\n');
+}
+// The `run` action is async, so parse with parseAsync and await it; this keeps
+// the handler's promise from being left floating. With no arguments it reads
+// process.argv, exactly as parse() did.
+await program.parseAsync();

package/dist/runner.js ADDED Viewed

@@ -0,0 +1,34 @@
+import { execSync } from 'child_process';
+export async function runScenario(scenario) {
+    const results = [];
+    console.log(`Running scenario: ${scenario.name} (${scenario.runs} times)...`);
+    for (let i = 0; i < scenario.runs; i++) {
+        let output = "";
+        // 1. REAL MODE: If user provided an exec command
+        if (scenario.exec) {
+            try {
+                // We pass the PROMPT as an environment variable to the user's script
+                output = execSync(scenario.exec, {
+                    encoding: 'utf-8',
+                    env: { ...process.env, PROMPT: scenario.prompt },
+                    stdio: ['ignore', 'pipe', 'ignore'] // Clean output, ignore stderr
+                });
+            }
+            catch (error) {
+                console.error(`Execution failed: ${error.message}`);
+                output = "ERROR_IN_EXECUTION";
+            }
+        }
+        // 2. DEMO MODE: If no exec provided, fallback to mock (so new users can try it)
+        else {
+            output = "Mock Response " + Math.random().toString(36).substring(7);
+        }
+        results.push({
+            scenario: scenario.name,
+            output: output.trim(),
+            length: output.length,
+            timestamp: new Date().toISOString()
+        });
+    }
+    return results;
+}

package/dist/types.js ADDED Viewed

@@ -0,0 +1,15 @@
+import { z } from 'zod';
+// 1. Zod Schema for the User's YAML Scenario
+export const ScenarioSchema = z.object({
+    name: z.string(),
+    description: z.string().optional(),
+    prompt: z.string(),
+    // NEW: The command to execute (e.g., "python bot.py")
+    exec: z.string().optional(),
+    runs: z.number().int().min(1).default(5),
+    checks: z.object({
+        max_length: z.number().optional(),
+        must_contain: z.array(z.string()).optional(),
+        must_not_contain: z.array(z.string()).optional(),
+    }).optional()
+});

package/dist/utils.js ADDED Viewed

@@ -0,0 +1,47 @@
+import fs from 'fs';
+import path from 'path';
+import yaml from 'js-yaml';
+import { ScenarioSchema } from './types.js';
+const RUNS_DIR = '.safestar/runs';
+const BASELINE_DIR = '.baselines';
+export function ensureDirs() {
+    if (!fs.existsSync(RUNS_DIR))
+        fs.mkdirSync(RUNS_DIR, { recursive: true });
+    if (!fs.existsSync(BASELINE_DIR))
+        fs.mkdirSync(BASELINE_DIR, { recursive: true });
+}
+export function loadScenario(filepath) {
+    const content = fs.readFileSync(filepath, 'utf-8');
+    const raw = yaml.load(content);
+    return ScenarioSchema.parse(raw);
+}
+export function saveRuns(scenarioName, runs) {
+    const targetDir = path.join(RUNS_DIR, scenarioName);
+    if (!fs.existsSync(targetDir))
+        fs.mkdirSync(targetDir, { recursive: true });
+    const filename = `run_${Date.now()}.json`;
+    fs.writeFileSync(path.join(targetDir, filename), JSON.stringify(runs, null, 2));
+    return path.join(targetDir, filename);
+}
+export function saveBaseline(scenarioName, runs) {
+    const targetDir = path.join(BASELINE_DIR, scenarioName);
+    if (!fs.existsSync(targetDir))
+        fs.mkdirSync(targetDir, { recursive: true });
+    const filename = 'latest.json';
+    fs.writeFileSync(path.join(targetDir, filename), JSON.stringify(runs, null, 2));
+}
+export function loadBaseline(scenarioName) {
+    const filepath = path.join(BASELINE_DIR, scenarioName, 'latest.json');
+    if (!fs.existsSync(filepath))
+        return null;
+    return JSON.parse(fs.readFileSync(filepath, 'utf-8'));
+}
+export function loadLatestRun(scenarioName) {
+    const targetDir = path.join(RUNS_DIR, scenarioName);
+    if (!fs.existsSync(targetDir))
+        return null;
+    const files = fs.readdirSync(targetDir).sort().reverse();
+    if (files.length === 0)
+        return null;
+    return JSON.parse(fs.readFileSync(path.join(targetDir, files[0]), 'utf-8'));
+}

package/package.json ADDED Viewed

@@ -0,0 +1,43 @@
+{
+  "name": "safestar",
+  "version": "1.0.0",
+  "description": "Snapshot, version, and diff AI behavior over time.",
+  "main": "dist/index.js",
+  "bin": {
+    "safestar": "./dist/index.js"
+  },
+  "type": "module",
+  "files": [
+    "dist",
+    "README.md",
+    "package.json"
+  ],
+  "scripts": {
+    "dev": "tsx src/index.ts",
+    "build": "tsc",
+    "prepublishOnly": "npm run build"
+  },
+  "keywords": [
+    "ai",
+    "testing",
+    "drift",
+    "snapshot",
+    "cli"
+  ],
+  "author": "Aditya Pandey",
+  "license": "ISC",
+  "dependencies": {
+    "chalk": "^5.3.0",
+    "commander": "^11.1.0",
+    "glob": "^10.3.10",
+    "js-yaml": "^4.1.0",
+    "zod": "^3.22.4"
+  },
+  "devDependencies": {
+    "@types/node": "^20.11.0",
+    "tsx": "^4.7.0",
+    "typescript": "^5.3.3",
+    "@types/glob": "^8.1.0",
+    "@types/js-yaml": "^4.0.9"
+  }
+}