npm - @evalgate/sdk - Versions diffs - 2.1.3 → 2.2.0 - Mend

@evalgate/sdk 2.1.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/CHANGELOG.md +25 -0
package/README.md +30 -10
package/dist/assertions.d.ts +21 -0
package/dist/assertions.js +43 -2
package/dist/cli/baseline.js +31 -6
package/dist/cli/discover.js +6 -7
package/dist/cli/explain.js +68 -1
package/dist/cli/impact-analysis.js +12 -1
package/dist/cli/print-config.js +1 -1
package/dist/index.d.ts +1 -1
package/dist/index.js +3 -2
package/dist/runtime/eval.d.ts +12 -3
package/dist/runtime/eval.js +15 -6
package/dist/snapshot.d.ts +3 -3
package/dist/snapshot.js +3 -3
package/dist/version.d.ts +2 -2
package/dist/version.js +2 -2
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,31 @@ All notable changes to the @evalgate/sdk package will be documented in this file
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [2.2.0] - 2026-03-03
+### Breaking
+- **`snapshot(output, name)` → `snapshot(name, output)`** — parameter order swapped to match natural call convention (`name` first, value second, same as `test('name', fn)`). Update any existing `snapshot(output, 'label')` calls to `snapshot('label', output)`.
+### Added
+- **`expect().not` modifier** — `expect('drop table').not.toContain('drop table')` now works; negates `passed` on any chained assertion via Proxy
+- **`hasPII(text)`** — semantic inverse of `notContainsPII`; returns `true` when PII is detected (email, phone, SSN, IP). Exported from main package. Eliminates double-negative confusion.
+- **`defineSuite` object form** — now accepts both `defineSuite(name, [...fns])` and `defineSuite({ name, specs: [...fns] })`. README updated with examples.
+### Fixed
+- **`specId` collision** — all specs in the `eval/` directory shared the same 8-char ID (`ZXZhbC9j`). Root cause: the short base64 prefix was identical for any path starting with `eval/c`. Fixed: the ID is now the first 16 hex characters of a SHA-256 hash of the relative path, name, and index (`discover.ts`).
+- **`explain` UNKNOWN verdict** — `evalgate explain` showed `Verdict: UNKNOWN` when reading `.evalgate/last-run.json`. Added `RunResult` format detection (`results[]` + `summary`). Added `.evalgate/last-run.json` and `.evalgate/runs/latest.json` to auto-search paths. Passing runs now show clean `✅ PASS` with no spurious "Run doctor" suggestions.
+- **`print-config` baseUrl default** — was `http://localhost:3000`; now `https://api.evalgate.com` to match `evalgate doctor`.
+- **`baseline update` self-contained** — no longer requires a custom `eval:baseline-update` npm script. Falls back to built-in mode (runs the detected package manager's `test` script, e.g. `npm test`, then stamps the baseline) if no script is present.
+- **`notContainsPII` phone regex** — broadened to cover `555-123-4567`, `555.123.4567`, and `555 123 4567` formats. JSDoc clarified: `false` = PII found (unsafe), `true` = no PII (safe).
+- **`impact-analysis` git error** — replaced raw `git diff --help` wall-of-text with clean targeted messages: `Not a git repository`, `Base branch 'X' not found. Fetch it first`, or generic exit-code message.
+- **README quickstart** — both `defineEval` examples now include an `executor` function. Running the quickstart no longer throws `Executor must be a function`.
+- **`snapshot` module docstring** — updated `@example` to reflect new `(name, output)` parameter order.
+---
 ## [2.1.3] - 2026-03-02
 ### Fixed

package/README.md CHANGED Viewed

@@ -40,13 +40,20 @@ Create `eval/your-spec.spec.ts`:
 ```typescript
 import { defineEval } from "@evalgate/sdk";
+defineEval("Basic Math Operations", async () => {
+  const result = 1 + 1;
+  return { pass: result === 2, score: result === 2 ? 100 : 0 };
+});
+// Object form (with metadata):
 defineEval({
-  name: "Basic Math Operations",
-  description: "Test fundamental arithmetic",
-  prompt: "Test: 1+1=2, string concatenation, array includes",
-  expected: "All tests should pass",
+  name: "String concatenation",
+  description: "Test string operations",
   tags: ["basic", "math"],
-  category: "unit-test"
+  executor: async () => {
+    const result = "hello" + " world";
+    return { pass: result === "hello world", score: 100 };
+  },
 });
 ```
@@ -259,14 +266,27 @@ Create `eval/your-spec.spec.ts`:
 ```typescript
 import { defineEval } from "@evalgate/sdk";
+defineEval("Basic Math Operations", async () => {
+  const result = 1 + 1;
+  return { pass: result === 2, score: result === 2 ? 100 : 0 };
+});
+// Object form (with metadata):
 defineEval({
-  name: "Basic Math Operations",
-  description: "Test fundamental arithmetic",
-  prompt: "Test: 1+1=2, string concatenation, array includes",
-  expected: "All tests should pass",
+  name: "String concatenation",
+  description: "Test string operations",
   tags: ["basic", "math"],
-  category: "unit-test"
+  executor: async () => {
+    const result = "hello" + " world";
+    return { pass: result === "hello world", score: 100 };
+  },
 });
+// Suite form — group related specs (also import `defineSuite` from "@evalgate/sdk"):
+defineSuite("Math suite", [
+  () => defineEval("addition", async () => ({ pass: 1 + 1 === 2, score: 100 })),
+  () => defineEval("subtraction", async () => ({ pass: 5 - 3 === 2, score: 100 })),
+]);
 ```
 ```bash

package/dist/assertions.d.ts CHANGED Viewed

@@ -32,6 +32,11 @@ export declare class AssertionError extends Error {
 export declare class Expectation {
     private value;
     constructor(value: unknown);
+    /**
+     * Negate the next assertion — inverts `passed` on any chained method.
+     * @example expect('drop table').not.toContain('drop table')
+     */
+    get not(): Expectation;
     /**
      * Assert value equals expected
      * @example expect(output).toEqual("Hello")
@@ -171,7 +176,23 @@ export declare function hasLength(text: string, range: {
     max?: number;
 }): boolean;
 export declare function containsJSON(text: string): boolean;
+/**
+ * Returns `true` when the text is PII-free (safe to use), `false` when PII is detected.
+ *
+ * @example
+ * if (!notContainsPII(response)) throw new Error("PII leak detected");
+ * // Or use the inverse helper, which reads more clearly:
+ * if (hasPII(response)) throw new Error("PII leak detected");
+ */
 export declare function notContainsPII(text: string): boolean;
+/**
+ * Returns `true` when PII is detected in the text (unsafe), `false` when safe.
+ * This is the semantic inverse of `notContainsPII` and may be easier to reason about.
+ *
+ * @example
+ * if (hasPII(response)) throw new Error("PII leak");
+ */
+export declare function hasPII(text: string): boolean;
 export declare function hasSentiment(text: string, expected: "positive" | "negative" | "neutral"): boolean;
 export declare function similarTo(text1: string, text2: string, threshold?: number): boolean;
 export declare function withinRange(value: number, min: number, max: number): boolean;

package/dist/assertions.js CHANGED Viewed

@@ -24,6 +24,7 @@ exports.matchesPattern = matchesPattern;
 exports.hasLength = hasLength;
 exports.containsJSON = containsJSON;
 exports.notContainsPII = notContainsPII;
+exports.hasPII = hasPII;
 exports.hasSentiment = hasSentiment;
 exports.similarTo = similarTo;
 exports.withinRange = withinRange;
@@ -56,6 +57,28 @@ class Expectation {
     constructor(value) {
         this.value = value;
     }
+    /**
+     * Negate the next assertion — inverts `passed` on any chained method.
+     * @example expect('drop table').not.toContain('drop table')
+     */
+    get not() {
+        const value = this.value;
+        return new Proxy(new Expectation(value), {
+            get(target, prop) {
+                const orig = target[prop];
+                if (typeof orig === "function" && prop !== "constructor") {
+                    return (...args) => {
+                        const result = orig.call(target, ...args);
+                        if (result && typeof result === "object" && "passed" in result) {
+                            return { ...result, passed: !result.passed };
+                        }
+                        return result;
+                    };
+                }
+                return orig;
+            },
+        });
+    }
     /**
      * Assert value equals expected
      * @example expect(output).toEqual("Hello")
@@ -539,17 +562,35 @@ function containsJSON(text) {
         return false;
     }
 }
+/**
+ * Returns `true` when the text is PII-free (safe to use), `false` when PII is detected.
+ *
+ * @example
+ * if (!notContainsPII(response)) throw new Error("PII leak detected");
+ * // Or use the inverse helper, which reads more clearly:
+ * if (hasPII(response)) throw new Error("PII leak detected");
+ */
 function notContainsPII(text) {
     // Simple PII detection patterns
     const piiPatterns = [
         /\b\d{3}-\d{2}-\d{4}\b/, // SSN
         /\b\d{3}\.\d{3}\.\d{4}\b/, // SSN with dots
-        /\b\d{10}\b/, // Phone number
-        /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/, // Email
+        /\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b/, // Phone (various formats)
+        /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/, // Email
         /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/, // IP address
     ];
     return !piiPatterns.some((pattern) => pattern.test(text));
 }
+/**
+ * Returns `true` when PII is detected in the text (unsafe), `false` when safe.
+ * This is the semantic inverse of `notContainsPII` and may be easier to reason about.
+ *
+ * @example
+ * if (hasPII(response)) throw new Error("PII leak");
+ */
+function hasPII(text) {
+    return !notContainsPII(text);
+}
 function hasSentiment(text, expected) {
     // This is a simplified implementation
     const positiveWords = ["good", "great", "excellent", "awesome"];

package/dist/cli/baseline.js CHANGED Viewed

@@ -126,7 +126,6 @@ function runBaselineInit(cwd) {
 }
 // ── baseline update ──
 function runBaselineUpdate(cwd) {
-    // Check if eval:baseline-update script exists in package.json
     const pkgPath = path.join(cwd, "package.json");
     if (!fs.existsSync(pkgPath)) {
         console.error("❌ No package.json found. Run this from your project root.");
@@ -140,13 +139,39 @@ function runBaselineUpdate(cwd) {
         console.error("❌ Failed to parse package.json");
         return 1;
     }
-    if (!pkg.scripts?.["eval:baseline-update"]) {
-        console.error("❌ Missing 'eval:baseline-update' script in package.json.");
-        console.error('   Add it:  "eval:baseline-update": "npx tsx scripts/regression-gate.ts --update-baseline"');
+    // Use custom script if available
+    if (pkg.scripts?.["eval:baseline-update"]) {
+        console.log("📊 Running baseline update (custom script)...\n");
+        return runScript(cwd, "eval:baseline-update");
+    }
+    // Self-contained built-in mode: run the test suite then stamp the baseline
+    console.log("📊 Running baseline update (built-in mode)...\n");
+    const pm = detectPackageManager(cwd);
+    const isWin = process.platform === "win32";
+    const testResult = (0, node_child_process_1.spawnSync)(pm, ["test"], {
+        cwd,
+        stdio: "inherit",
+        shell: isWin,
+    });
+    const baselinePath = path.join(cwd, BASELINE_REL);
+    if (!fs.existsSync(baselinePath)) {
+        console.error("❌ No baseline found. Run 'evalgate baseline init' first.");
+        return 1;
+    }
+    try {
+        const baseline = JSON.parse(fs.readFileSync(baselinePath, "utf-8"));
+        baseline.updatedAt = new Date().toISOString();
+        baseline.updatedBy = process.env.USER || process.env.USERNAME || "unknown";
+        baseline.confidenceTests = baseline.confidenceTests ?? {};
+        baseline.confidenceTests.unitPassed = testResult.status === 0;
+        fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}\n`);
+        console.log("\n✅ Baseline updated successfully");
+    }
+    catch {
+        console.error("❌ Failed to update baseline file");
         return 1;
     }
-    console.log("📊 Running baseline update...\n");
-    return runScript(cwd, "eval:baseline-update");
+    return testResult.status ?? 1;
 }
 // ── baseline router ──
 function runBaseline(argv) {

package/dist/cli/discover.js CHANGED Viewed

@@ -59,6 +59,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.discoverSpecs = discoverSpecs;
 exports.printDiscoveryResults = printDiscoveryResults;
 exports.runDiscover = runDiscover;
+const crypto = __importStar(require("node:crypto"));
 const fs = __importStar(require("node:fs/promises"));
 const path = __importStar(require("node:path"));
 const execution_mode_1 = require("../runtime/execution-mode");
@@ -284,13 +285,11 @@ function analyzeComplexity(content) {
  * Generate specification ID from file path + name + index (unique per defineEval call)
  */
 function generateSpecId(filePath, name, index) {
-    const relativePath = path.relative(process.cwd(), filePath);
-    const key = `${relativePath}:${name}:${index}`;
-    const hash = Buffer.from(key)
-        .toString("base64")
-        .replace(/[+/=]/g, "")
-        .slice(0, 8);
-    return hash;
+    const relativePath = path
+        .relative(process.cwd(), filePath)
+        .replace(/\\/g, "/");
+    const key = `${relativePath}|${name}|${index}`;
+    return crypto.createHash("sha256").update(key).digest("hex").slice(0, 16);
 }
 /**
  * Calculate discovery statistics

package/dist/cli/explain.js CHANGED Viewed

@@ -84,6 +84,8 @@ const REPORT_SEARCH_PATHS = [
     "evals/regression-report.json",
     ".evalgate/last-report.json",
     ".evalgate/last_report.json",
+    ".evalgate/last-run.json",
+    ".evalgate/runs/latest.json",
 ];
 function findReport(cwd, explicitPath) {
     if (explicitPath) {
@@ -354,13 +356,78 @@ function suggestFixes(causes) {
 }
 // ── Build explain output ──
 function buildExplainOutput(report, reportPath) {
-    // Support both CheckReport (from evalgate check) and BuiltinReport (from evalgate gate)
+    // Support RunResult (from evalgate run) — has schemaVersion + results[] + summary
+    const isRunResult = "results" in report &&
+        Array.isArray(report.results) &&
+        "summary" in report &&
+        report.summary !== null &&
+        typeof report.summary === "object";
+    if (isRunResult) {
+        return buildFromRunResult(report, reportPath);
+    }
+    // Support BuiltinReport (from evalgate gate)
     const isBuiltinReport = "category" in report && "deltas" in report;
     if (isBuiltinReport) {
         return buildFromBuiltinReport(report, reportPath);
     }
     return buildFromCheckReport(report, reportPath);
 }
+function buildFromRunResult(report, reportPath) {
+    const summary = report.summary;
+    const results = report.results ?? [];
+    const passed = summary.failed === 0;
+    // Top failures
+    const failures = results.filter((r) => r.result.status === "failed");
+    const topFailures = failures.slice(0, 3).map((r, i) => ({
+        rank: i + 1,
+        name: r.name,
+        filePath: r.filePath,
+        reason: r.result.error,
+    }));
+    // Changes: pass rate
+    const changes = [
+        {
+            metric: "Pass rate",
+            baseline: "—",
+            current: `${Math.round(summary.passRate * 100)}%`,
+            direction: passed ? "same" : "worse",
+        },
+    ];
+    // For passing runs, emit nothing so no misleading "Run doctor" suggestions appear
+    if (passed) {
+        return {
+            verdict: "pass",
+            reasonMessage: `All ${summary.passed} spec${summary.passed === 1 ? "" : "s"} passed`,
+            topFailures: [],
+            totalFailures: 0,
+            changes,
+            rootCauses: [],
+            suggestedFixes: [],
+            reportPath,
+        };
+    }
+    // Classify root cause by inspecting error messages
+    const errorText = failures
+        .map((r) => (r.result.error ?? "").toLowerCase())
+        .join(" ");
+    const rootCauses = [];
+    if (errorText.includes("pii") || errorText.includes("safety"))
+        rootCauses.push("safety_regression");
+    if (errorText.includes("tool") || errorText.includes("function_call"))
+        rootCauses.push("tool_use_drift");
+    if (rootCauses.length === 0)
+        rootCauses.push("prompt_drift");
+    return {
+        verdict: "fail",
+        reasonMessage: `${summary.failed} of ${results.length} spec${results.length === 1 ? "" : "s"} failed`,
+        topFailures,
+        totalFailures: failures.length,
+        changes,
+        rootCauses,
+        suggestedFixes: suggestFixes(rootCauses),
+        reportPath,
+    };
+}
 function buildFromCheckReport(report, reportPath) {
     const failedCases = report.failedCases ?? [];
     // Top failures (up to 3)

package/dist/cli/impact-analysis.js CHANGED Viewed

@@ -109,7 +109,18 @@ async function getChangedFiles(baseBranch) {
         });
         git.on("close", (code) => {
             if (code !== 0) {
-                reject(new Error(`Git diff failed: ${error}`));
+                const lowerError = error.toLowerCase();
+                if (lowerError.includes("not a git repository") ||
+                    lowerError.includes("fatal: not a git")) {
+                    reject(new Error("Not a git repository. Run 'git init' or run evalgate from inside a git repo."));
+                }
+                else if (lowerError.includes("unknown revision") ||
+                    lowerError.includes("bad revision")) {
+                    reject(new Error(`Base branch '${baseBranch}' not found. Fetch it first: git fetch origin ${baseBranch}`));
+                }
+                else {
+                    reject(new Error(`Git diff failed (exit ${code}). Ensure git is installed and '${baseBranch}' exists.`));
+                }
                 return;
             }
             const files = output

package/dist/cli/print-config.js CHANGED Viewed

@@ -138,7 +138,7 @@ function buildResolvedConfig(cwd, flags) {
         value: flags.baseUrl ||
             envBaseUrl ||
             fileConfig?.baseUrl ||
-            "http://localhost:3000",
+            "https://api.evalgate.com",
         source: baseUrlSource,
     });
     // apiKey (always redacted)

package/dist/index.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export { AIEvalClient } from "./client";
 import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
 export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
 NetworkError, };
-export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
+export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasPII, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
 import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
 export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
 export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";

package/dist/index.js CHANGED Viewed

@@ -8,8 +8,8 @@
  * @packageDocumentation
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
-exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = void 0;
+exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
+exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = void 0;
 // Main SDK exports
 var client_1 = require("./client");
 Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -32,6 +32,7 @@ Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: fu
 Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
 Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
 Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
+Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () { return assertions_1.hasPII; } });
 Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
 Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
 Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });

package/dist/runtime/eval.d.ts CHANGED Viewed

@@ -18,10 +18,19 @@ export declare const evalai: {
     test: DefineEvalFunction;
 };
 /**
- * Suite definition for grouping related specifications
- * This will be expanded in Layer 3 for dependency graph support
+ * Suite definition for grouping related specifications.
+ * Accepts both a positional form and an object form:
+ *
+ * @example Positional form:
+ * defineSuite('My Suite', [() => defineEval('spec 1', executor), ...])
+ *
+ * @example Object form:
+ * defineSuite({ name: 'My Suite', specs: [() => defineEval('spec 1', executor), ...] })
  */
-export declare function defineSuite(_name: string, specs: (() => void)[]): void;
+export declare function defineSuite(nameOrConfig: string | {
+    name: string;
+    specs: (() => void)[];
+}, specsArg?: (() => void)[]): void;
 /**
  * Helper function to create specification contexts
  * Useful for testing and manual execution

package/dist/runtime/eval.js CHANGED Viewed

@@ -204,13 +204,22 @@ exports.evalai = {
     test: exports.defineEval,
 };
 /**
- * Suite definition for grouping related specifications
- * This will be expanded in Layer 3 for dependency graph support
+ * Suite definition for grouping related specifications.
+ * Accepts both a positional form and an object form:
+ *
+ * @example Positional form:
+ * defineSuite('My Suite', [() => defineEval('spec 1', executor), ...])
+ *
+ * @example Object form:
+ * defineSuite({ name: 'My Suite', specs: [() => defineEval('spec 1', executor), ...] })
  */
-function defineSuite(_name, specs) {
-    // For now, just execute the specs to register them
-    // In Layer 3, this will build the dependency graph
-    for (const specFn of specs) {
+function defineSuite(nameOrConfig, specsArg) {
+    const specFns = typeof nameOrConfig === "string"
+        ? (specsArg ?? [])
+        : (nameOrConfig.specs ?? []);
+    // Execute each spec function to register its defineEval calls
+    // In Layer 3, this will also build the dependency graph
+    for (const specFn of specFns) {
         specFn();
     }
 }

package/dist/snapshot.d.ts CHANGED Viewed

@@ -9,7 +9,7 @@
  * import { snapshot, loadSnapshot } from '@evalgate/sdk';
  *
  * const output = await generateText('Write a haiku about coding');
- * await snapshot(output, 'haiku-test');
+ * await snapshot('haiku-test', output);
  *
  * // Later, compare with snapshot
  * const saved = await loadSnapshot('haiku-test');
@@ -135,10 +135,10 @@ export declare class SnapshotManager {
  * @example
  * ```typescript
  * const output = await generateText('Write a haiku');
- * await snapshot(output, 'haiku-test');
+ * await snapshot('haiku-test', output);
  * ```
  */
-export declare function snapshot(output: string, name: string, options?: {
+export declare function snapshot(name: string, output: string, options?: {
     tags?: string[];
     metadata?: Record<string, unknown>;
     overwrite?: boolean;

package/dist/snapshot.js CHANGED Viewed

@@ -10,7 +10,7 @@
  * import { snapshot, loadSnapshot } from '@evalgate/sdk';
  *
  * const output = await generateText('Write a haiku about coding');
- * await snapshot(output, 'haiku-test');
+ * await snapshot('haiku-test', output);
  *
  * // Later, compare with snapshot
  * const saved = await loadSnapshot('haiku-test');
@@ -271,10 +271,10 @@ function getSnapshotManager(dir) {
  * @example
  * ```typescript
  * const output = await generateText('Write a haiku');
- * await snapshot(output, 'haiku-test');
+ * await snapshot('haiku-test', output);
  * ```
  */
-async function snapshot(output, name, options) {
+async function snapshot(name, output, options) {
     const manager = getSnapshotManager(options?.dir);
     return manager.save(name, output, options);
 }

package/dist/version.d.ts CHANGED Viewed

@@ -3,5 +3,5 @@
  * X-EvalGate-SDK-Version: SDK package version
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
  */
-export declare const SDK_VERSION = "2.1.3";
-export declare const SPEC_VERSION = "2.1.3";
+export declare const SDK_VERSION = "2.2.0";
+export declare const SPEC_VERSION = "2.2.0";

package/dist/version.js CHANGED Viewed

@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
  * X-EvalGate-SDK-Version: SDK package version
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
  */
-exports.SDK_VERSION = "2.1.3";
-exports.SPEC_VERSION = "2.1.3";
+exports.SDK_VERSION = "2.2.0";
+exports.SPEC_VERSION = "2.2.0";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@evalgate/sdk",
-	"version": "2.1.3",
+	"version": "2.2.0",
 	"publishConfig": {
 		"access": "public",
 		"registry": "https://registry.npmjs.org/"