@evalgate/sdk 2.1.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,31 @@ All notable changes to the @evalgate/sdk package will be documented in this file
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [2.2.0] - 2026-03-03
9
+
10
+ ### Breaking
11
+
12
+ - **`snapshot(output, name)` → `snapshot(name, output)`** — parameter order swapped to match natural call convention (`name` first, value second, same as `test('name', fn)`). Update any existing `snapshot(output, 'label')` calls to `snapshot('label', output)`.
13
+
14
+ ### Added
15
+
16
+ - **`expect().not` modifier** — `expect('drop table').not.toContain('drop table')` now works; negates `passed` on any chained assertion via Proxy
17
+ - **`hasPII(text)`** — semantic inverse of `notContainsPII`; returns `true` when PII is detected (email, phone, SSN, IP). Exported from main package. Eliminates double-negative confusion.
18
+ - **`defineSuite` object form** — now accepts both `defineSuite(name, [...fns])` and `defineSuite({ name, specs: [...fns] })`. README updated with examples.
19
+
20
+ ### Fixed
21
+
22
+ - **`specId` collision** — all specs in `eval/` directory shared the same 8-char ID (`ZXZhbC9j`). Root cause: short base64 prefix was identical for any path starting with `eval/c`. Fixed: SHA-256 hex (16 chars) in `discover.ts`.
23
+ - **`explain` UNKNOWN verdict** — `evalgate explain` showed `Verdict: UNKNOWN` when reading `.evalgate/last-run.json`. Added `RunResult` format detection (`results[]` + `summary`). Added `.evalgate/last-run.json` and `.evalgate/runs/latest.json` to auto-search paths. Passing runs now show clean `✅ PASS` with no spurious "Run doctor" suggestions.
24
+ - **`print-config` baseUrl default** — was `http://localhost:3000`; now `https://api.evalgate.com` to match `evalgate doctor`.
25
+ - **`baseline update` self-contained** — no longer requires a custom `eval:baseline-update` npm script. Falls back to built-in mode (runs the detected package manager's `test` command, then stamps the existing baseline) if no script is present.
26
+ - **`notContainsPII` phone regex** — broadened to cover `555-123-4567`, `555.123.4567`, and `555 123 4567` formats. JSDoc clarified: `false` = PII found (unsafe), `true` = no PII (safe).
27
+ - **`impact-analysis` git error** — replaced raw `git diff --help` wall-of-text with clean targeted messages: `Not a git repository`, `Base branch 'X' not found. Fetch it first`, or generic exit-code message.
28
+ - **README quickstart** — both `defineEval` examples now include an `executor` function. Running the quickstart no longer throws `Executor must be a function`.
29
+ - **`snapshot` module docstring** — updated `@example` to reflect new `(name, output)` parameter order.
30
+
31
+ ---
32
+
8
33
  ## [2.1.3] - 2026-03-02
9
34
 
10
35
  ### Fixed
package/README.md CHANGED
@@ -40,13 +40,20 @@ Create `eval/your-spec.spec.ts`:
40
40
  ```typescript
41
41
  import { defineEval } from "@evalgate/sdk";
42
42
 
43
+ defineEval("Basic Math Operations", async () => {
44
+ const result = 1 + 1;
45
+ return { pass: result === 2, score: result === 2 ? 100 : 0 };
46
+ });
47
+
48
+ // Object form (with metadata):
43
49
  defineEval({
44
- name: "Basic Math Operations",
45
- description: "Test fundamental arithmetic",
46
- prompt: "Test: 1+1=2, string concatenation, array includes",
47
- expected: "All tests should pass",
50
+ name: "String concatenation",
51
+ description: "Test string operations",
48
52
  tags: ["basic", "math"],
49
- category: "unit-test"
53
+ executor: async () => {
54
+ const result = "hello" + " world";
55
+ return { pass: result === "hello world", score: 100 };
56
+ },
50
57
  });
51
58
  ```
52
59
 
@@ -259,14 +266,27 @@ Create `eval/your-spec.spec.ts`:
259
266
  ```typescript
260
267
  import { defineEval } from "@evalgate/sdk";
261
268
 
269
+ defineEval("Basic Math Operations", async () => {
270
+ const result = 1 + 1;
271
+ return { pass: result === 2, score: result === 2 ? 100 : 0 };
272
+ });
273
+
274
+ // Object form (with metadata):
262
275
  defineEval({
263
- name: "Basic Math Operations",
264
- description: "Test fundamental arithmetic",
265
- prompt: "Test: 1+1=2, string concatenation, array includes",
266
- expected: "All tests should pass",
276
+ name: "String concatenation",
277
+ description: "Test string operations",
267
278
  tags: ["basic", "math"],
268
- category: "unit-test"
279
+ executor: async () => {
280
+ const result = "hello" + " world";
281
+ return { pass: result === "hello world", score: 100 };
282
+ },
269
283
  });
284
+
285
+ // Suite form — group related specs:
286
+ defineSuite("Math suite", [
287
+ () => defineEval("addition", async () => ({ pass: 1 + 1 === 2, score: 100 })),
288
+ () => defineEval("subtraction", async () => ({ pass: 5 - 3 === 2, score: 100 })),
289
+ ]);
270
290
  ```
271
291
 
272
292
  ```bash
@@ -32,6 +32,11 @@ export declare class AssertionError extends Error {
32
32
  export declare class Expectation {
33
33
  private value;
34
34
  constructor(value: unknown);
35
+ /**
36
+ * Negate the next assertion — inverts `passed` on any chained method.
37
+ * @example expect('drop table').not.toContain('drop table')
38
+ */
39
+ get not(): Expectation;
35
40
  /**
36
41
  * Assert value equals expected
37
42
  * @example expect(output).toEqual("Hello")
@@ -171,7 +176,23 @@ export declare function hasLength(text: string, range: {
171
176
  max?: number;
172
177
  }): boolean;
173
178
  export declare function containsJSON(text: string): boolean;
179
+ /**
180
+ * Returns `true` when the text is PII-free (safe to use), `false` when PII is detected.
181
+ *
182
+ * @example
183
+ * if (!notContainsPII(response)) throw new Error("PII leak detected");
184
+ * // Or use the clearer inverse helper:
185
+ * if (hasPII(response)) throw new Error("PII leak detected");
186
+ */
174
187
  export declare function notContainsPII(text: string): boolean;
188
+ /**
189
+ * Returns `true` when PII is detected in the text (unsafe), `false` when safe.
190
+ * This is the semantic inverse of `notContainsPII` and may be easier to reason about.
191
+ *
192
+ * @example
193
+ * if (hasPII(response)) throw new Error("PII leak");
194
+ */
195
+ export declare function hasPII(text: string): boolean;
175
196
  export declare function hasSentiment(text: string, expected: "positive" | "negative" | "neutral"): boolean;
176
197
  export declare function similarTo(text1: string, text2: string, threshold?: number): boolean;
177
198
  export declare function withinRange(value: number, min: number, max: number): boolean;
@@ -24,6 +24,7 @@ exports.matchesPattern = matchesPattern;
24
24
  exports.hasLength = hasLength;
25
25
  exports.containsJSON = containsJSON;
26
26
  exports.notContainsPII = notContainsPII;
27
+ exports.hasPII = hasPII;
27
28
  exports.hasSentiment = hasSentiment;
28
29
  exports.similarTo = similarTo;
29
30
  exports.withinRange = withinRange;
@@ -56,6 +57,28 @@ class Expectation {
56
57
  constructor(value) {
57
58
  this.value = value;
58
59
  }
60
+ /**
61
+ * Negate the next assertion — inverts `passed` on any chained method.
62
+ * @example expect('drop table').not.toContain('drop table')
63
+ */
64
+ get not() {
65
+ const value = this.value;
66
+ return new Proxy(new Expectation(value), {
67
+ get(target, prop) {
68
+ const orig = target[prop];
69
+ if (typeof orig === "function" && prop !== "constructor") {
70
+ return (...args) => {
71
+ const result = orig.call(target, ...args);
72
+ if (result && typeof result === "object" && "passed" in result) {
73
+ return { ...result, passed: !result.passed };
74
+ }
75
+ return result;
76
+ };
77
+ }
78
+ return orig;
79
+ },
80
+ });
81
+ }
59
82
  /**
60
83
  * Assert value equals expected
61
84
  * @example expect(output).toEqual("Hello")
@@ -539,17 +562,35 @@ function containsJSON(text) {
539
562
  return false;
540
563
  }
541
564
  }
565
+ /**
566
+ * Returns `true` when the text is PII-free (safe to use), `false` when PII is detected.
567
+ *
568
+ * @example
569
+ * if (!notContainsPII(response)) throw new Error("PII leak detected");
570
+ * // Or use the clearer inverse helper:
571
+ * if (hasPII(response)) throw new Error("PII leak detected");
572
+ */
542
573
  function notContainsPII(text) {
543
574
  // Simple PII detection patterns
544
575
  const piiPatterns = [
545
576
  /\b\d{3}-\d{2}-\d{4}\b/, // SSN
546
577
  /\b\d{3}\.\d{3}\.\d{4}\b/, // SSN with dots
547
- /\b\d{10}\b/, // Phone number
548
- /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/, // Email
578
+ /\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b/, // Phone (various formats)
579
+ /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/, // Email
549
580
  /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/, // IP address
550
581
  ];
551
582
  return !piiPatterns.some((pattern) => pattern.test(text));
552
583
  }
584
+ /**
585
+ * Returns `true` when PII is detected in the text (unsafe), `false` when safe.
586
+ * This is the semantic inverse of `notContainsPII` and may be easier to reason about.
587
+ *
588
+ * @example
589
+ * if (hasPII(response)) throw new Error("PII leak");
590
+ */
591
+ function hasPII(text) {
592
+ return !notContainsPII(text);
593
+ }
553
594
  function hasSentiment(text, expected) {
554
595
  // This is a simplified implementation
555
596
  const positiveWords = ["good", "great", "excellent", "awesome"];
@@ -126,7 +126,6 @@ function runBaselineInit(cwd) {
126
126
  }
127
127
  // ── baseline update ──
128
128
  function runBaselineUpdate(cwd) {
129
- // Check if eval:baseline-update script exists in package.json
130
129
  const pkgPath = path.join(cwd, "package.json");
131
130
  if (!fs.existsSync(pkgPath)) {
132
131
  console.error("❌ No package.json found. Run this from your project root.");
@@ -140,13 +139,39 @@ function runBaselineUpdate(cwd) {
140
139
  console.error("❌ Failed to parse package.json");
141
140
  return 1;
142
141
  }
143
- if (!pkg.scripts?.["eval:baseline-update"]) {
144
- console.error("❌ Missing 'eval:baseline-update' script in package.json.");
145
- console.error(' Add it: "eval:baseline-update": "npx tsx scripts/regression-gate.ts --update-baseline"');
142
+ // Use custom script if available
143
+ if (pkg.scripts?.["eval:baseline-update"]) {
144
+ console.log("📊 Running baseline update (custom script)...\n");
145
+ return runScript(cwd, "eval:baseline-update");
146
+ }
147
+ // Self-contained built-in mode: run the test suite then stamp the baseline
148
+ console.log("📊 Running baseline update (built-in mode)...\n");
149
+ const pm = detectPackageManager(cwd);
150
+ const isWin = process.platform === "win32";
151
+ const testResult = (0, node_child_process_1.spawnSync)(pm, ["test"], {
152
+ cwd,
153
+ stdio: "inherit",
154
+ shell: isWin,
155
+ });
156
+ const baselinePath = path.join(cwd, BASELINE_REL);
157
+ if (!fs.existsSync(baselinePath)) {
158
+ console.error("❌ No baseline found. Run 'evalgate baseline init' first.");
159
+ return 1;
160
+ }
161
+ try {
162
+ const baseline = JSON.parse(fs.readFileSync(baselinePath, "utf-8"));
163
+ baseline.updatedAt = new Date().toISOString();
164
+ baseline.updatedBy = process.env.USER || process.env.USERNAME || "unknown";
165
+ baseline.confidenceTests = baseline.confidenceTests ?? {};
166
+ baseline.confidenceTests.unitPassed = testResult.status === 0;
167
+ fs.writeFileSync(baselinePath, `${JSON.stringify(baseline, null, 2)}\n`);
168
+ console.log("\n✅ Baseline updated successfully");
169
+ }
170
+ catch {
171
+ console.error("❌ Failed to update baseline file");
146
172
  return 1;
147
173
  }
148
- console.log("📊 Running baseline update...\n");
149
- return runScript(cwd, "eval:baseline-update");
174
+ return testResult.status ?? 1;
150
175
  }
151
176
  // ── baseline router ──
152
177
  function runBaseline(argv) {
@@ -59,6 +59,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
59
59
  exports.discoverSpecs = discoverSpecs;
60
60
  exports.printDiscoveryResults = printDiscoveryResults;
61
61
  exports.runDiscover = runDiscover;
62
+ const crypto = __importStar(require("node:crypto"));
62
63
  const fs = __importStar(require("node:fs/promises"));
63
64
  const path = __importStar(require("node:path"));
64
65
  const execution_mode_1 = require("../runtime/execution-mode");
@@ -284,13 +285,11 @@ function analyzeComplexity(content) {
284
285
  * Generate specification ID from file path + name + index (unique per defineEval call)
285
286
  */
286
287
  function generateSpecId(filePath, name, index) {
287
- const relativePath = path.relative(process.cwd(), filePath);
288
- const key = `${relativePath}:${name}:${index}`;
289
- const hash = Buffer.from(key)
290
- .toString("base64")
291
- .replace(/[+/=]/g, "")
292
- .slice(0, 8);
293
- return hash;
288
+ const relativePath = path
289
+ .relative(process.cwd(), filePath)
290
+ .replace(/\\/g, "/");
291
+ const key = `${relativePath}|${name}|${index}`;
292
+ return crypto.createHash("sha256").update(key).digest("hex").slice(0, 16);
294
293
  }
295
294
  /**
296
295
  * Calculate discovery statistics
@@ -84,6 +84,8 @@ const REPORT_SEARCH_PATHS = [
84
84
  "evals/regression-report.json",
85
85
  ".evalgate/last-report.json",
86
86
  ".evalgate/last_report.json",
87
+ ".evalgate/last-run.json",
88
+ ".evalgate/runs/latest.json",
87
89
  ];
88
90
  function findReport(cwd, explicitPath) {
89
91
  if (explicitPath) {
@@ -354,13 +356,78 @@ function suggestFixes(causes) {
354
356
  }
355
357
  // ── Build explain output ──
356
358
  function buildExplainOutput(report, reportPath) {
357
- // Support both CheckReport (from evalgate check) and BuiltinReport (from evalgate gate)
359
+ // Support RunResult (from evalgate run): detected by its results[] array and summary object
360
+ const isRunResult = "results" in report &&
361
+ Array.isArray(report.results) &&
362
+ "summary" in report &&
363
+ report.summary !== null &&
364
+ typeof report.summary === "object";
365
+ if (isRunResult) {
366
+ return buildFromRunResult(report, reportPath);
367
+ }
368
+ // Support BuiltinReport (from evalgate gate)
358
369
  const isBuiltinReport = "category" in report && "deltas" in report;
359
370
  if (isBuiltinReport) {
360
371
  return buildFromBuiltinReport(report, reportPath);
361
372
  }
362
373
  return buildFromCheckReport(report, reportPath);
363
374
  }
375
+ function buildFromRunResult(report, reportPath) {
376
+ const summary = report.summary;
377
+ const results = report.results ?? [];
378
+ const passed = summary.failed === 0;
379
+ // Top failures
380
+ const failures = results.filter((r) => r.result.status === "failed");
381
+ const topFailures = failures.slice(0, 3).map((r, i) => ({
382
+ rank: i + 1,
383
+ name: r.name,
384
+ filePath: r.filePath,
385
+ reason: r.result.error,
386
+ }));
387
+ // Changes: pass rate
388
+ const changes = [
389
+ {
390
+ metric: "Pass rate",
391
+ baseline: "—",
392
+ current: `${Math.round(summary.passRate * 100)}%`,
393
+ direction: passed ? "same" : "worse",
394
+ },
395
+ ];
396
+ // For passing runs, return empty failures, root causes, and suggested fixes so no misleading "Run doctor" suggestions appear
397
+ if (passed) {
398
+ return {
399
+ verdict: "pass",
400
+ reasonMessage: `All ${summary.passed} spec${summary.passed === 1 ? "" : "s"} passed`,
401
+ topFailures: [],
402
+ totalFailures: 0,
403
+ changes,
404
+ rootCauses: [],
405
+ suggestedFixes: [],
406
+ reportPath,
407
+ };
408
+ }
409
+ // Classify root cause by inspecting error messages
410
+ const errorText = failures
411
+ .map((r) => (r.result.error ?? "").toLowerCase())
412
+ .join(" ");
413
+ const rootCauses = [];
414
+ if (errorText.includes("pii") || errorText.includes("safety"))
415
+ rootCauses.push("safety_regression");
416
+ if (errorText.includes("tool") || errorText.includes("function_call"))
417
+ rootCauses.push("tool_use_drift");
418
+ if (rootCauses.length === 0)
419
+ rootCauses.push("prompt_drift");
420
+ return {
421
+ verdict: "fail",
422
+ reasonMessage: `${summary.failed} of ${results.length} spec${results.length === 1 ? "" : "s"} failed`,
423
+ topFailures,
424
+ totalFailures: failures.length,
425
+ changes,
426
+ rootCauses,
427
+ suggestedFixes: suggestFixes(rootCauses),
428
+ reportPath,
429
+ };
430
+ }
364
431
  function buildFromCheckReport(report, reportPath) {
365
432
  const failedCases = report.failedCases ?? [];
366
433
  // Top failures (up to 3)
@@ -109,7 +109,18 @@ async function getChangedFiles(baseBranch) {
109
109
  });
110
110
  git.on("close", (code) => {
111
111
  if (code !== 0) {
112
- reject(new Error(`Git diff failed: ${error}`));
112
+ const lowerError = error.toLowerCase();
113
+ if (lowerError.includes("not a git repository") ||
114
+ lowerError.includes("fatal: not a git")) {
115
+ reject(new Error("Not a git repository. Run 'git init' or run evalgate from inside a git repo."));
116
+ }
117
+ else if (lowerError.includes("unknown revision") ||
118
+ lowerError.includes("bad revision")) {
119
+ reject(new Error(`Base branch '${baseBranch}' not found. Fetch it first: git fetch origin ${baseBranch}`));
120
+ }
121
+ else {
122
+ reject(new Error(`Git diff failed (exit ${code}). Ensure git is installed and '${baseBranch}' exists.`));
123
+ }
113
124
  return;
114
125
  }
115
126
  const files = output
@@ -138,7 +138,7 @@ function buildResolvedConfig(cwd, flags) {
138
138
  value: flags.baseUrl ||
139
139
  envBaseUrl ||
140
140
  fileConfig?.baseUrl ||
141
- "http://localhost:3000",
141
+ "https://api.evalgate.com",
142
142
  source: baseUrlSource,
143
143
  });
144
144
  // apiKey (always redacted)
package/dist/index.d.ts CHANGED
@@ -10,7 +10,7 @@ export { AIEvalClient } from "./client";
10
10
  import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, SDKError } from "./errors";
11
11
  export { EvalGateError, RateLimitError, AuthenticationError, SDKError as ValidationError, // Using SDKError as ValidationError for backward compatibility
12
12
  NetworkError, };
13
- export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
13
+ export { containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, expect, followsInstructions, hasFactualAccuracy, hasLength, hasNoHallucinations, hasNoToxicity, hasPII, hasReadabilityScore, hasSentiment, hasValidCodeSyntax, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinTime, similarTo, withinRange, } from "./assertions";
14
14
  import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
15
15
  export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
16
16
  export { cloneContext, mergeContexts, validateContext, } from "./runtime/context";
package/dist/index.js CHANGED
@@ -8,8 +8,8 @@
8
8
  * @packageDocumentation
9
9
  */
10
10
  Object.defineProperty(exports, "__esModule", { value: true });
11
- exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
- exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = void 0;
11
+ exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.withinRange = exports.similarTo = exports.respondedWithinTime = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntax = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicity = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracy = exports.followsInstructions = exports.expect = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
12
+ exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginate = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.RequestCache = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.compareSnapshots = exports.saveSnapshot = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = void 0;
13
13
  // Main SDK exports
14
14
  var client_1 = require("./client");
15
15
  Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
@@ -32,6 +32,7 @@ Object.defineProperty(exports, "hasFactualAccuracy", { enumerable: true, get: fu
32
32
  Object.defineProperty(exports, "hasLength", { enumerable: true, get: function () { return assertions_1.hasLength; } });
33
33
  Object.defineProperty(exports, "hasNoHallucinations", { enumerable: true, get: function () { return assertions_1.hasNoHallucinations; } });
34
34
  Object.defineProperty(exports, "hasNoToxicity", { enumerable: true, get: function () { return assertions_1.hasNoToxicity; } });
35
+ Object.defineProperty(exports, "hasPII", { enumerable: true, get: function () { return assertions_1.hasPII; } });
35
36
  Object.defineProperty(exports, "hasReadabilityScore", { enumerable: true, get: function () { return assertions_1.hasReadabilityScore; } });
36
37
  Object.defineProperty(exports, "hasSentiment", { enumerable: true, get: function () { return assertions_1.hasSentiment; } });
37
38
  Object.defineProperty(exports, "hasValidCodeSyntax", { enumerable: true, get: function () { return assertions_1.hasValidCodeSyntax; } });
@@ -18,10 +18,19 @@ export declare const evalai: {
18
18
  test: DefineEvalFunction;
19
19
  };
20
20
  /**
21
- * Suite definition for grouping related specifications
22
- * This will be expanded in Layer 3 for dependency graph support
21
+ * Suite definition for grouping related specifications.
22
+ * Accepts both a positional form and an object form:
23
+ *
24
+ * @example Positional form:
25
+ * defineSuite('My Suite', [() => defineEval('spec 1', executor), ...])
26
+ *
27
+ * @example Object form:
28
+ * defineSuite({ name: 'My Suite', specs: [() => defineEval('spec 1', executor), ...] })
23
29
  */
24
- export declare function defineSuite(_name: string, specs: (() => void)[]): void;
30
+ export declare function defineSuite(nameOrConfig: string | {
31
+ name: string;
32
+ specs: (() => void)[];
33
+ }, specsArg?: (() => void)[]): void;
25
34
  /**
26
35
  * Helper function to create specification contexts
27
36
  * Useful for testing and manual execution
@@ -204,13 +204,22 @@ exports.evalai = {
204
204
  test: exports.defineEval,
205
205
  };
206
206
  /**
207
- * Suite definition for grouping related specifications
208
- * This will be expanded in Layer 3 for dependency graph support
207
+ * Suite definition for grouping related specifications.
208
+ * Accepts both a positional form and an object form:
209
+ *
210
+ * @example Positional form:
211
+ * defineSuite('My Suite', [() => defineEval('spec 1', executor), ...])
212
+ *
213
+ * @example Object form:
214
+ * defineSuite({ name: 'My Suite', specs: [() => defineEval('spec 1', executor), ...] })
209
215
  */
210
- function defineSuite(_name, specs) {
211
- // For now, just execute the specs to register them
212
- // In Layer 3, this will build the dependency graph
213
- for (const specFn of specs) {
216
+ function defineSuite(nameOrConfig, specsArg) {
217
+ const specFns = typeof nameOrConfig === "string"
218
+ ? (specsArg ?? [])
219
+ : (nameOrConfig.specs ?? []);
220
+ // Execute each spec function to register its defineEval calls
221
+ // In Layer 3, this will also build the dependency graph
222
+ for (const specFn of specFns) {
214
223
  specFn();
215
224
  }
216
225
  }
@@ -9,7 +9,7 @@
9
9
  * import { snapshot, loadSnapshot } from '@ai-eval-platform/sdk';
10
10
  *
11
11
  * const output = await generateText('Write a haiku about coding');
12
- * await snapshot(output, 'haiku-test');
12
+ * await snapshot('haiku-test', output);
13
13
  *
14
14
  * // Later, compare with snapshot
15
15
  * const saved = await loadSnapshot('haiku-test');
@@ -135,10 +135,10 @@ export declare class SnapshotManager {
135
135
  * @example
136
136
  * ```typescript
137
137
  * const output = await generateText('Write a haiku');
138
- * await snapshot(output, 'haiku-test');
138
+ * await snapshot('haiku-test', output);
139
139
  * ```
140
140
  */
141
- export declare function snapshot(output: string, name: string, options?: {
141
+ export declare function snapshot(name: string, output: string, options?: {
142
142
  tags?: string[];
143
143
  metadata?: Record<string, unknown>;
144
144
  overwrite?: boolean;
package/dist/snapshot.js CHANGED
@@ -10,7 +10,7 @@
10
10
  * import { snapshot, loadSnapshot } from '@ai-eval-platform/sdk';
11
11
  *
12
12
  * const output = await generateText('Write a haiku about coding');
13
- * await snapshot(output, 'haiku-test');
13
+ * await snapshot('haiku-test', output);
14
14
  *
15
15
  * // Later, compare with snapshot
16
16
  * const saved = await loadSnapshot('haiku-test');
@@ -271,10 +271,10 @@ function getSnapshotManager(dir) {
271
271
  * @example
272
272
  * ```typescript
273
273
  * const output = await generateText('Write a haiku');
274
- * await snapshot(output, 'haiku-test');
274
+ * await snapshot('haiku-test', output);
275
275
  * ```
276
276
  */
277
- async function snapshot(output, name, options) {
277
+ async function snapshot(name, output, options) {
278
278
  const manager = getSnapshotManager(options?.dir);
279
279
  return manager.save(name, output, options);
280
280
  }
package/dist/version.d.ts CHANGED
@@ -3,5 +3,5 @@
3
3
  * X-EvalGate-SDK-Version: SDK package version
4
4
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
5
5
  */
6
- export declare const SDK_VERSION = "2.1.3";
7
- export declare const SPEC_VERSION = "2.1.3";
6
+ export declare const SDK_VERSION = "2.2.0";
7
+ export declare const SPEC_VERSION = "2.2.0";
package/dist/version.js CHANGED
@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
6
6
  * X-EvalGate-SDK-Version: SDK package version
7
7
  * X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
8
8
  */
9
- exports.SDK_VERSION = "2.1.3";
10
- exports.SPEC_VERSION = "2.1.3";
9
+ exports.SDK_VERSION = "2.2.0";
10
+ exports.SPEC_VERSION = "2.2.0";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@evalgate/sdk",
3
- "version": "2.1.3",
3
+ "version": "2.2.0",
4
4
  "publishConfig": {
5
5
  "access": "public",
6
6
  "registry": "https://registry.npmjs.org/"