@evalgate/sdk 2.2.4 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.md +1 -1
- package/dist/assertions.d.ts +8 -8
- package/dist/assertions.js +28 -9
- package/dist/cli/index.js +0 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +7 -3
- package/dist/otel.js +1 -1
- package/dist/version.d.ts +2 -2
- package/dist/version.js +2 -2
- package/package.json +124 -124
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,32 @@ All notable changes to the @evalgate/sdk package will be documented in this file
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [2.3.0] - 2026-03-04
|
|
9
|
+
|
|
10
|
+
### Breaking
|
|
11
|
+
|
|
12
|
+
- **`hasConsistency` / `hasConsistencyAsync` return `{ score, passed }` instead of `{ score, consistent }`** — aligns with every other assertion in the SDK that returns a `passed` field. If you were destructuring `consistent`, rename it to `passed`:
|
|
13
|
+
```ts
|
|
14
|
+
// Before:
|
|
15
|
+
const { score, consistent } = hasConsistency(outputs);
|
|
16
|
+
// After:
|
|
17
|
+
const { score, passed } = hasConsistency(outputs);
|
|
18
|
+
```
|
|
19
|
+
- **`respondedWithinDuration` / `respondedWithinTimeSince` return `AssertionResult` instead of `boolean`** — these now return `{ name, passed, expected, actual, message }` like all other assertions, enabling uniform pipeline usage and failure messages. The deprecated `respondedWithinTime` alias also returns `AssertionResult`.
|
|
20
|
+
```ts
|
|
21
|
+
// Before:
|
|
22
|
+
const ok = respondedWithinDuration(250, 500); // boolean
|
|
23
|
+
// After:
|
|
24
|
+
const { passed } = respondedWithinDuration(250, 500); // AssertionResult
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Added
|
|
28
|
+
|
|
29
|
+
- **`computeBaselineChecksum` / `verifyBaselineChecksum` in main barrel** — previously only reachable via `@evalgate/sdk/cli/baseline` subpath. Now importable directly from `@evalgate/sdk`.
|
|
30
|
+
- **`resetSentimentDeprecationWarning` in main barrel** — the utility that resets the one-time `hasSentimentAsync` deprecation warning is now importable from the main entry point, making it easier to test deprecation behavior. The `SentimentAsyncResult` type was already exported.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
8
34
|
## [2.2.3] - 2026-03-03
|
|
9
35
|
|
|
10
36
|
### Breaking
|
package/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/@evalgate/sdk)
|
|
4
4
|
[](https://www.npmjs.com/package/@evalgate/sdk)
|
|
5
5
|
[](https://www.typescriptlang.org/)
|
|
6
|
-
[](#)
|
|
7
7
|
[](#)
|
|
8
8
|
[](https://opensource.org/licenses/MIT)
|
|
9
9
|
|
package/dist/assertions.d.ts
CHANGED
|
@@ -238,21 +238,21 @@ export declare function similarTo(text1: string, text2: string, threshold?: numb
|
|
|
238
238
|
*
|
|
239
239
|
* @param outputs - Array of LLM outputs to compare (minimum 2)
|
|
240
240
|
* @param threshold - Optional minimum consistency score to return true (default 0.7)
|
|
241
|
-
* @returns `{ score, consistent }` where `consistent` is `score >= threshold`
|
|
241
|
+
* @returns `{ score, passed }` where `passed` is `score >= threshold`
|
|
242
242
|
*
|
|
243
243
|
* @example
|
|
244
244
|
* ```ts
|
|
245
|
-
* const { score, consistent } = hasConsistency([
|
|
245
|
+
* const { score, passed } = hasConsistency([
|
|
246
246
|
* "The capital of France is Paris.",
|
|
247
247
|
* "Paris is the capital of France.",
|
|
248
248
|
* "France's capital city is Paris.",
|
|
249
249
|
* ]);
|
|
250
|
-
* // score ≈ 0.6-0.8, consistent = true at default threshold
|
|
250
|
+
* // score ≈ 0.6-0.8, passed = true at default threshold
|
|
251
251
|
* ```
|
|
252
252
|
*/
|
|
253
253
|
export declare function hasConsistency(outputs: string[], threshold?: number): {
|
|
254
254
|
score: number;
|
|
255
|
-
consistent: boolean;
|
255
|
+
passed: boolean;
|
|
256
256
|
};
|
|
257
257
|
/**
|
|
258
258
|
* LLM-backed consistency check. **Slow and accurate** — asks the LLM to
|
|
@@ -263,7 +263,7 @@ export declare function hasConsistency(outputs: string[], threshold?: number): {
|
|
|
263
263
|
*/
|
|
264
264
|
export declare function hasConsistencyAsync(outputs: string[], config?: AssertionLLMConfig): Promise<{
|
|
265
265
|
score: number;
|
|
266
|
-
consistent: boolean;
|
266
|
+
passed: boolean;
|
|
267
267
|
}>;
|
|
268
268
|
export declare function withinRange(value: number, min: number, max: number): boolean;
|
|
269
269
|
export declare function isValidEmail(email: string): boolean;
|
|
@@ -298,19 +298,19 @@ export declare function hasFactualAccuracy(text: string, facts: string[]): boole
|
|
|
298
298
|
* @param durationMs - The actual elapsed time in milliseconds
|
|
299
299
|
* @param maxMs - Maximum allowed duration in milliseconds
|
|
300
300
|
*/
|
|
301
|
-
export declare function respondedWithinDuration(durationMs: number, maxMs: number): boolean;
|
|
301
|
+
export declare function respondedWithinDuration(durationMs: number, maxMs: number): AssertionResult;
|
|
302
302
|
/**
|
|
303
303
|
* Check if elapsed time since a start timestamp is within the allowed limit.
|
|
304
304
|
* @param startTime - Timestamp from Date.now() captured before the operation
|
|
305
305
|
* @param maxMs - Maximum allowed duration in milliseconds
|
|
306
306
|
*/
|
|
307
|
-
export declare function respondedWithinTimeSince(startTime: number, maxMs: number): boolean;
|
|
307
|
+
export declare function respondedWithinTimeSince(startTime: number, maxMs: number): AssertionResult;
|
|
308
308
|
/**
|
|
309
309
|
* @deprecated Use {@link respondedWithinDuration} (takes measured duration)
|
|
310
310
|
* or {@link respondedWithinTimeSince} (takes start timestamp) instead.
|
|
311
311
|
* This function takes a start timestamp, not a duration — the name is misleading.
|
|
312
312
|
*/
|
|
313
|
-
export declare function respondedWithinTime(startTime: number, maxMs: number): boolean;
|
|
313
|
+
export declare function respondedWithinTime(startTime: number, maxMs: number): AssertionResult;
|
|
314
314
|
/**
|
|
315
315
|
* Blocklist-based toxicity check (~80 terms across 9 categories).
|
|
316
316
|
* **Fast and approximate** — catches explicit harmful language but has
|
package/dist/assertions.js
CHANGED
|
@@ -884,24 +884,24 @@ function meanPairwiseJaccard(texts) {
|
|
|
884
884
|
*
|
|
885
885
|
* @param outputs - Array of LLM outputs to compare (minimum 2)
|
|
886
886
|
* @param threshold - Optional minimum consistency score to return true (default 0.7)
|
|
887
|
-
* @returns `{ score, consistent }` where `consistent` is `score >= threshold`
|
|
887
|
+
* @returns `{ score, passed }` where `passed` is `score >= threshold`
|
|
888
888
|
*
|
|
889
889
|
* @example
|
|
890
890
|
* ```ts
|
|
891
|
-
* const { score, consistent } = hasConsistency([
|
|
891
|
+
* const { score, passed } = hasConsistency([
|
|
892
892
|
* "The capital of France is Paris.",
|
|
893
893
|
* "Paris is the capital of France.",
|
|
894
894
|
* "France's capital city is Paris.",
|
|
895
895
|
* ]);
|
|
896
|
-
* // score ≈ 0.6-0.8, consistent = true at default threshold
|
|
896
|
+
* // score ≈ 0.6-0.8, passed = true at default threshold
|
|
897
897
|
* ```
|
|
898
898
|
*/
|
|
899
899
|
function hasConsistency(outputs, threshold = 0.7) {
|
|
900
900
|
if (outputs.length < 2) {
|
|
901
|
-
return { score: 1, consistent: true };
|
|
901
|
+
return { score: 1, passed: true };
|
|
902
902
|
}
|
|
903
903
|
const score = meanPairwiseJaccard(outputs);
|
|
904
|
-
return { score, consistent: score >= threshold };
|
|
904
|
+
return { score, passed: score >= threshold };
|
|
905
905
|
}
|
|
906
906
|
/**
|
|
907
907
|
* LLM-backed consistency check. **Slow and accurate** — asks the LLM to
|
|
@@ -912,7 +912,7 @@ function hasConsistency(outputs, threshold = 0.7) {
|
|
|
912
912
|
*/
|
|
913
913
|
async function hasConsistencyAsync(outputs, config) {
|
|
914
914
|
if (outputs.length < 2) {
|
|
915
|
-
return { score: 1, consistent: true };
|
|
915
|
+
return { score: 1, passed: true };
|
|
916
916
|
}
|
|
917
917
|
const numbered = outputs.map((o, i) => `Output ${i + 1}: "${o}"`).join("\n");
|
|
918
918
|
const prompt = `Rate the semantic consistency of the following ${outputs.length} outputs on a scale from 0 to 100, where 100 means they all convey exactly the same meaning and 0 means they completely contradict each other. Reply with ONLY a number.\n\n${numbered}`;
|
|
@@ -921,7 +921,7 @@ async function hasConsistencyAsync(outputs, config) {
|
|
|
921
921
|
const score = Number.isNaN(parsed)
|
|
922
922
|
? 0
|
|
923
923
|
: Math.min(100, Math.max(0, parsed)) / 100;
|
|
924
|
-
return { score, consistent: score >= 0.7 };
|
|
924
|
+
return { score, passed: score >= 0.7 };
|
|
925
925
|
}
|
|
926
926
|
function withinRange(value, min, max) {
|
|
927
927
|
return value >= min && value <= max;
|
|
@@ -1218,7 +1218,16 @@ function hasFactualAccuracy(text, facts) {
|
|
|
1218
1218
|
* @param maxMs - Maximum allowed duration in milliseconds
|
|
1219
1219
|
*/
|
|
1220
1220
|
function respondedWithinDuration(durationMs, maxMs) {
|
|
1221
|
-
|
|
1221
|
+
const passed = durationMs <= maxMs;
|
|
1222
|
+
return {
|
|
1223
|
+
name: "respondedWithinDuration",
|
|
1224
|
+
passed,
|
|
1225
|
+
expected: `<= ${maxMs}ms`,
|
|
1226
|
+
actual: `${durationMs}ms`,
|
|
1227
|
+
message: passed
|
|
1228
|
+
? `Response time ${durationMs}ms is within ${maxMs}ms limit`
|
|
1229
|
+
: `Response time ${durationMs}ms exceeded ${maxMs}ms limit`,
|
|
1230
|
+
};
|
|
1222
1231
|
}
|
|
1223
1232
|
/**
|
|
1224
1233
|
* Check if elapsed time since a start timestamp is within the allowed limit.
|
|
@@ -1226,7 +1235,17 @@ function respondedWithinDuration(durationMs, maxMs) {
|
|
|
1226
1235
|
* @param maxMs - Maximum allowed duration in milliseconds
|
|
1227
1236
|
*/
|
|
1228
1237
|
function respondedWithinTimeSince(startTime, maxMs) {
|
|
1229
|
-
|
|
1238
|
+
const elapsed = Date.now() - startTime;
|
|
1239
|
+
const passed = elapsed <= maxMs;
|
|
1240
|
+
return {
|
|
1241
|
+
name: "respondedWithinTimeSince",
|
|
1242
|
+
passed,
|
|
1243
|
+
expected: `<= ${maxMs}ms`,
|
|
1244
|
+
actual: `${elapsed}ms`,
|
|
1245
|
+
message: passed
|
|
1246
|
+
? `Elapsed time ${elapsed}ms is within ${maxMs}ms limit`
|
|
1247
|
+
: `Elapsed time ${elapsed}ms exceeded ${maxMs}ms limit`,
|
|
1248
|
+
};
|
|
1230
1249
|
}
|
|
1231
1250
|
/**
|
|
1232
1251
|
* @deprecated Use {@link respondedWithinDuration} (takes measured duration)
|
package/dist/cli/index.js
CHANGED
|
File without changes
|
package/dist/index.d.ts
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
export { AIEvalClient } from "./client";
|
|
10
10
|
import { AuthenticationError, EvalGateError, NetworkError, RateLimitError, ValidationError } from "./errors";
|
|
11
11
|
export { EvalGateError, RateLimitError, AuthenticationError, ValidationError, NetworkError, };
|
|
12
|
-
export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasConsistency, hasConsistencyAsync, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasSentimentWithScore, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, respondedWithinDuration, respondedWithinTime, respondedWithinTimeSince, type SentimentAsyncResult, similarTo, toSemanticallyContain, withinRange, } from "./assertions";
|
|
12
|
+
export { type AssertionLLMConfig, configureAssertions, containsAllRequiredFields, containsJSON, containsKeywords, containsLanguage, containsLanguageAsync, expect, followsInstructions, getAssertionConfig, hasConsistency, hasConsistencyAsync, hasFactualAccuracy, hasFactualAccuracyAsync, hasLength, hasNoHallucinations, hasNoHallucinationsAsync, hasNoToxicity, hasNoToxicityAsync, hasPII, hasReadabilityScore, hasSentiment, hasSentimentAsync, hasSentimentWithScore, hasValidCodeSyntax, hasValidCodeSyntaxAsync, isValidEmail, isValidURL, matchesPattern, matchesSchema, notContainsPII, resetSentimentDeprecationWarning, respondedWithinDuration, respondedWithinTime, respondedWithinTimeSince, type SentimentAsyncResult, similarTo, toSemanticallyContain, withinRange, } from "./assertions";
|
|
13
13
|
export { EvalGateError as SDKError } from "./errors";
|
|
14
14
|
import { createContext, EvalContext, getCurrentContext, withContext } from "./context";
|
|
15
15
|
export { createContext, getCurrentContext as getContext, withContext, EvalContext as ContextManager, };
|
|
@@ -28,6 +28,7 @@ export { exportData, importData };
|
|
|
28
28
|
export type { ExportFormat, ExportFormat as ExportType };
|
|
29
29
|
export { RequestBatcher } from "./batch";
|
|
30
30
|
export { CacheTTL } from "./cache";
|
|
31
|
+
export { computeBaselineChecksum, verifyBaselineChecksum, } from "./cli/baseline";
|
|
31
32
|
export { type CheckArgs, EXIT, parseArgs, runCheck } from "./cli/check";
|
|
32
33
|
export { traceAnthropic } from "./integrations/anthropic";
|
|
33
34
|
export { traceOpenAI } from "./integrations/openai";
|
package/dist/index.js
CHANGED
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
* @packageDocumentation
|
|
9
9
|
*/
|
|
10
10
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
-
exports.
|
|
12
|
-
exports.
|
|
13
|
-
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = void 0;
|
|
11
|
+
exports.mergeContexts = exports.cloneContext = exports.ContextManager = exports.withContext = exports.getContext = exports.createContext = exports.SDKError = exports.withinRange = exports.toSemanticallyContain = exports.similarTo = exports.respondedWithinTimeSince = exports.respondedWithinTime = exports.respondedWithinDuration = exports.resetSentimentDeprecationWarning = exports.notContainsPII = exports.matchesSchema = exports.matchesPattern = exports.isValidURL = exports.isValidEmail = exports.hasValidCodeSyntaxAsync = exports.hasValidCodeSyntax = exports.hasSentimentWithScore = exports.hasSentimentAsync = exports.hasSentiment = exports.hasReadabilityScore = exports.hasPII = exports.hasNoToxicityAsync = exports.hasNoToxicity = exports.hasNoHallucinationsAsync = exports.hasNoHallucinations = exports.hasLength = exports.hasFactualAccuracyAsync = exports.hasFactualAccuracy = exports.hasConsistencyAsync = exports.hasConsistency = exports.getAssertionConfig = exports.followsInstructions = exports.expect = exports.containsLanguageAsync = exports.containsLanguage = exports.containsKeywords = exports.containsJSON = exports.containsAllRequiredFields = exports.configureAssertions = exports.NetworkError = exports.ValidationError = exports.AuthenticationError = exports.RateLimitError = exports.EvalGateError = exports.AIEvalClient = void 0;
|
|
12
|
+
exports.batchProcess = exports.REPORT_SCHEMA_VERSION = exports.GATE_EXIT = exports.GATE_CATEGORY = exports.ARTIFACTS = exports.PaginatedIterator = exports.encodeCursor = exports.decodeCursor = exports.createPaginatedIterator = exports.autoPaginateGenerator = exports.autoPaginate = exports.OTelExporter = exports.createOTelExporter = exports.extendExpectWithToPassGate = exports.Logger = exports.openAIChatEval = exports.traceOpenAI = exports.traceAnthropic = exports.runCheck = exports.parseArgs = exports.EXIT = exports.verifyBaselineChecksum = exports.computeBaselineChecksum = exports.CacheTTL = exports.RequestBatcher = exports.importData = exports.exportData = exports.saveSnapshot = exports.compareSnapshots = exports.compareWithSnapshot = exports.snapshot = exports.TestSuite = exports.createTestSuite = exports.SpecRegistrationError = exports.SpecExecutionError = exports.RuntimeError = exports.EvalRuntimeError = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.defaultLocalExecutor = exports.createLocalExecutor = exports.getFilteredSpecs = exports.evalai = exports.defineSuite = exports.defineEval = exports.createResult = exports.createEvalContext = exports.validateContext = void 0;
|
|
13
|
+
exports.WorkflowTracer = exports.traceWorkflowStep = exports.traceLangChainAgent = exports.traceCrewAI = exports.traceAutoGen = exports.createWorkflowTracer = exports.EvaluationTemplates = exports.streamEvaluation = exports.RateLimiter = exports.batchRead = void 0;
|
|
14
14
|
// Main SDK exports
|
|
15
15
|
var client_1 = require("./client");
|
|
16
16
|
Object.defineProperty(exports, "AIEvalClient", { enumerable: true, get: function () { return client_1.AIEvalClient; } });
|
|
@@ -55,6 +55,7 @@ Object.defineProperty(exports, "isValidURL", { enumerable: true, get: function (
|
|
|
55
55
|
Object.defineProperty(exports, "matchesPattern", { enumerable: true, get: function () { return assertions_1.matchesPattern; } });
|
|
56
56
|
Object.defineProperty(exports, "matchesSchema", { enumerable: true, get: function () { return assertions_1.matchesSchema; } });
|
|
57
57
|
Object.defineProperty(exports, "notContainsPII", { enumerable: true, get: function () { return assertions_1.notContainsPII; } });
|
|
58
|
+
Object.defineProperty(exports, "resetSentimentDeprecationWarning", { enumerable: true, get: function () { return assertions_1.resetSentimentDeprecationWarning; } });
|
|
58
59
|
Object.defineProperty(exports, "respondedWithinDuration", { enumerable: true, get: function () { return assertions_1.respondedWithinDuration; } });
|
|
59
60
|
Object.defineProperty(exports, "respondedWithinTime", { enumerable: true, get: function () { return assertions_1.respondedWithinTime; } });
|
|
60
61
|
Object.defineProperty(exports, "respondedWithinTimeSince", { enumerable: true, get: function () { return assertions_1.respondedWithinTimeSince; } });
|
|
@@ -122,6 +123,9 @@ Object.defineProperty(exports, "RequestBatcher", { enumerable: true, get: functi
|
|
|
122
123
|
var cache_1 = require("./cache");
|
|
123
124
|
Object.defineProperty(exports, "CacheTTL", { enumerable: true, get: function () { return cache_1.CacheTTL; } });
|
|
124
125
|
// CLI (programmatic use)
|
|
126
|
+
var baseline_1 = require("./cli/baseline");
|
|
127
|
+
Object.defineProperty(exports, "computeBaselineChecksum", { enumerable: true, get: function () { return baseline_1.computeBaselineChecksum; } });
|
|
128
|
+
Object.defineProperty(exports, "verifyBaselineChecksum", { enumerable: true, get: function () { return baseline_1.verifyBaselineChecksum; } });
|
|
125
129
|
var check_1 = require("./cli/check");
|
|
126
130
|
Object.defineProperty(exports, "EXIT", { enumerable: true, get: function () { return check_1.EXIT; } });
|
|
127
131
|
Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return check_1.parseArgs; } });
|
package/dist/otel.js
CHANGED
|
@@ -70,7 +70,7 @@ class OTelExporter {
|
|
|
70
70
|
endpoint: options.endpoint ?? "http://localhost:4318/v1/traces",
|
|
71
71
|
serviceName: options.serviceName ?? "evalgate",
|
|
72
72
|
resourceAttributes: options.resourceAttributes ?? {},
|
|
73
|
-
sdkVersion: options.sdkVersion ?? "2.
|
|
73
|
+
sdkVersion: options.sdkVersion ?? "2.3.0",
|
|
74
74
|
headers: options.headers ?? {},
|
|
75
75
|
};
|
|
76
76
|
}
|
package/dist/version.d.ts
CHANGED
|
@@ -3,5 +3,5 @@
|
|
|
3
3
|
* X-EvalGate-SDK-Version: SDK package version
|
|
4
4
|
* X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
|
|
5
5
|
*/
|
|
6
|
-
export declare const SDK_VERSION = "2.
|
|
7
|
-
export declare const SPEC_VERSION = "2.
|
|
6
|
+
export declare const SDK_VERSION = "2.3.0";
|
|
7
|
+
export declare const SPEC_VERSION = "2.3.0";
|
package/dist/version.js
CHANGED
|
@@ -6,5 +6,5 @@ exports.SPEC_VERSION = exports.SDK_VERSION = void 0;
|
|
|
6
6
|
* X-EvalGate-SDK-Version: SDK package version
|
|
7
7
|
* X-EvalGate-Spec-Version: OpenAPI spec version (docs/openapi.json info.version)
|
|
8
8
|
*/
|
|
9
|
-
exports.SDK_VERSION = "2.
|
|
10
|
-
exports.SPEC_VERSION = "2.
|
|
9
|
+
exports.SDK_VERSION = "2.3.0";
|
|
10
|
+
exports.SPEC_VERSION = "2.3.0";
|
package/package.json
CHANGED
|
@@ -1,125 +1,125 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
2
|
+
"name": "@evalgate/sdk",
|
|
3
|
+
"version": "2.3.0",
|
|
4
|
+
"publishConfig": {
|
|
5
|
+
"access": "public",
|
|
6
|
+
"registry": "https://registry.npmjs.org/"
|
|
7
|
+
},
|
|
8
|
+
"description": "EvalGate SDK - Complete API Coverage with Performance Optimizations",
|
|
9
|
+
"main": "dist/index.js",
|
|
10
|
+
"module": "dist/index.js",
|
|
11
|
+
"types": "dist/index.d.ts",
|
|
12
|
+
"sideEffects": false,
|
|
13
|
+
"files": [
|
|
14
|
+
"dist",
|
|
15
|
+
"README.md",
|
|
16
|
+
"CHANGELOG.md"
|
|
17
|
+
],
|
|
18
|
+
"bin": {
|
|
19
|
+
"evalgate": "dist/cli/index.js"
|
|
20
|
+
},
|
|
21
|
+
"engines": {
|
|
22
|
+
"node": ">=16.0.0"
|
|
23
|
+
},
|
|
24
|
+
"keywords": [
|
|
25
|
+
"ai",
|
|
26
|
+
"evaluation",
|
|
27
|
+
"llm",
|
|
28
|
+
"testing",
|
|
29
|
+
"observability",
|
|
30
|
+
"tracing",
|
|
31
|
+
"monitoring",
|
|
32
|
+
"annotations",
|
|
33
|
+
"webhooks",
|
|
34
|
+
"developer-tools",
|
|
35
|
+
"openai",
|
|
36
|
+
"anthropic"
|
|
37
|
+
],
|
|
38
|
+
"author": "EvalGate Team",
|
|
39
|
+
"license": "MIT",
|
|
40
|
+
"repository": {
|
|
41
|
+
"type": "git",
|
|
42
|
+
"url": "git+https://github.com/pauly7610/ai-evaluation-platform.git",
|
|
43
|
+
"directory": "src/packages/sdk"
|
|
44
|
+
},
|
|
45
|
+
"homepage": "https://evalgate.com",
|
|
46
|
+
"bugs": {
|
|
47
|
+
"url": "https://github.com/pauly7610/ai-evaluation-platform/issues"
|
|
48
|
+
},
|
|
49
|
+
"dependencies": {
|
|
50
|
+
"commander": "^14.0.0"
|
|
51
|
+
},
|
|
52
|
+
"peerDependencies": {
|
|
53
|
+
"@anthropic-ai/sdk": "^0.20.0",
|
|
54
|
+
"openai": "^4.0.0"
|
|
55
|
+
},
|
|
56
|
+
"peerDependenciesMeta": {
|
|
57
|
+
"openai": {
|
|
58
|
+
"optional": true
|
|
59
|
+
},
|
|
60
|
+
"@anthropic-ai/sdk": {
|
|
61
|
+
"optional": true
|
|
62
|
+
}
|
|
63
|
+
},
|
|
64
|
+
"devDependencies": {
|
|
65
|
+
"@types/node": "^20.0.0",
|
|
66
|
+
"ts-node": "^10.9.2",
|
|
67
|
+
"typescript": "^5.0.0",
|
|
68
|
+
"vitest": "^1.0.0"
|
|
69
|
+
},
|
|
70
|
+
"exports": {
|
|
71
|
+
".": {
|
|
72
|
+
"import": "./dist/index.js",
|
|
73
|
+
"require": "./dist/index.js",
|
|
74
|
+
"types": "./dist/index.d.ts"
|
|
75
|
+
},
|
|
76
|
+
"./assertions": {
|
|
77
|
+
"import": "./dist/assertions.js",
|
|
78
|
+
"require": "./dist/assertions.js",
|
|
79
|
+
"types": "./dist/assertions.d.ts"
|
|
80
|
+
},
|
|
81
|
+
"./testing": {
|
|
82
|
+
"import": "./dist/testing.js",
|
|
83
|
+
"require": "./dist/testing.js",
|
|
84
|
+
"types": "./dist/testing.d.ts"
|
|
85
|
+
},
|
|
86
|
+
"./integrations/openai": {
|
|
87
|
+
"import": "./dist/integrations/openai.js",
|
|
88
|
+
"require": "./dist/integrations/openai.js",
|
|
89
|
+
"types": "./dist/integrations/openai.d.ts"
|
|
90
|
+
},
|
|
91
|
+
"./integrations/anthropic": {
|
|
92
|
+
"import": "./dist/integrations/anthropic.js",
|
|
93
|
+
"require": "./dist/integrations/anthropic.js",
|
|
94
|
+
"types": "./dist/integrations/anthropic.d.ts"
|
|
95
|
+
},
|
|
96
|
+
"./integrations/openai-eval": {
|
|
97
|
+
"import": "./dist/integrations/openai-eval.js",
|
|
98
|
+
"require": "./dist/integrations/openai-eval.js",
|
|
99
|
+
"types": "./dist/integrations/openai-eval.d.ts"
|
|
100
|
+
},
|
|
101
|
+
"./matchers": {
|
|
102
|
+
"import": "./dist/matchers/index.js",
|
|
103
|
+
"require": "./dist/matchers/index.js",
|
|
104
|
+
"types": "./dist/matchers/index.d.ts"
|
|
105
|
+
},
|
|
106
|
+
"./regression": {
|
|
107
|
+
"import": "./dist/regression.js",
|
|
108
|
+
"require": "./dist/regression.js",
|
|
109
|
+
"types": "./dist/regression.d.ts"
|
|
110
|
+
},
|
|
111
|
+
"./otel": {
|
|
112
|
+
"import": "./dist/otel.js",
|
|
113
|
+
"require": "./dist/otel.js",
|
|
114
|
+
"types": "./dist/otel.d.ts"
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
"scripts": {
|
|
118
|
+
"build": "tsc",
|
|
119
|
+
"dev": "tsc --watch",
|
|
120
|
+
"test": "vitest run",
|
|
121
|
+
"test:dist": "tsc && vitest run src/__tests__/dist-smoke.test.ts",
|
|
122
|
+
"otel:test": "bash otel-integration/run-test.sh",
|
|
123
|
+
"test:watch": "vitest"
|
|
124
|
+
}
|
|
125
|
+
}
|