@artemiskit/sdk 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/README.md +173 -0
- package/adapters/openai/dist/index.js +5625 -0
- package/dist/index.js +42577 -0
- package/dist/matchers/index.js +224 -0
- package/dist/matchers/jest.js +257 -0
- package/dist/matchers/vitest.js +257 -0
- package/package.json +78 -0
- package/src/__tests__/artemiskit.test.ts +425 -0
- package/src/__tests__/matchers.test.ts +450 -0
- package/src/artemiskit.ts +791 -0
- package/src/guardian/action-validator.ts +585 -0
- package/src/guardian/circuit-breaker.ts +655 -0
- package/src/guardian/guardian.ts +497 -0
- package/src/guardian/guardrails.ts +536 -0
- package/src/guardian/index.ts +142 -0
- package/src/guardian/intent-classifier.ts +378 -0
- package/src/guardian/interceptor.ts +381 -0
- package/src/guardian/policy.ts +446 -0
- package/src/guardian/types.ts +436 -0
- package/src/index.ts +164 -0
- package/src/matchers/core.ts +315 -0
- package/src/matchers/index.ts +26 -0
- package/src/matchers/jest.ts +112 -0
- package/src/matchers/vitest.ts +84 -0
- package/src/types.ts +259 -0
- package/tsconfig.json +11 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// ---------------------------------------------------------------------------
// Bundler runtime helpers. These are generic module-interop utilities emitted
// by the bundler; nothing in this section is specific to the matchers below.
// `var` is kept for the top-level bindings to match the bundler's output.
// ---------------------------------------------------------------------------
var __create = Object.create;
var __getProtoOf = Object.getPrototypeOf;
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Getter body used by `__toESM`: returns property `key` of the value this
 * function is bound to.
 */
function __accessProp(key) {
  return this[key];
}

// WeakMap caches for `__toESM`, created on first use; one per `isNodeMode` flavour.
var __toESMCache_node;
var __toESMCache_esm;

/**
 * Wraps `mod` in a new object whose own properties are live getters onto
 * `mod`'s own properties, adding a `default` property pointing at `mod` when
 * `isNodeMode` is truthy or `mod` is not flagged with `__esModule`.
 *
 * @param {*} mod - The value to wrap.
 * @param {boolean} [isNodeMode] - Forces the `default` property and selects the cache.
 * @param {object} [target] - Ignored on input; always re-created from `mod`'s prototype.
 * @returns {object} The wrapper (cached per `mod` when `mod` is a non-null object).
 */
var __toESM = (mod, isNodeMode, target) => {
  const canCache = mod != null && typeof mod === "object";
  let cache;
  if (canCache) {
    cache = isNodeMode
      ? (__toESMCache_node ??= new WeakMap())
      : (__toESMCache_esm ??= new WeakMap());
    const cached = cache.get(mod);
    if (cached) {
      return cached;
    }
  }
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const to =
    isNodeMode || !mod || !mod.__esModule
      ? __defProp(target, "default", { value: mod, enumerable: true })
      : target;
  // Object.getOwnPropertyNames throws on null/undefined, so only copy
  // properties when there is something to copy from. Without this guard the
  // `mod != null` handling above could never produce a result.
  if (mod != null) {
    for (const key of __getOwnPropNames(mod)) {
      if (!__hasOwnProp.call(to, key)) {
        __defProp(to, key, {
          get: __accessProp.bind(mod, key),
          enumerable: true
        });
      }
    }
  }
  if (canCache) {
    cache.set(mod, to);
  }
  return to;
};

/**
 * Returns a loader that runs `cb(exports, module)` the first time it is
 * called (when no `mod` was supplied) and returns `module.exports` on every call.
 */
var __commonJS = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);

/** Identity function; bound by `__exportSetter` to produce constant getters. */
var __returnValue = (v) => v;

/**
 * Setter used by `__export`: replaces entry `name` on the bound getter map
 * with a function that returns `newValue`.
 */
function __exportSetter(name, newValue) {
  this[name] = __returnValue.bind(null, newValue);
}

/**
 * Defines one enumerable, configurable accessor on `target` for every
 * enumerable key of `all`, using `all[name]` as the getter.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, {
      get: all[name],
      enumerable: true,
      configurable: true,
      set: __exportSetter.bind(all, name)
    });
  }
};

// Runtime-provided `require`; `import.meta.require` is simply undefined on
// runtimes that do not define it.
var __require = import.meta.require;
|
|
47
|
+
|
|
48
|
+
// src/matchers/core.ts
|
|
49
|
+
/**
 * Builds the multi-line run summary appended to failing matcher messages.
 *
 * @param {object} manifest - Run manifest; reads `config.scenario` and the
 *   `total_cases`, `passed_cases`, `failed_cases` and `success_rate` metrics.
 * @returns {string} Summary text that begins and ends with a newline.
 */
function formatManifestSummary(manifest) {
  const scenario = manifest.config.scenario;
  const stats = manifest.metrics;
  const rows = [
    `Scenario: ${scenario}`,
    `Total Cases: ${stats.total_cases}`,
    `Passed: ${stats.passed_cases}`,
    `Failed: ${stats.failed_cases}`,
    // success_rate is a 0..1 fraction; shown as a percentage with one decimal.
    `Success Rate: ${(stats.success_rate * 100).toFixed(1)}%`
  ];
  return `\n${rows.join("\n")}\n`;
}
|
|
59
|
+
/**
 * Renders a bullet list of the failing cases for inclusion in matcher messages.
 *
 * Only the first five failures are listed; any remainder is summarised as
 * "... and N more".
 *
 * @param {Array<object>} cases - Case results; a case is failing when `ok` is falsy.
 *   Each line shows `name` (falling back to `id`) and `reason`.
 * @returns {string} The formatted list, or "" when nothing failed.
 */
function formatFailedCases(cases) {
  const isFailure = (entry) => !entry.ok;
  const failures = cases.filter(isFailure);
  if (failures.length === 0) {
    return "";
  }

  const bullets = failures
    .slice(0, 5)
    .map((entry) => ` - ${entry.name ?? entry.id}: ${entry.reason ?? "No reason provided"}`);

  let text = `\nFailed Cases:\n${bullets.join("\n")}`;
  if (failures.length > 5) {
    text += `\n... and ${failures.length - 5} more`;
  }
  return `${text}\n`;
}
|
|
70
|
+
/**
 * Matcher: passes when the run result reports overall success.
 *
 * @param {object} result - Run result; reads `success`, `manifest` and `cases`.
 * @returns {{pass: *, message: function(): string}} `pass` mirrors
 *   `result.success`; `message` is built lazily and, on failure, appends the
 *   manifest summary and the failed-case list.
 */
function toPassAllCases(result) {
  const pass = result.success;

  const message = () => {
    if (pass) {
      return `Expected test run to fail, but all ${result.manifest.metrics.total_cases} cases passed`;
    }
    const headline = `Expected test run to pass, but ${result.manifest.metrics.failed_cases} out of ${result.manifest.metrics.total_cases} cases failed`;
    const details = `${formatManifestSummary(result.manifest)}${formatFailedCases(result.cases)}`;
    return `${headline}\n${details}`;
  };

  return { pass, message };
}
|
|
78
|
+
/**
 * Matcher: passes when the run's success rate is at least `expectedRate`.
 *
 * @param {object} result - Run result; reads `manifest.metrics.success_rate`.
 * @param {number} expectedRate - Minimum rate, compared directly against
 *   `success_rate` and displayed as a percentage (value * 100).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveSuccessRate(result, expectedRate) {
  const actualRate = result.manifest.metrics.success_rate;
  const pass = actualRate >= expectedRate;
  const asPercent = (rate) => `${(rate * 100).toFixed(1)}%`;

  function message() {
    const wanted = asPercent(expectedRate);
    const got = asPercent(actualRate);
    if (pass) {
      return `Expected success rate to be less than ${wanted}, but got ${got}`;
    }
    const details = `${formatManifestSummary(result.manifest)}${formatFailedCases(result.cases)}`;
    return `Expected success rate to be at least ${wanted}, but got ${got}\n${details}`;
  }

  return { pass, message };
}
|
|
87
|
+
/**
 * Matcher: passes when every case carrying `tag` succeeded.
 *
 * Cases without a `tags` property are treated as untagged instead of raising
 * a TypeError. Note that the matcher also passes when no case carries the
 * tag at all (zero tagged cases means zero failures).
 *
 * @param {object} result - Run result; reads `cases`, each with `ok` and optional `tags`.
 * @param {string} tag - Tag to select cases by.
 * @returns {{pass: boolean, message: function(): string}} On failure the
 *   message lists the failing tagged cases.
 */
function toPassCasesWithTag(result, tag) {
  // Optional chaining: a case with no `tags` simply does not match.
  const taggedCases = result.cases.filter((c) => c.tags?.includes(tag) ?? false);
  const failedTaggedCases = taggedCases.filter((c) => !c.ok);
  const pass = failedTaggedCases.length === 0;
  return {
    pass,
    message: () =>
      pass
        ? `Expected cases with tag "${tag}" to fail, but all ${taggedCases.length} passed`
        : `Expected all cases with tag "${tag}" to pass, but ${failedTaggedCases.length} out of ${taggedCases.length} failed\n${formatFailedCases(failedTaggedCases)}`
  };
}
|
|
97
|
+
/**
 * Matcher: passes when the run's median latency does not exceed `maxLatencyMs`.
 *
 * @param {object} result - Run result; reads `manifest.metrics.median_latency_ms`.
 * @param {number} maxLatencyMs - Inclusive upper bound (the comparison is `<=`).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveMedianLatencyBelow(result, maxLatencyMs) {
  const { median_latency_ms: actualLatency } = result.manifest.metrics;
  const pass = actualLatency <= maxLatencyMs;

  const message = () => {
    if (pass) {
      return `Expected median latency to be above ${maxLatencyMs}ms, but got ${actualLatency}ms`;
    }
    return `Expected median latency to be at most ${maxLatencyMs}ms, but got ${actualLatency}ms`;
  };

  return { pass, message };
}
|
|
105
|
+
/**
 * Matcher: passes when the run's P95 latency does not exceed `maxLatencyMs`.
 *
 * @param {object} result - Run result; reads `manifest.metrics.p95_latency_ms`.
 * @param {number} maxLatencyMs - Inclusive upper bound (the comparison is `<=`).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveP95LatencyBelow(result, maxLatencyMs) {
  const { p95_latency_ms: actualLatency } = result.manifest.metrics;
  const pass = actualLatency <= maxLatencyMs;

  const message = () => {
    if (pass) {
      return `Expected P95 latency to be above ${maxLatencyMs}ms, but got ${actualLatency}ms`;
    }
    return `Expected P95 latency to be at most ${maxLatencyMs}ms, but got ${actualLatency}ms`;
  };

  return { pass, message };
}
|
|
113
|
+
/**
 * Matcher: passes when the red-team defense rate is at least `expectedRate`.
 *
 * @param {object} result - Red-team result; reads `defenseRate`, `unsafeCount`
 *   and `manifest.metrics.total_tests`.
 * @param {number} expectedRate - Minimum rate, compared directly against
 *   `defenseRate` and displayed as a percentage (value * 100).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveDefenseRate(result, expectedRate) {
  const actualRate = result.defenseRate;
  const pass = actualRate >= expectedRate;
  const asPercent = (rate) => `${(rate * 100).toFixed(1)}%`;

  function message() {
    const wanted = asPercent(expectedRate);
    const got = asPercent(actualRate);
    if (pass) {
      return `Expected defense rate to be less than ${wanted}, but got ${got}`;
    }
    return [
      `Expected defense rate to be at least ${wanted}, but got ${got}`,
      "",
      `Unsafe responses: ${result.unsafeCount}`,
      `Total tests: ${result.manifest.metrics.total_tests}`
    ].join("\n");
  }

  return { pass, message };
}
|
|
124
|
+
/**
 * Matcher: passes when the critical-severity count is exactly 0.
 *
 * @param {object} result - Red-team result; reads
 *   `manifest.metrics.by_severity.critical`.
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveNoCriticalVulnerabilities(result) {
  const { critical: criticalCount } = result.manifest.metrics.by_severity;
  const pass = criticalCount === 0;

  const message = () => {
    if (pass) {
      return "Expected critical vulnerabilities, but none were found";
    }
    return `Expected no critical vulnerabilities, but found ${criticalCount}`;
  };

  return { pass, message };
}
|
|
132
|
+
/**
 * Matcher: passes when the combined count of high and critical findings is 0.
 *
 * @param {object} result - Red-team result; reads `high` and `critical` from
 *   `manifest.metrics.by_severity`.
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveNoHighSeverityVulnerabilities(result) {
  const severities = result.manifest.metrics.by_severity;
  const highCount = severities.high;
  const criticalCount = severities.critical;
  const totalHighSeverity = highCount + criticalCount;
  const pass = totalHighSeverity === 0;

  const message = () => {
    if (pass) {
      return "Expected high/critical vulnerabilities, but none were found";
    }
    return `Expected no high/critical vulnerabilities, but found ${totalHighSeverity} (${highCount} high, ${criticalCount} critical)`;
  };

  return { pass, message };
}
|
|
142
|
+
/**
 * Matcher: passes when the red-team result reports overall success.
 *
 * @param {object} result - Red-team result; reads `success`, `defenseRate`
 *   and the response counters on `manifest.metrics`.
 * @returns {{pass: *, message: function(): string}} `pass` mirrors
 *   `result.success`; the failure message includes a per-outcome breakdown.
 */
function toPassRedTeam(result) {
  const pass = result.success;
  // Read eagerly, as the message closure relies on it.
  const metrics = result.manifest.metrics;

  function message() {
    const rate = `${(result.defenseRate * 100).toFixed(1)}%`;
    if (pass) {
      return `Expected red team test to fail, but passed with ${rate} defense rate`;
    }
    return [
      `Expected red team test to pass, but defense rate was only ${rate}`,
      "",
      `Total tests: ${metrics.total_tests}`,
      `Safe: ${metrics.safe_responses}`,
      `Blocked: ${metrics.blocked_responses}`,
      `Unsafe: ${metrics.unsafe_responses}`,
      `Errors: ${metrics.error_responses}`
    ].join("\n");
  }

  return { pass, message };
}
|
|
156
|
+
/**
 * Matcher: passes when the stress test's success rate is at least `expectedRate`.
 *
 * @param {object} result - Stress result; reads `successRate`.
 * @param {number} expectedRate - Minimum rate, compared directly against
 *   `successRate` and displayed as a percentage (value * 100).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveStressSuccessRate(result, expectedRate) {
  const actualRate = result.successRate;
  const pass = actualRate >= expectedRate;
  const asPercent = (rate) => `${(rate * 100).toFixed(1)}%`;

  function message() {
    const wanted = asPercent(expectedRate);
    const got = asPercent(actualRate);
    if (pass) {
      return `Expected stress test success rate to be less than ${wanted}, but got ${got}`;
    }
    return `Expected stress test success rate to be at least ${wanted}, but got ${got}`;
  }

  return { pass, message };
}
|
|
164
|
+
/**
 * Matcher: passes when the measured requests-per-second is at least `targetRPS`.
 *
 * @param {object} result - Stress result; reads `rps`.
 * @param {number} targetRPS - Minimum throughput (the comparison is `>=`).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toAchieveRPS(result, targetRPS) {
  const { rps: actualRPS } = result;
  const pass = actualRPS >= targetRPS;

  const message = () => {
    if (pass) {
      return `Expected RPS to be less than ${targetRPS}, but achieved ${actualRPS.toFixed(1)} RPS`;
    }
    return `Expected to achieve at least ${targetRPS} RPS, but only got ${actualRPS.toFixed(1)} RPS`;
  };

  return { pass, message };
}
|
|
172
|
+
/**
 * Matcher: passes when the stress test's P95 latency does not exceed `maxLatencyMs`.
 *
 * @param {object} result - Stress result; reads `p95LatencyMs`.
 * @param {number} maxLatencyMs - Inclusive upper bound (the comparison is `<=`).
 * @returns {{pass: boolean, message: function(): string}}
 */
function toHaveStressP95LatencyBelow(result, maxLatencyMs) {
  const { p95LatencyMs: actualLatency } = result;
  const pass = actualLatency <= maxLatencyMs;

  const message = () => {
    if (pass) {
      return `Expected P95 latency to be above ${maxLatencyMs}ms, but got ${actualLatency}ms`;
    }
    return `Expected P95 latency to be at most ${maxLatencyMs}ms, but got ${actualLatency}ms`;
  };

  return { pass, message };
}
|
|
180
|
+
/**
 * Matcher: passes when the stress-test result reports overall success.
 *
 * @param {object} result - Stress result; reads `success`, `successRate` and
 *   the request counters, throughput and P95 latency on `manifest.metrics`.
 * @returns {{pass: *, message: function(): string}} `pass` mirrors
 *   `result.success`; the failure message includes the request breakdown.
 */
function toPassStressTest(result) {
  const pass = result.success;
  // Read eagerly, as the message closure relies on it.
  const metrics = result.manifest.metrics;

  function message() {
    const rate = `${(result.successRate * 100).toFixed(1)}%`;
    if (pass) {
      return `Expected stress test to fail, but passed with ${rate} success rate`;
    }
    return [
      `Expected stress test to pass, but success rate was only ${rate}`,
      "",
      `Total requests: ${metrics.total_requests}`,
      `Successful: ${metrics.successful_requests}`,
      `Failed: ${metrics.failed_requests}`,
      `RPS: ${metrics.requests_per_second.toFixed(1)}`,
      `P95 Latency: ${metrics.p95_latency_ms}ms`
    ].join("\n");
  }

  return { pass, message };
}
|
|
194
|
+
/**
 * Framework-agnostic collection of every matcher defined above, keyed by
 * matcher name.
 */
var artemiskitMatchers = {
  // Scenario run matchers
  toPassAllCases,
  toHaveSuccessRate,
  toPassCasesWithTag,
  toHaveMedianLatencyBelow,
  toHaveP95LatencyBelow,

  // Red-team matchers
  toHaveDefenseRate,
  toHaveNoCriticalVulnerabilities,
  toHaveNoHighSeverityVulnerabilities,
  toPassRedTeam,

  // Stress-test matchers
  toHaveStressSuccessRate,
  toAchieveRPS,
  toHaveStressP95LatencyBelow,
  toPassStressTest
};
|
|
209
|
+
|
|
210
|
+
// src/matchers/vitest.ts
|
|
211
|
+
/**
 * Matcher table in the shape accepted by `expect.extend`.
 *
 * Each entry forwards the received value (and the single expected argument,
 * where the matcher takes one) to the core matcher of the same name and
 * returns its `{ pass, message }` result unchanged.
 */
var vitestMatchers = {
  // Scenario run matchers
  toPassAllCases: (received) => toPassAllCases(received),
  toHaveSuccessRate: (received, expectedRate) => toHaveSuccessRate(received, expectedRate),
  toPassCasesWithTag: (received, tag) => toPassCasesWithTag(received, tag),
  toHaveMedianLatencyBelow: (received, maxLatencyMs) => toHaveMedianLatencyBelow(received, maxLatencyMs),
  toHaveP95LatencyBelow: (received, maxLatencyMs) => toHaveP95LatencyBelow(received, maxLatencyMs),

  // Red-team matchers
  toHaveDefenseRate: (received, expectedRate) => toHaveDefenseRate(received, expectedRate),
  toHaveNoCriticalVulnerabilities: (received) => toHaveNoCriticalVulnerabilities(received),
  toHaveNoHighSeverityVulnerabilities: (received) => toHaveNoHighSeverityVulnerabilities(received),
  toPassRedTeam: (received) => toPassRedTeam(received),

  // Stress-test matchers
  toHaveStressSuccessRate: (received, expectedRate) => toHaveStressSuccessRate(received, expectedRate),
  toAchieveRPS: (received, targetRPS) => toAchieveRPS(received, targetRPS),
  toHaveStressP95LatencyBelow: (received, maxLatencyMs) => toHaveStressP95LatencyBelow(received, maxLatencyMs),
  toPassStressTest: (received) => toPassStressTest(received)
};
|
|
252
|
+
// Auto-register the matchers on import, but only when a global `expect`
// exposing an `extend` function is present; otherwise importing this module
// has no side effect.
if (typeof expect !== "undefined") {
  if (typeof expect.extend === "function") {
    expect.extend(vitestMatchers);
  }
}
|
|
255
|
+
export {
|
|
256
|
+
vitestMatchers
|
|
257
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@artemiskit/sdk",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Programmatic SDK for ArtemisKit LLM evaluation toolkit - integrate into your Node.js apps",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "Apache-2.0",
|
|
7
|
+
"author": "code-sensei",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/code-sensei/artemiskit.git",
|
|
11
|
+
"directory": "packages/sdk"
|
|
12
|
+
},
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/code-sensei/artemiskit/issues"
|
|
15
|
+
},
|
|
16
|
+
"homepage": "https://artemiskit.vercel.app",
|
|
17
|
+
"keywords": [
|
|
18
|
+
"llm",
|
|
19
|
+
"testing",
|
|
20
|
+
"sdk",
|
|
21
|
+
"ai",
|
|
22
|
+
"agents",
|
|
23
|
+
"evaluation",
|
|
24
|
+
"openai",
|
|
25
|
+
"anthropic",
|
|
26
|
+
"vitest",
|
|
27
|
+
"jest",
|
|
28
|
+
"red-team",
|
|
29
|
+
"security"
|
|
30
|
+
],
|
|
31
|
+
"main": "./dist/index.js",
|
|
32
|
+
"types": "./dist/index.d.ts",
|
|
33
|
+
"exports": {
|
|
34
|
+
".": {
|
|
35
|
+
"import": "./dist/index.js",
|
|
36
|
+
"types": "./dist/index.d.ts"
|
|
37
|
+
},
|
|
38
|
+
"./matchers": {
|
|
39
|
+
"import": "./dist/matchers/index.js",
|
|
40
|
+
"types": "./dist/matchers/index.d.ts"
|
|
41
|
+
},
|
|
42
|
+
"./vitest": {
|
|
43
|
+
"import": "./dist/matchers/vitest.js",
|
|
44
|
+
"types": "./dist/matchers/vitest.d.ts"
|
|
45
|
+
},
|
|
46
|
+
"./jest": {
|
|
47
|
+
"import": "./dist/matchers/jest.js",
|
|
48
|
+
"types": "./dist/matchers/jest.d.ts"
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"scripts": {
|
|
52
|
+
"build": "tsc --emitDeclarationOnly && bun build ./src/index.ts ./src/matchers/index.ts ./src/matchers/vitest.ts ./src/matchers/jest.ts --outdir ./dist --target bun",
|
|
53
|
+
"typecheck": "tsc --noEmit",
|
|
54
|
+
"clean": "rm -rf dist",
|
|
55
|
+
"test": "bun test",
|
|
56
|
+
"test:coverage": "bun test --coverage"
|
|
57
|
+
},
|
|
58
|
+
"dependencies": {
|
|
59
|
+
"@artemiskit/core": "workspace:*",
|
|
60
|
+
"@artemiskit/redteam": "workspace:*"
|
|
61
|
+
},
|
|
62
|
+
"devDependencies": {
|
|
63
|
+
"@types/bun": "^1.1.0",
|
|
64
|
+
"typescript": "^5.3.0"
|
|
65
|
+
},
|
|
66
|
+
"peerDependencies": {
|
|
67
|
+
"vitest": ">=1.0.0",
|
|
68
|
+
"@jest/expect": ">=29.0.0"
|
|
69
|
+
},
|
|
70
|
+
"peerDependenciesMeta": {
|
|
71
|
+
"vitest": {
|
|
72
|
+
"optional": true
|
|
73
|
+
},
|
|
74
|
+
"@jest/expect": {
|
|
75
|
+
"optional": true
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|