@artemiskit/core 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/dist/artifacts/manifest.d.ts.map +1 -1
- package/dist/artifacts/types.d.ts +20 -0
- package/dist/artifacts/types.d.ts.map +1 -1
- package/dist/index.js +9480 -9448
- package/dist/storage/local.d.ts.map +1 -1
- package/dist/storage/types.d.ts +4 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/artifacts/manifest.ts +24 -2
- package/src/artifacts/types.ts +21 -0
- package/src/evaluators/similarity.test.ts +4 -3
- package/src/storage/local.ts +24 -2
- package/src/storage/types.ts +4 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local.d.ts","sourceRoot":"","sources":["../../src/storage/local.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpG,OAAO,KAAK,EACV,gBAAgB,EAChB,sBAAsB,EACtB,gBAAgB,EAChB,WAAW,EACX,WAAW,EACZ,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"local.d.ts","sourceRoot":"","sources":["../../src/storage/local.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpG,OAAO,KAAK,EACV,gBAAgB,EAChB,sBAAsB,EACtB,gBAAgB,EAChB,WAAW,EACX,WAAW,EACZ,MAAM,SAAS,CAAC;AAyDjB,qBAAa,mBAAoB,YAAW,sBAAsB;IAChE,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,aAAa,CAAS;gBAElB,QAAQ,SAAmB;IAKjC,IAAI,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAU5C,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAczC,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAQ5C,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAQpD,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC;IAQlD,IAAI,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IA0DnD,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAYpC,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAiBjE,eAAe;YASf,SAAS;IAWvB;;OAEG;YACW,iBAAiB;IAS/B;;OAEG;YACW,iBAAiB;IAM/B;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA4B3F;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAKrE;;OAEG;IACG,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAMzE;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAOlD;;OAEG;IACG,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAUxD;;OAEG;IACG,qBAAqB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAW5D;;OAEG;IACG,iBAAiB,CACrB,KAAK,EAAE,MAAM,EACb,mBAAmB,SAAO,GACzB,OAAO,CAAC;QACT,QAAQ,EAAE,gBAAgB,CAAC;QAC3B,UAAU,EAAE,gBAAgB,CAAC;QAC7B,aAAa,EAAE,OAAO,CAAC;QACvB,mBAAmB,EAAE,MAAM,CAAC;KAC7B,GAAG,IAAI,CAAC;CAwBV"}
|
package/dist/storage/types.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export interface RunListItem {
|
|
|
12
12
|
createdAt: string;
|
|
13
13
|
/** Type of manifest (run, redteam, stress) */
|
|
14
14
|
type?: 'run' | 'redteam' | 'stress';
|
|
15
|
+
/** Estimated cost in USD (optional, included when --show-cost is used) */
|
|
16
|
+
estimatedCostUsd?: number;
|
|
15
17
|
}
|
|
16
18
|
/**
|
|
17
19
|
* Comparison result between two runs
|
|
@@ -35,6 +37,8 @@ export interface ListOptions {
|
|
|
35
37
|
offset?: number;
|
|
36
38
|
/** Filter by manifest type */
|
|
37
39
|
type?: 'run' | 'redteam' | 'stress';
|
|
40
|
+
/** Include cost information in results */
|
|
41
|
+
includeCost?: boolean;
|
|
38
42
|
}
|
|
39
43
|
/**
|
|
40
44
|
* Storage adapter interface - implement to create custom storage backends
|
|
package/dist/storage/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/storage/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpG;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,IAAI,CAAC,EAAE,KAAK,GAAG,SAAS,GAAG,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/storage/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpG;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,IAAI,CAAC,EAAE,KAAK,GAAG,SAAS,GAAG,QAAQ,CAAC;IACpC,0EAA0E;IAC1E,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,WAAW,CAAC;IACtB,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE;QACL,WAAW,EAAE,MAAM,CAAC;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,IAAI,CAAC,EAAE,KAAK,GAAG,SAAS,GAAG,QAAQ,CAAC;IACpC,0CAA0C;IAC1C,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B;;OAEG;IACH,IAAI,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE7C;;OAEG;IACH,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE1C;;OAEG;IACH,OAAO,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9C;;OAEG;IACH,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAEtD;;OAEG;IACH,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IAEpD;;OAEG;IACH,IAAI,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IAEpD;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErC;;OAEG;IACH,OAAO,CAAC,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;CAC5E;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC;IAC3B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,4CAA4C;IAC5C,OAAO,EAAE;QACP,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,EAAE,MAAM,C
AAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,kCAAkC;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,sBAAuB,SAAQ,cAAc;IAC5D;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAEtF;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC;IAEhE;;OAEG;IACH,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC;IAEpE;;OAEG;IACH,aAAa,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;IAE7C;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEnD;;OAEG;IACH,qBAAqB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvD;;;;OAIG;IACH,iBAAiB,CAAC,CAChB,KAAK,EAAE,MAAM,EACb,mBAAmB,CAAC,EAAE,MAAM,GAC3B,OAAO,CAAC;QACT,QAAQ,EAAE,gBAAgB,CAAC;QAC3B,UAAU,EAAE,gBAAgB,CAAC;QAC7B,aAAa,EAAE,OAAO,CAAC;QACvB,mBAAmB,EAAE,MAAM,CAAC;KAC7B,GAAG,IAAI,CAAC,CAAC;CACX"}
|
package/package.json
CHANGED
|
package/src/artifacts/manifest.ts
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { nanoid } from 'nanoid';
|
|
6
|
+
import { estimateCost, getModelPricing } from '../cost/pricing';
|
|
6
7
|
import { getEnvironmentInfo } from '../provenance/environment';
|
|
7
8
|
import { getGitInfo } from '../provenance/git';
|
|
8
9
|
import type {
|
|
9
10
|
CaseResult,
|
|
11
|
+
CostEstimateInfo,
|
|
10
12
|
ManifestRedactionInfo,
|
|
11
13
|
ResolvedConfig,
|
|
12
14
|
RunConfig,
|
|
@@ -40,7 +42,9 @@ export function createRunManifest(options: {
|
|
|
40
42
|
redaction,
|
|
41
43
|
} = options;
|
|
42
44
|
|
|
43
|
-
|
|
45
|
+
// Get model for cost calculation - prefer resolvedConfig, then config
|
|
46
|
+
const modelForCost = resolvedConfig?.model || config.model;
|
|
47
|
+
const metrics = calculateMetrics(cases, modelForCost);
|
|
44
48
|
const git = getGitInfo();
|
|
45
49
|
const environment = getEnvironmentInfo();
|
|
46
50
|
|
|
@@ -69,7 +73,7 @@ export function createRunManifest(options: {
|
|
|
69
73
|
/**
|
|
70
74
|
* Calculate metrics from case results
|
|
71
75
|
*/
|
|
72
|
-
function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
76
|
+
function calculateMetrics(cases: CaseResult[], model?: string): RunMetrics {
|
|
73
77
|
const passedCases = cases.filter((c) => c.ok);
|
|
74
78
|
const latencies = cases.map((c) => c.latencyMs).sort((a, b) => a - b);
|
|
75
79
|
|
|
@@ -81,6 +85,23 @@ function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
|
81
85
|
const totalPromptTokens = cases.reduce((sum, c) => sum + c.tokens.prompt, 0);
|
|
82
86
|
const totalCompletionTokens = cases.reduce((sum, c) => sum + c.tokens.completion, 0);
|
|
83
87
|
|
|
88
|
+
// Calculate cost if model is provided
|
|
89
|
+
let cost: CostEstimateInfo | undefined;
|
|
90
|
+
if (model && (totalPromptTokens > 0 || totalCompletionTokens > 0)) {
|
|
91
|
+
const costEstimate = estimateCost(totalPromptTokens, totalCompletionTokens, model);
|
|
92
|
+
const pricing = getModelPricing(model);
|
|
93
|
+
cost = {
|
|
94
|
+
total_usd: costEstimate.totalUsd,
|
|
95
|
+
prompt_cost_usd: costEstimate.promptCostUsd,
|
|
96
|
+
completion_cost_usd: costEstimate.completionCostUsd,
|
|
97
|
+
model: costEstimate.model,
|
|
98
|
+
pricing: {
|
|
99
|
+
prompt_per_1k: pricing.promptPer1K,
|
|
100
|
+
completion_per_1k: pricing.completionPer1K,
|
|
101
|
+
},
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
|
|
84
105
|
return {
|
|
85
106
|
success_rate: cases.length > 0 ? passedCases.length / cases.length : 0,
|
|
86
107
|
total_cases: cases.length,
|
|
@@ -91,6 +112,7 @@ function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
|
91
112
|
total_tokens: totalPromptTokens + totalCompletionTokens,
|
|
92
113
|
total_prompt_tokens: totalPromptTokens,
|
|
93
114
|
total_completion_tokens: totalCompletionTokens,
|
|
115
|
+
cost,
|
|
94
116
|
};
|
|
95
117
|
}
|
|
96
118
|
|
package/src/artifacts/types.ts
CHANGED
|
@@ -67,6 +67,25 @@ export interface CaseResult {
|
|
|
67
67
|
redaction?: CaseRedactionInfo;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
/**
|
|
71
|
+
* Cost estimation details
|
|
72
|
+
*/
|
|
73
|
+
export interface CostEstimateInfo {
|
|
74
|
+
/** Estimated total cost in USD */
|
|
75
|
+
total_usd: number;
|
|
76
|
+
/** Cost for prompt/input tokens */
|
|
77
|
+
prompt_cost_usd: number;
|
|
78
|
+
/** Cost for completion/output tokens */
|
|
79
|
+
completion_cost_usd: number;
|
|
80
|
+
/** Model used for cost calculation */
|
|
81
|
+
model: string;
|
|
82
|
+
/** Pricing used (per 1K tokens) */
|
|
83
|
+
pricing: {
|
|
84
|
+
prompt_per_1k: number;
|
|
85
|
+
completion_per_1k: number;
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
|
|
70
89
|
/**
|
|
71
90
|
* Run metrics
|
|
72
91
|
*/
|
|
@@ -80,6 +99,8 @@ export interface RunMetrics {
|
|
|
80
99
|
total_tokens: number;
|
|
81
100
|
total_prompt_tokens: number;
|
|
82
101
|
total_completion_tokens: number;
|
|
102
|
+
/** Estimated cost information */
|
|
103
|
+
cost?: CostEstimateInfo;
|
|
83
104
|
}
|
|
84
105
|
|
|
85
106
|
/**
|
|
package/src/evaluators/similarity.test.ts
CHANGED
|
@@ -15,7 +15,8 @@ describe('SimilarityEvaluator', () => {
|
|
|
15
15
|
|
|
16
16
|
test('throws on invalid expected type', async () => {
|
|
17
17
|
await expect(
|
|
18
|
-
|
|
18
|
+
// @ts-expect-error Testing invalid type handling
|
|
19
|
+
evaluator.evaluate('response', { type: 'exact', value: 'test' })
|
|
19
20
|
).rejects.toThrow('Invalid expected type');
|
|
20
21
|
});
|
|
21
22
|
|
|
@@ -288,8 +289,8 @@ describe('SimilarityEvaluator', () => {
|
|
|
288
289
|
{
|
|
289
290
|
type: 'similarity',
|
|
290
291
|
value: 'Text B',
|
|
291
|
-
|
|
292
|
-
}
|
|
292
|
+
threshold: undefined, // Testing default threshold (0.75)
|
|
293
|
+
},
|
|
293
294
|
mockContext
|
|
294
295
|
);
|
|
295
296
|
|
package/src/storage/local.ts
CHANGED
|
@@ -38,6 +38,21 @@ function getSuccessRate(manifest: AnyManifest): number {
|
|
|
38
38
|
return (manifest as RunManifest).metrics.success_rate;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/**
|
|
42
|
+
* Get estimated cost from any manifest type
|
|
43
|
+
*/
|
|
44
|
+
function getEstimatedCost(manifest: AnyManifest): number | undefined {
|
|
45
|
+
const type = getManifestType(manifest);
|
|
46
|
+
if (type === 'stress') {
|
|
47
|
+
return (manifest as StressManifest).metrics.cost?.estimated_total_usd;
|
|
48
|
+
}
|
|
49
|
+
if (type === 'run') {
|
|
50
|
+
return (manifest as RunManifest).metrics.cost?.total_usd;
|
|
51
|
+
}
|
|
52
|
+
// Redteam doesn't have cost tracking yet
|
|
53
|
+
return undefined;
|
|
54
|
+
}
|
|
55
|
+
|
|
41
56
|
/**
|
|
42
57
|
* Get scenario name from any manifest type
|
|
43
58
|
*/
|
|
@@ -138,13 +153,20 @@ export class LocalStorageAdapter implements BaselineStorageAdapter {
|
|
|
138
153
|
continue;
|
|
139
154
|
}
|
|
140
155
|
|
|
141
|
-
|
|
156
|
+
const item: RunListItem = {
|
|
142
157
|
runId: manifest.run_id,
|
|
143
158
|
scenario: getScenario(manifest),
|
|
144
159
|
successRate: getSuccessRate(manifest),
|
|
145
160
|
createdAt: manifest.start_time,
|
|
146
161
|
type: manifestType,
|
|
147
|
-
}
|
|
162
|
+
};
|
|
163
|
+
|
|
164
|
+
// Include cost if requested
|
|
165
|
+
if (options?.includeCost) {
|
|
166
|
+
item.estimatedCostUsd = getEstimatedCost(manifest);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
results.push(item);
|
|
148
170
|
} catch {}
|
|
149
171
|
}
|
|
150
172
|
}
|
package/src/storage/types.ts
CHANGED
|
@@ -14,6 +14,8 @@ export interface RunListItem {
|
|
|
14
14
|
createdAt: string;
|
|
15
15
|
/** Type of manifest (run, redteam, stress) */
|
|
16
16
|
type?: 'run' | 'redteam' | 'stress';
|
|
17
|
+
/** Estimated cost in USD (optional, included when --show-cost is used) */
|
|
18
|
+
estimatedCostUsd?: number;
|
|
17
19
|
}
|
|
18
20
|
|
|
19
21
|
/**
|
|
@@ -39,6 +41,8 @@ export interface ListOptions {
|
|
|
39
41
|
offset?: number;
|
|
40
42
|
/** Filter by manifest type */
|
|
41
43
|
type?: 'run' | 'redteam' | 'stress';
|
|
44
|
+
/** Include cost information in results */
|
|
45
|
+
includeCost?: boolean;
|
|
42
46
|
}
|
|
43
47
|
|
|
44
48
|
/**
|