@artemiskit/core 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -0
- package/dist/artifacts/manifest.d.ts.map +1 -1
- package/dist/artifacts/types.d.ts +20 -0
- package/dist/artifacts/types.d.ts.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +688 -408
- package/dist/storage/local.d.ts.map +1 -1
- package/dist/storage/types.d.ts +4 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/validator/index.d.ts +6 -0
- package/dist/validator/index.d.ts.map +1 -0
- package/dist/validator/types.d.ts +58 -0
- package/dist/validator/types.d.ts.map +1 -0
- package/dist/validator/validator.d.ts +55 -0
- package/dist/validator/validator.d.ts.map +1 -0
- package/package.json +1 -1
- package/src/artifacts/manifest.ts +24 -2
- package/src/artifacts/types.ts +21 -0
- package/src/evaluators/similarity.test.ts +4 -3
- package/src/index.ts +3 -0
- package/src/storage/local.ts +24 -2
- package/src/storage/types.ts +4 -0
- package/src/validator/index.ts +6 -0
- package/src/validator/types.ts +62 -0
- package/src/validator/validator.ts +345 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local.d.ts","sourceRoot":"","sources":["../../src/storage/local.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpG,OAAO,KAAK,EACV,gBAAgB,EAChB,sBAAsB,EACtB,gBAAgB,EAChB,WAAW,EACX,WAAW,EACZ,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"local.d.ts","sourceRoot":"","sources":["../../src/storage/local.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpG,OAAO,KAAK,EACV,gBAAgB,EAChB,sBAAsB,EACtB,gBAAgB,EAChB,WAAW,EACX,WAAW,EACZ,MAAM,SAAS,CAAC;AAyDjB,qBAAa,mBAAoB,YAAW,sBAAsB;IAChE,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,aAAa,CAAS;gBAElB,QAAQ,SAAmB;IAKjC,IAAI,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IAU5C,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAczC,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAQ5C,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAQpD,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC;IAQlD,IAAI,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IA0DnD,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAYpC,OAAO,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAiBjE,eAAe;YASf,SAAS;IAWvB;;OAEG;YACW,iBAAiB;IAS/B;;OAEG;YACW,iBAAiB;IAM/B;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA4B3F;;OAEG;IACG,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAKrE;;OAEG;IACG,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAMzE;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAOlD;;OAEG;IACG,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAUxD;;OAEG;IACG,qBAAqB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAW5D;;OAEG;IACG,iBAAiB,CACrB,KAAK,EAAE,MAAM,EACb,mBAAmB,SAAO,GACzB,OAAO,CAAC;QACT,QAAQ,EAAE,gBAAgB,CAAC;QAC3B,UAAU,EAAE,gBAAgB,CAAC;QAC7B,aAAa,EAAE,OAAO,CAAC;QACvB,mBAAmB,EAAE,MAAM,CAAC;KAC7B,GAAG,IAAI,CAAC;CAwBV"}
|
package/dist/storage/types.d.ts
CHANGED
|
@@ -12,6 +12,8 @@ export interface RunListItem {
|
|
|
12
12
|
createdAt: string;
|
|
13
13
|
/** Type of manifest (run, redteam, stress) */
|
|
14
14
|
type?: 'run' | 'redteam' | 'stress';
|
|
15
|
+
/** Estimated cost in USD (optional, included when --show-cost is used) */
|
|
16
|
+
estimatedCostUsd?: number;
|
|
15
17
|
}
|
|
16
18
|
/**
|
|
17
19
|
* Comparison result between two runs
|
|
@@ -35,6 +37,8 @@ export interface ListOptions {
|
|
|
35
37
|
offset?: number;
|
|
36
38
|
/** Filter by manifest type */
|
|
37
39
|
type?: 'run' | 'redteam' | 'stress';
|
|
40
|
+
/** Include cost information in results */
|
|
41
|
+
includeCost?: boolean;
|
|
38
42
|
}
|
|
39
43
|
/**
|
|
40
44
|
* Storage adapter interface - implement to create custom storage backends
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/storage/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpG;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,IAAI,CAAC,EAAE,KAAK,GAAG,SAAS,GAAG,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/storage/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEpG;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,8CAA8C;IAC9C,IAAI,CAAC,EAAE,KAAK,GAAG,SAAS,GAAG,QAAQ,CAAC;IACpC,0EAA0E;IAC1E,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,WAAW,CAAC;IACtB,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE;QACL,WAAW,EAAE,MAAM,CAAC;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,8BAA8B;IAC9B,IAAI,CAAC,EAAE,KAAK,GAAG,SAAS,GAAG,QAAQ,CAAC;IACpC,0CAA0C;IAC1C,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B;;OAEG;IACH,IAAI,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE7C;;OAEG;IACH,IAAI,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE1C;;OAEG;IACH,OAAO,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9C;;OAEG;IACH,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAEtD;;OAEG;IACH,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;IAEpD;;OAEG;IACH,IAAI,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IAEpD;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErC;;OAEG;IACH,OAAO,CAAC,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;CAC5E;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC;IAC3B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,4CAA4C;IAC5C,OAAO,EAAE;QACP,WAAW,EAAE,MAAM,CAAC;QACpB,eAAe,EAAE,MAAM,CAAC;QACxB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,kCAAkC;IAClC,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,sBAAuB,SAAQ,cAAc;IAC5D;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAEtF;;OAEG;IACH,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC;IAEhE;;OAEG;IACH,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,CAAC;IAEpE;;OAEG;IACH,aAAa,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC;IAE7C;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEnD;;OAEG;IACH,qBAAqB,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvD;;;;OAIG;IACH,iBAAiB,CAAC,CAChB,KAAK,EAAE,MAAM,EACb,mBAAmB,CAAC,EAAE,MAAM,GAC3B,OAAO,CAAC;QACT,QAAQ,EAAE,gBAAgB,CAAC;QAC3B,UAAU,EAAE,gBAAgB,CAAC;QAC7B,aAAa,EAAE,OAAO,CAAC;QACvB,mBAAmB,EAAE,MAAM,CAAC;KAC7B,GAAG,IAAI,CAAC,CAAC;CACX"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/validator/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,SAAS,CAAC;AACxB,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validator types
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Validation error severity
|
|
6
|
+
*/
|
|
7
|
+
export type ValidationSeverity = 'error' | 'warning';
|
|
8
|
+
/**
|
|
9
|
+
* Validation error/warning
|
|
10
|
+
*/
|
|
11
|
+
export interface ValidationIssue {
|
|
12
|
+
/** Line number in the file (1-indexed) */
|
|
13
|
+
line: number;
|
|
14
|
+
/** Column number (optional) */
|
|
15
|
+
column?: number;
|
|
16
|
+
/** Error/warning message */
|
|
17
|
+
message: string;
|
|
18
|
+
/** Rule that triggered this issue */
|
|
19
|
+
rule: string;
|
|
20
|
+
/** Severity level */
|
|
21
|
+
severity: ValidationSeverity;
|
|
22
|
+
/** Suggested fix (optional) */
|
|
23
|
+
suggestion?: string;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Result for a single file validation
|
|
27
|
+
*/
|
|
28
|
+
export interface ValidationResult {
|
|
29
|
+
/** File path that was validated */
|
|
30
|
+
file: string;
|
|
31
|
+
/** Whether the file is valid (no errors) */
|
|
32
|
+
valid: boolean;
|
|
33
|
+
/** List of errors found */
|
|
34
|
+
errors: ValidationIssue[];
|
|
35
|
+
/** List of warnings found */
|
|
36
|
+
warnings: ValidationIssue[];
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Summary of validation across multiple files
|
|
40
|
+
*/
|
|
41
|
+
export interface ValidationSummary {
|
|
42
|
+
/** Total files validated */
|
|
43
|
+
total: number;
|
|
44
|
+
/** Files that passed validation */
|
|
45
|
+
passed: number;
|
|
46
|
+
/** Files that failed validation */
|
|
47
|
+
failed: number;
|
|
48
|
+
/** Files with warnings only */
|
|
49
|
+
withWarnings: number;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Options for the validator
|
|
53
|
+
*/
|
|
54
|
+
export interface ValidatorOptions {
|
|
55
|
+
/** Treat warnings as errors */
|
|
56
|
+
strict?: boolean;
|
|
57
|
+
}
|
|
58
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/validator/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,OAAO,GAAG,SAAS,CAAC;AAErD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,0CAA0C;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB;IACrB,QAAQ,EAAE,kBAAkB,CAAC;IAC7B,+BAA+B;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,mCAAmC;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,4CAA4C;IAC5C,KAAK,EAAE,OAAO,CAAC;IACf,2BAA2B;IAC3B,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,6BAA6B;IAC7B,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,4BAA4B;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,mCAAmC;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,mCAAmC;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,+BAA+B;IAC/B,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scenario Validator
|
|
3
|
+
*
|
|
4
|
+
* Validates scenario files for:
|
|
5
|
+
* 1. YAML syntax errors
|
|
6
|
+
* 2. Schema violations (required fields, types)
|
|
7
|
+
* 3. Semantic errors (duplicate IDs, undefined variables)
|
|
8
|
+
* 4. Warnings (deprecated patterns)
|
|
9
|
+
*/
|
|
10
|
+
import type { ValidationResult, ValidatorOptions } from './types';
|
|
11
|
+
/**
|
|
12
|
+
* Scenario validator class
|
|
13
|
+
*/
|
|
14
|
+
export declare class ScenarioValidator {
|
|
15
|
+
private _options;
|
|
16
|
+
constructor(options?: ValidatorOptions);
|
|
17
|
+
get options(): ValidatorOptions;
|
|
18
|
+
/**
|
|
19
|
+
* Validate a scenario file
|
|
20
|
+
*/
|
|
21
|
+
validate(filePath: string): ValidationResult;
|
|
22
|
+
/**
|
|
23
|
+
* Format Zod errors into ValidationIssues
|
|
24
|
+
*/
|
|
25
|
+
private formatZodErrors;
|
|
26
|
+
/**
|
|
27
|
+
* Find approximate line number for a YAML path
|
|
28
|
+
*/
|
|
29
|
+
private findLineForPath;
|
|
30
|
+
/**
|
|
31
|
+
* Validate semantic rules
|
|
32
|
+
*/
|
|
33
|
+
private validateSemantics;
|
|
34
|
+
/**
|
|
35
|
+
* Find line number for a case ID
|
|
36
|
+
*/
|
|
37
|
+
private findLineForCaseId;
|
|
38
|
+
/**
|
|
39
|
+
* Extract variable references from a string ({{varName}} format)
|
|
40
|
+
*/
|
|
41
|
+
private extractVariableRefs;
|
|
42
|
+
/**
|
|
43
|
+
* Detect warnings (non-blocking issues)
|
|
44
|
+
*/
|
|
45
|
+
private detectWarnings;
|
|
46
|
+
/**
|
|
47
|
+
* Check if object has a key at any depth
|
|
48
|
+
*/
|
|
49
|
+
private hasDeepKey;
|
|
50
|
+
/**
|
|
51
|
+
* Find line number for a key
|
|
52
|
+
*/
|
|
53
|
+
private findLineForKey;
|
|
54
|
+
}
|
|
55
|
+
//# sourceMappingURL=validator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAMH,OAAO,KAAK,EAAmB,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAC;AAEnF;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,OAAO,GAAE,gBAAqB;IAI1C,IAAI,OAAO,IAAI,gBAAgB,CAE9B;IAED;;OAEG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB;IAmF5C;;OAEG;IACH,OAAO,CAAC,eAAe;IAyCvB;;OAEG;IACH,OAAO,CAAC,eAAe;IAuBvB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAazB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAU3B;;OAEG;IACH,OAAO,CAAC,cAAc;IA6CtB;;OAEG;IACH,OAAO,CAAC,UAAU;IAYlB;;OAEG;IACH,OAAO,CAAC,cAAc;CAQvB"}
|
package/package.json
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { nanoid } from 'nanoid';
|
|
6
|
+
import { estimateCost, getModelPricing } from '../cost/pricing';
|
|
6
7
|
import { getEnvironmentInfo } from '../provenance/environment';
|
|
7
8
|
import { getGitInfo } from '../provenance/git';
|
|
8
9
|
import type {
|
|
9
10
|
CaseResult,
|
|
11
|
+
CostEstimateInfo,
|
|
10
12
|
ManifestRedactionInfo,
|
|
11
13
|
ResolvedConfig,
|
|
12
14
|
RunConfig,
|
|
@@ -40,7 +42,9 @@ export function createRunManifest(options: {
|
|
|
40
42
|
redaction,
|
|
41
43
|
} = options;
|
|
42
44
|
|
|
43
|
-
|
|
45
|
+
// Get model for cost calculation - prefer resolvedConfig, then config
|
|
46
|
+
const modelForCost = resolvedConfig?.model || config.model;
|
|
47
|
+
const metrics = calculateMetrics(cases, modelForCost);
|
|
44
48
|
const git = getGitInfo();
|
|
45
49
|
const environment = getEnvironmentInfo();
|
|
46
50
|
|
|
@@ -69,7 +73,7 @@ export function createRunManifest(options: {
|
|
|
69
73
|
/**
|
|
70
74
|
* Calculate metrics from case results
|
|
71
75
|
*/
|
|
72
|
-
function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
76
|
+
function calculateMetrics(cases: CaseResult[], model?: string): RunMetrics {
|
|
73
77
|
const passedCases = cases.filter((c) => c.ok);
|
|
74
78
|
const latencies = cases.map((c) => c.latencyMs).sort((a, b) => a - b);
|
|
75
79
|
|
|
@@ -81,6 +85,23 @@ function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
|
81
85
|
const totalPromptTokens = cases.reduce((sum, c) => sum + c.tokens.prompt, 0);
|
|
82
86
|
const totalCompletionTokens = cases.reduce((sum, c) => sum + c.tokens.completion, 0);
|
|
83
87
|
|
|
88
|
+
// Calculate cost if model is provided
|
|
89
|
+
let cost: CostEstimateInfo | undefined;
|
|
90
|
+
if (model && (totalPromptTokens > 0 || totalCompletionTokens > 0)) {
|
|
91
|
+
const costEstimate = estimateCost(totalPromptTokens, totalCompletionTokens, model);
|
|
92
|
+
const pricing = getModelPricing(model);
|
|
93
|
+
cost = {
|
|
94
|
+
total_usd: costEstimate.totalUsd,
|
|
95
|
+
prompt_cost_usd: costEstimate.promptCostUsd,
|
|
96
|
+
completion_cost_usd: costEstimate.completionCostUsd,
|
|
97
|
+
model: costEstimate.model,
|
|
98
|
+
pricing: {
|
|
99
|
+
prompt_per_1k: pricing.promptPer1K,
|
|
100
|
+
completion_per_1k: pricing.completionPer1K,
|
|
101
|
+
},
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
|
|
84
105
|
return {
|
|
85
106
|
success_rate: cases.length > 0 ? passedCases.length / cases.length : 0,
|
|
86
107
|
total_cases: cases.length,
|
|
@@ -91,6 +112,7 @@ function calculateMetrics(cases: CaseResult[]): RunMetrics {
|
|
|
91
112
|
total_tokens: totalPromptTokens + totalCompletionTokens,
|
|
92
113
|
total_prompt_tokens: totalPromptTokens,
|
|
93
114
|
total_completion_tokens: totalCompletionTokens,
|
|
115
|
+
cost,
|
|
94
116
|
};
|
|
95
117
|
}
|
|
96
118
|
|
package/src/artifacts/types.ts
CHANGED
|
@@ -67,6 +67,25 @@ export interface CaseResult {
|
|
|
67
67
|
redaction?: CaseRedactionInfo;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
/**
|
|
71
|
+
* Cost estimation details
|
|
72
|
+
*/
|
|
73
|
+
export interface CostEstimateInfo {
|
|
74
|
+
/** Estimated total cost in USD */
|
|
75
|
+
total_usd: number;
|
|
76
|
+
/** Cost for prompt/input tokens */
|
|
77
|
+
prompt_cost_usd: number;
|
|
78
|
+
/** Cost for completion/output tokens */
|
|
79
|
+
completion_cost_usd: number;
|
|
80
|
+
/** Model used for cost calculation */
|
|
81
|
+
model: string;
|
|
82
|
+
/** Pricing used (per 1K tokens) */
|
|
83
|
+
pricing: {
|
|
84
|
+
prompt_per_1k: number;
|
|
85
|
+
completion_per_1k: number;
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
|
|
70
89
|
/**
|
|
71
90
|
* Run metrics
|
|
72
91
|
*/
|
|
@@ -80,6 +99,8 @@ export interface RunMetrics {
|
|
|
80
99
|
total_tokens: number;
|
|
81
100
|
total_prompt_tokens: number;
|
|
82
101
|
total_completion_tokens: number;
|
|
102
|
+
/** Estimated cost information */
|
|
103
|
+
cost?: CostEstimateInfo;
|
|
83
104
|
}
|
|
84
105
|
|
|
85
106
|
/**
|
|
@@ -15,7 +15,8 @@ describe('SimilarityEvaluator', () => {
|
|
|
15
15
|
|
|
16
16
|
test('throws on invalid expected type', async () => {
|
|
17
17
|
await expect(
|
|
18
|
-
|
|
18
|
+
// @ts-expect-error Testing invalid type handling
|
|
19
|
+
evaluator.evaluate('response', { type: 'exact', value: 'test' })
|
|
19
20
|
).rejects.toThrow('Invalid expected type');
|
|
20
21
|
});
|
|
21
22
|
|
|
@@ -288,8 +289,8 @@ describe('SimilarityEvaluator', () => {
|
|
|
288
289
|
{
|
|
289
290
|
type: 'similarity',
|
|
290
291
|
value: 'Text B',
|
|
291
|
-
|
|
292
|
-
}
|
|
292
|
+
threshold: undefined, // Testing default threshold (0.75)
|
|
293
|
+
},
|
|
293
294
|
mockContext
|
|
294
295
|
);
|
|
295
296
|
|
package/src/index.ts
CHANGED
package/src/storage/local.ts
CHANGED
|
@@ -38,6 +38,21 @@ function getSuccessRate(manifest: AnyManifest): number {
|
|
|
38
38
|
return (manifest as RunManifest).metrics.success_rate;
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
/**
|
|
42
|
+
* Get estimated cost from any manifest type
|
|
43
|
+
*/
|
|
44
|
+
function getEstimatedCost(manifest: AnyManifest): number | undefined {
|
|
45
|
+
const type = getManifestType(manifest);
|
|
46
|
+
if (type === 'stress') {
|
|
47
|
+
return (manifest as StressManifest).metrics.cost?.estimated_total_usd;
|
|
48
|
+
}
|
|
49
|
+
if (type === 'run') {
|
|
50
|
+
return (manifest as RunManifest).metrics.cost?.total_usd;
|
|
51
|
+
}
|
|
52
|
+
// Redteam doesn't have cost tracking yet
|
|
53
|
+
return undefined;
|
|
54
|
+
}
|
|
55
|
+
|
|
41
56
|
/**
|
|
42
57
|
* Get scenario name from any manifest type
|
|
43
58
|
*/
|
|
@@ -138,13 +153,20 @@ export class LocalStorageAdapter implements BaselineStorageAdapter {
|
|
|
138
153
|
continue;
|
|
139
154
|
}
|
|
140
155
|
|
|
141
|
-
|
|
156
|
+
const item: RunListItem = {
|
|
142
157
|
runId: manifest.run_id,
|
|
143
158
|
scenario: getScenario(manifest),
|
|
144
159
|
successRate: getSuccessRate(manifest),
|
|
145
160
|
createdAt: manifest.start_time,
|
|
146
161
|
type: manifestType,
|
|
147
|
-
}
|
|
162
|
+
};
|
|
163
|
+
|
|
164
|
+
// Include cost if requested
|
|
165
|
+
if (options?.includeCost) {
|
|
166
|
+
item.estimatedCostUsd = getEstimatedCost(manifest);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
results.push(item);
|
|
148
170
|
} catch {}
|
|
149
171
|
}
|
|
150
172
|
}
|
package/src/storage/types.ts
CHANGED
|
@@ -14,6 +14,8 @@ export interface RunListItem {
|
|
|
14
14
|
createdAt: string;
|
|
15
15
|
/** Type of manifest (run, redteam, stress) */
|
|
16
16
|
type?: 'run' | 'redteam' | 'stress';
|
|
17
|
+
/** Estimated cost in USD (optional, included when --show-cost is used) */
|
|
18
|
+
estimatedCostUsd?: number;
|
|
17
19
|
}
|
|
18
20
|
|
|
19
21
|
/**
|
|
@@ -39,6 +41,8 @@ export interface ListOptions {
|
|
|
39
41
|
offset?: number;
|
|
40
42
|
/** Filter by manifest type */
|
|
41
43
|
type?: 'run' | 'redteam' | 'stress';
|
|
44
|
+
/** Include cost information in results */
|
|
45
|
+
includeCost?: boolean;
|
|
42
46
|
}
|
|
43
47
|
|
|
44
48
|
/**
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validator types
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Validation error severity
|
|
7
|
+
*/
|
|
8
|
+
export type ValidationSeverity = 'error' | 'warning';
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Validation error/warning
|
|
12
|
+
*/
|
|
13
|
+
export interface ValidationIssue {
|
|
14
|
+
/** Line number in the file (1-indexed) */
|
|
15
|
+
line: number;
|
|
16
|
+
/** Column number (optional) */
|
|
17
|
+
column?: number;
|
|
18
|
+
/** Error/warning message */
|
|
19
|
+
message: string;
|
|
20
|
+
/** Rule that triggered this issue */
|
|
21
|
+
rule: string;
|
|
22
|
+
/** Severity level */
|
|
23
|
+
severity: ValidationSeverity;
|
|
24
|
+
/** Suggested fix (optional) */
|
|
25
|
+
suggestion?: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Result for a single file validation
|
|
30
|
+
*/
|
|
31
|
+
export interface ValidationResult {
|
|
32
|
+
/** File path that was validated */
|
|
33
|
+
file: string;
|
|
34
|
+
/** Whether the file is valid (no errors) */
|
|
35
|
+
valid: boolean;
|
|
36
|
+
/** List of errors found */
|
|
37
|
+
errors: ValidationIssue[];
|
|
38
|
+
/** List of warnings found */
|
|
39
|
+
warnings: ValidationIssue[];
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Summary of validation across multiple files
|
|
44
|
+
*/
|
|
45
|
+
export interface ValidationSummary {
|
|
46
|
+
/** Total files validated */
|
|
47
|
+
total: number;
|
|
48
|
+
/** Files that passed validation */
|
|
49
|
+
passed: number;
|
|
50
|
+
/** Files that failed validation */
|
|
51
|
+
failed: number;
|
|
52
|
+
/** Files with warnings only */
|
|
53
|
+
withWarnings: number;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Options for the validator
|
|
58
|
+
*/
|
|
59
|
+
export interface ValidatorOptions {
|
|
60
|
+
/** Treat warnings as errors */
|
|
61
|
+
strict?: boolean;
|
|
62
|
+
}
|